Files
wifi-densepose/crates/ruvector-temporal-tensor/tests/benchmarks.rs
ruv d803bfe2b1 Squashed 'vendor/ruvector/' content from commit b64c2172
git-subtree-dir: vendor/ruvector
git-subtree-split: b64c21726f2bb37286d9ee36a7869fef60cc6900
2026-02-28 14:39:40 -05:00

1013 lines
32 KiB
Rust

//! Acceptance tests and microbenchmarks for the temporal tensor store (ADR-023).
//!
//! Runs via `cargo test --release -p ruvector-temporal-tensor --test benchmarks -- --nocapture`
//!
//! All timing uses `std::time::Instant` to maintain the zero-dependency constraint.
//! No external crates (criterion, rand, etc.) are used.
use std::time::Instant;
use ruvector_temporal_tensor::bitpack;
use ruvector_temporal_tensor::quantizer;
use ruvector_temporal_tensor::segment;
use ruvector_temporal_tensor::tier_policy::TierPolicy;
use ruvector_temporal_tensor::tiering::{self, BlockKey, BlockMeta, Tier, TierConfig};
use ruvector_temporal_tensor::TemporalTensorCompressor;
// ---------------------------------------------------------------------------
// Deterministic PRNG (LCG) -- no external deps
// ---------------------------------------------------------------------------
/// Deterministic linear congruential generator (Knuth MMIX constants).
/// Keeps the benchmark suite free of external RNG crates.
struct SimpleRng {
    state: u64,
}

impl SimpleRng {
    /// Seed the generator; equal seeds replay identical sequences.
    fn new(seed: u64) -> Self {
        SimpleRng { state: seed }
    }

    /// Advance the LCG one step and return the new 64-bit state.
    fn next_u64(&mut self) -> u64 {
        let advanced = self
            .state
            .wrapping_mul(6_364_136_223_846_793_005)
            .wrapping_add(1_442_695_040_888_963_407);
        self.state = advanced;
        advanced
    }

    /// Uniform f64 in [0, 1), built from the top 53 bits of the state.
    fn next_f64(&mut self) -> f64 {
        let top53 = self.next_u64() >> 11;
        top53 as f64 / (1u64 << 53) as f64
    }

    /// Uniform f32 in [0, 1).
    #[allow(dead_code)]
    fn next_f32(&mut self) -> f32 {
        self.next_f64() as f32
    }
}
// ---------------------------------------------------------------------------
// Zipf distribution sampler -- no external deps
// ---------------------------------------------------------------------------
/// Zipf(s) sampler over [0, n) via a precomputed inverse-CDF table.
/// Rejection-free: every draw is one uniform plus one binary search.
struct ZipfSampler {
    n: usize,
    #[allow(dead_code)]
    s: f64,
    /// Normalized cumulative probabilities; entry k holds P(X <= k).
    cdf: Vec<f64>,
}

impl ZipfSampler {
    fn new(n: usize, s: f64) -> Self {
        // Accumulate unnormalized weights 1/k^s, then rescale so the
        // last entry is exactly 1.0.
        let mut cdf: Vec<f64> = Vec::with_capacity(n);
        let mut running = 0.0f64;
        for rank in 1..=n {
            running += (rank as f64).powf(s).recip();
            cdf.push(running);
        }
        for entry in cdf.iter_mut() {
            *entry /= running;
        }
        Self { n, s, cdf }
    }

    /// Draw an index in [0, n) by locating a uniform draw in the CDF.
    fn sample(&self, rng: &mut SimpleRng) -> usize {
        let u = rng.next_f64();
        // First position whose cumulative probability reaches `u`
        // (identical to a lower-bound binary search on the table).
        let idx = self.cdf.partition_point(|&c| c < u);
        idx.min(self.n - 1)
    }
}
// ---------------------------------------------------------------------------
// Helpers
// ---------------------------------------------------------------------------
/// Produce `len` deterministic pseudo-random f32 values in [-1, 1].
fn generate_f32_data(rng: &mut SimpleRng, len: usize) -> Vec<f32> {
    let mut out = Vec::with_capacity(len);
    for _ in 0..len {
        // Map a [0, 1) draw onto [-1, 1).
        out.push(rng.next_f64() as f32 * 2.0 - 1.0);
    }
    out
}
/// Produce f32 values whose magnitude never drops below `min_mag`.
/// Output lies in [-1.0, -min_mag] union [min_mag, 1.0], which keeps
/// relative-error measurements meaningful in the quality tests.
fn generate_f32_data_no_near_zero(rng: &mut SimpleRng, len: usize, min_mag: f32) -> Vec<f32> {
    let span = 1.0 - min_mag;
    let mut out = Vec::with_capacity(len);
    for _ in 0..len {
        // Draw order matters for determinism: sign bit first, then
        // magnitude, matching the established sequence for this seed.
        let positive = rng.next_u64() & 1 == 0;
        let magnitude = min_mag + rng.next_f64() as f32 * span;
        out.push(if positive { magnitude } else { -magnitude });
    }
    out
}
/// Run `f` for `iters` iterations under a single `Instant` timer.
///
/// Returns `(total, per_iter)` wall-clock durations.
///
/// With `iters == 0` no work is performed and both durations are zero;
/// the previous implementation would panic there, since `Duration`
/// division by a zero `u32` panics.
fn bench_loop<F: FnMut()>(iters: u32, mut f: F) -> (std::time::Duration, std::time::Duration) {
    if iters == 0 {
        return (std::time::Duration::ZERO, std::time::Duration::ZERO);
    }
    let start = Instant::now();
    for _ in 0..iters {
        f();
    }
    let total = start.elapsed();
    (total, total / iters)
}
// ---------------------------------------------------------------------------
// 1. Zipf Access Simulation (Acceptance Test)
// ---------------------------------------------------------------------------
/// Acceptance test: Zipf access simulation using the `tiering` module.
/// - 10,000 blocks (scaled down from 1M for test speed)
/// - 100,000 accesses (scaled down from 10M)
/// - PASS criteria:
///   1. Tier1 count stays under cap (Zipf concentrates on a small hot head)
///   2. Tier flips per block per minute < 0.1 (hysteresis dampens oscillation)
///   3. P95 read latency within target (reported only -- printed as WARN,
///      never asserted, because wall-clock latency is machine-dependent)
#[test]
fn zipf_acceptance_test() {
    const NUM_BLOCKS: usize = 10_000;
    const NUM_ACCESSES: usize = 100_000;
    const TENSOR_LEN: u32 = 64;
    // Skew 1.1: a small head of blocks receives most of the accesses.
    let zipf = ZipfSampler::new(NUM_BLOCKS, 1.1);
    let mut rng = SimpleRng::new(0xDEAD_BEEF);
    // Pre-generate one frame per block (kept outside the timed loop).
    let mut block_frames: Vec<Vec<f32>> = Vec::with_capacity(NUM_BLOCKS);
    for _ in 0..NUM_BLOCKS {
        block_frames.push(generate_f32_data(&mut rng, TENSOR_LEN as usize));
    }
    let tier_config = TierConfig::default();
    // Per-block state: tiering metadata + compressor + emitted segments.
    // `flip_count` / `last_tier` record oscillation for criterion 2.
    struct BlockState {
        meta: BlockMeta,
        compressor: TemporalTensorCompressor,
        segments: Vec<Vec<u8>>,
        flip_count: u32,
        last_tier: Tier,
    }
    let policy = TierPolicy::default();
    let mut blocks: Vec<BlockState> = (0..NUM_BLOCKS)
        .map(|_| {
            let meta = BlockMeta::new(0);
            let last_tier = meta.current_tier;
            BlockState {
                meta,
                compressor: TemporalTensorCompressor::new(policy, TENSOR_LEN, 0),
                segments: Vec::new(),
                flip_count: 0,
                last_tier,
            }
        })
        .collect();
    let mut read_latencies_ns: Vec<u64> = Vec::with_capacity(NUM_ACCESSES);
    let sim_start = Instant::now();
    for access_i in 0..NUM_ACCESSES {
        let block_idx = zipf.sample(&mut rng);
        // The access index doubles as the logical clock.
        let now = access_i as u64;
        let block = &mut blocks[block_idx];
        // Update tiering metadata
        tiering::touch(&tier_config, now, &mut block.meta);
        // Check for tier migration via hysteresis-guarded scoring
        if let Some(new_tier) = tiering::choose_tier(&tier_config, now, &block.meta) {
            block.meta.current_tier = new_tier;
            block.meta.tier_since = now;
            if new_tier != block.last_tier {
                block.flip_count += 1;
                block.last_tier = new_tier;
            }
        }
        // Push frame through compressor; storage is skipped entirely when
        // the tier maps to a 0-bit width.
        let bits = tiering::bits_for_tier(&tier_config, block.meta.current_tier, 0);
        if bits > 0 {
            // Sync compressor access state to match tier
            let ts32 = now as u32;
            block.compressor.touch(ts32);
            let mut seg_out = Vec::new();
            block
                .compressor
                .push_frame(&block_frames[block_idx], ts32, &mut seg_out);
            if !seg_out.is_empty() {
                block.segments.push(seg_out);
            }
        }
        // Measure read latency (decode last segment)
        let read_start = Instant::now();
        if let Some(last_seg) = block.segments.last() {
            let mut decoded = Vec::new();
            segment::decode(last_seg, &mut decoded);
            std::hint::black_box(&decoded);
        }
        read_latencies_ns.push(read_start.elapsed().as_nanos() as u64);
    }
    // Simulation done; capture wall time before the (untimed) final flush.
    let sim_elapsed = sim_start.elapsed();
    // Flush all compressors so partially filled segments are accounted for.
    for block in blocks.iter_mut() {
        let mut seg_out = Vec::new();
        block.compressor.flush(&mut seg_out);
        if !seg_out.is_empty() {
            block.segments.push(seg_out);
        }
    }
    // --- Evaluate criteria ---
    // 1. Tier distribution
    let tier1_count = blocks
        .iter()
        .filter(|b| b.meta.current_tier == Tier::Tier1)
        .count();
    let tier2_count = blocks
        .iter()
        .filter(|b| b.meta.current_tier == Tier::Tier2)
        .count();
    let tier3_count = blocks
        .iter()
        .filter(|b| b.meta.current_tier == Tier::Tier3)
        .count();
    // Under Zipf(1.1), ~20% of blocks receive ~80% of accesses. The hot set
    // should be bounded. Use 40% as a generous cap (Zipf head + warm zone).
    let tier1_cap = NUM_BLOCKS * 40 / 100;
    // 2. Flip rate per block per simulated minute
    let total_flips: u32 = blocks.iter().map(|b| b.flip_count).sum();
    // Scale: 10,000 accesses = 1 simulated minute
    let sim_minutes = NUM_ACCESSES as f64 / 10_000.0;
    let flip_rate = if sim_minutes > 0.0 && NUM_BLOCKS > 0 {
        total_flips as f64 / NUM_BLOCKS as f64 / sim_minutes
    } else {
        0.0
    };
    // 3. P95 read latency: sort ascending and index at the 95th percentile.
    read_latencies_ns.sort_unstable();
    let p95_idx = (read_latencies_ns.len() as f64 * 0.95) as usize;
    let p95_latency_ns = read_latencies_ns.get(p95_idx).copied().unwrap_or(0);
    // --- Report ---
    eprintln!();
    eprintln!("--- Zipf Acceptance Test ---");
    eprintln!();
    eprintln!(" Blocks: {} Accesses: {}", NUM_BLOCKS, NUM_ACCESSES);
    eprintln!(" Wall time: {:.2?}", sim_elapsed);
    eprintln!(
        " Tier1: {} Tier2: {} Tier3: {}",
        tier1_count, tier2_count, tier3_count
    );
    eprintln!(
        " Tier1 blocks: {} (cap: {}) {}",
        tier1_count,
        tier1_cap,
        if tier1_count <= tier1_cap {
            "PASS"
        } else {
            "FAIL"
        }
    );
    eprintln!(
        " Tier flip rate: {:.4}/block/min (threshold: 0.1) {}",
        flip_rate,
        if flip_rate < 0.1 { "PASS" } else { "FAIL" }
    );
    // Latency is host-dependent, so it only warns; see doc comment above.
    eprintln!(
        " P95 read latency: {} ns {}",
        p95_latency_ns,
        if p95_latency_ns < 50_000 {
            "PASS"
        } else {
            "WARN"
        }
    );
    eprintln!();
    assert!(
        tier1_count <= tier1_cap,
        "Tier1 count {} exceeds cap {}",
        tier1_count,
        tier1_cap
    );
    assert!(
        flip_rate < 0.1,
        "Tier flip rate {:.4}/block/min exceeds 0.1 threshold",
        flip_rate
    );
}
// ---------------------------------------------------------------------------
// 2. Quantize Microbenchmarks
// ---------------------------------------------------------------------------
/// Microbenchmark: quantize + bit-pack one 16KB f32 block at every tier
/// bit width, printing ns/iter and effective GB/s of raw input consumed.
#[test]
fn bench_quantize_all_widths() {
    const ELEM_COUNT: usize = 4096; // 16KB of f32
    const ITERS: u32 = 1000;
    const GROUP_LEN: usize = 64;
    const RAW_BYTES: f64 = (ELEM_COUNT * 4) as f64;
    let mut rng = SimpleRng::new(42);
    let samples = generate_f32_data(&mut rng, ELEM_COUNT);
    eprintln!();
    eprintln!("--- Temporal Tensor Store Benchmarks ---");
    eprintln!();
    eprintln!("Quantize (16KB block, {} iters):", ITERS);
    for &bits in &[8u8, 7, 5, 3] {
        // Scales are computed once; the timed loop covers quantize+pack only.
        let scales = quantizer::compute_scales(&samples, GROUP_LEN, bits);
        let scales_f32 = quantizer::scales_to_f32(&scales);
        let mut out = Vec::with_capacity(ELEM_COUNT);
        let (_total, avg) = bench_loop(ITERS, || {
            out.clear();
            quantizer::quantize_and_pack_f32(&samples, &scales_f32, GROUP_LEN, bits, &mut out);
            std::hint::black_box(&out);
        });
        let ns = avg.as_nanos();
        // Bytes processed per nanosecond is numerically GB/s.
        eprintln!(
            " {}-bit: {:>7} ns/iter ({:.2} GB/s)",
            bits,
            ns,
            RAW_BYTES / (ns as f64)
        );
    }
    eprintln!();
}
// ---------------------------------------------------------------------------
// 3. Dequantize Microbenchmarks
// ---------------------------------------------------------------------------
/// Microbenchmark: unpack + dequantize one 16KB block at every tier bit
/// width. The encode step runs once, outside the timed loop.
#[test]
fn bench_dequantize_all_widths() {
    const ELEM_COUNT: usize = 4096;
    const ITERS: u32 = 1000;
    const GROUP_LEN: usize = 64;
    const RAW_BYTES: f64 = (ELEM_COUNT * 4) as f64;
    let mut rng = SimpleRng::new(42);
    let samples = generate_f32_data(&mut rng, ELEM_COUNT);
    eprintln!("Dequantize (16KB block, {} iters):", ITERS);
    for &bits in &[8u8, 7, 5, 3] {
        // One-shot encode to obtain the packed input for the timed decode.
        let scales = quantizer::compute_scales(&samples, GROUP_LEN, bits);
        let scales_f32 = quantizer::scales_to_f32(&scales);
        let mut encoded = Vec::new();
        quantizer::quantize_and_pack_f32(&samples, &scales_f32, GROUP_LEN, bits, &mut encoded);
        let mut restored = Vec::with_capacity(ELEM_COUNT);
        let (_total, avg) = bench_loop(ITERS, || {
            restored.clear();
            quantizer::dequantize_f32(
                &encoded,
                &scales_f32,
                GROUP_LEN,
                bits,
                ELEM_COUNT,
                1,
                &mut restored,
            );
            std::hint::black_box(&restored);
        });
        let ns = avg.as_nanos();
        eprintln!(
            " {}-bit: {:>7} ns/iter ({:.2} GB/s)",
            bits,
            ns,
            RAW_BYTES / (ns as f64)
        );
    }
    eprintln!();
}
// ---------------------------------------------------------------------------
// 4. Bit Packing Microbenchmarks
// ---------------------------------------------------------------------------
/// Microbenchmark: raw pack/unpack throughput of the bit packer, per width.
#[test]
fn bench_bitpack_speed() {
    const COUNT: usize = 4096;
    const ITERS: u32 = 1000;
    eprintln!("Bitpack (4096 codes, {} iters):", ITERS);
    for &bits in &[8u32, 7, 5, 3] {
        // Codes cycle through every representable value at this width.
        let mask = (1u32 << bits) - 1;
        let codes: Vec<u32> = (0..COUNT as u32).map(|i| i & mask).collect();
        // Size of the packed output/input used to normalize to GB/s.
        let packed_bytes = (COUNT * bits as usize).div_ceil(8);
        let mut packed = Vec::with_capacity(COUNT);
        let (_total, pack_avg) = bench_loop(ITERS, || {
            packed.clear();
            bitpack::pack(&codes, bits, &mut packed);
            std::hint::black_box(&packed);
        });
        let pack_ns = pack_avg.as_nanos();
        eprintln!(
            " {}-bit pack: {:>7} ns/iter ({:.2} GB/s output)",
            bits,
            pack_ns,
            packed_bytes as f64 / (pack_ns as f64)
        );
        // Unpack benchmark reuses the packed buffer left by the pack loop.
        let mut unpacked = Vec::with_capacity(COUNT);
        let (_total, unpack_avg) = bench_loop(ITERS, || {
            unpacked.clear();
            bitpack::unpack(&packed, bits, COUNT, &mut unpacked);
            std::hint::black_box(&unpacked);
        });
        let unpack_ns = unpack_avg.as_nanos();
        eprintln!(
            " {}-bit unpack: {:>7} ns/iter ({:.2} GB/s input)",
            bits,
            unpack_ns,
            packed_bytes as f64 / (unpack_ns as f64)
        );
    }
    eprintln!();
}
// ---------------------------------------------------------------------------
// 5. Score Computation Benchmark
// ---------------------------------------------------------------------------
/// Microbenchmark: per-block score computation in the `tiering` module,
/// compared against the legacy `TierPolicy::select_bits` path.
#[test]
fn bench_score_computation() {
    const ITERS: u32 = 100_000;
    let config = TierConfig::default();
    let mut rng = SimpleRng::new(99);
    // Pre-generate 1000 metadata records with varied access patterns;
    // the timed loops cycle through them round-robin.
    let metas: Vec<BlockMeta> = (0..1000)
        .map(|_| {
            let mut m = BlockMeta::new(0);
            m.ema_rate = (rng.next_u64() % 100) as f32 / 100.0;
            m.access_window = rng.next_u64();
            m.last_access = (rng.next_u64() % 10_000) as u64;
            m.access_count = (rng.next_u64() % 1000) as u64;
            m
        })
        .collect();
    // --- tiering::compute_score ---
    let start = Instant::now();
    let mut score_acc = 0.0f32;
    for i in 0..ITERS {
        let idx = (i as usize) % 1000;
        let now = metas[idx].last_access + 100;
        // Accumulate into a sink so the call cannot be optimized away.
        score_acc += tiering::compute_score(&config, now, &metas[idx]);
    }
    let elapsed = start.elapsed();
    std::hint::black_box(score_acc);
    let ns_per_iter = elapsed.as_nanos() / ITERS as u128;
    eprintln!("Score computation ({} iters):", ITERS);
    eprintln!(" tiering::compute_score: {} ns/iter", ns_per_iter);
    // --- legacy TierPolicy::select_bits, for comparison ---
    let policy = TierPolicy::default();
    let access_counts: Vec<u32> = (0..1000).map(|_| (rng.next_u64() % 1000) as u32).collect();
    let timestamps: Vec<u32> = (0..1000)
        .map(|_| (rng.next_u64() % 100_000) as u32)
        .collect();
    let start = Instant::now();
    let mut bits_acc = 0u32;
    for i in 0..ITERS {
        let idx = (i as usize) % 1000;
        let now_ts = timestamps[idx].wrapping_add(100);
        let selected = policy.select_bits(access_counts[idx], timestamps[idx], now_ts);
        bits_acc = bits_acc.wrapping_add(selected as u32);
    }
    let elapsed = start.elapsed();
    std::hint::black_box(bits_acc);
    let ns_per_iter = elapsed.as_nanos() / ITERS as u128;
    eprintln!(" TierPolicy::select_bits: {} ns/iter", ns_per_iter);
    eprintln!();
}
// ---------------------------------------------------------------------------
// 6. Quality Metrics Test
// ---------------------------------------------------------------------------
/// Verify reconstruction quality meets ADR targets.
///
/// Uses data with guaranteed minimum magnitude to avoid spurious relative
/// error spikes on near-zero values (where quantization step > |value|).
/// The ADR-023 error bounds apply to values with significant magnitude
/// relative to the group scale.
#[test]
fn quality_metrics_test() {
    const ELEM_COUNT: usize = 4096;
    const GROUP_LEN: usize = 64;
    // Minimum magnitude: values are in [-1, -0.15] union [0.15, 1.0].
    // This ensures all values are at least 15% of the max possible value,
    // so the quantization step size is always small relative to the value.
    const MIN_MAG: f32 = 0.15;
    let mut rng = SimpleRng::new(12345);
    let data = generate_f32_data_no_near_zero(&mut rng, ELEM_COUNT, MIN_MAG);
    // ADR-023 max relative error bounds per tier, as
    // (bits, numeric bound, percentage label used for display only).
    // These bounds apply to values with |v| >= MIN_MAG.
    let configs: &[(u8, f64, &str)] = &[
        (8, 0.008, "0.80"), // 8-bit: <0.8%
        (7, 0.016, "1.60"), // 7-bit: <1.6%
        (5, 0.065, "6.50"), // 5-bit: <6.5%
        (3, 0.30, "30.0"),  // 3-bit: <30%
    ];
    eprintln!("Quality:");
    let mut all_pass = true;
    for &(bits, max_rel_err_bound, label_pct) in configs {
        // Full round-trip: compute scales -> quantize+pack -> dequantize.
        let scales = quantizer::compute_scales(&data, GROUP_LEN, bits);
        let scales_f32 = quantizer::scales_to_f32(&scales);
        let mut packed = Vec::new();
        quantizer::quantize_and_pack_f32(&data, &scales_f32, GROUP_LEN, bits, &mut packed);
        let mut decoded = Vec::new();
        quantizer::dequantize_f32(
            &packed,
            &scales_f32,
            GROUP_LEN,
            bits,
            ELEM_COUNT,
            1,
            &mut decoded,
        );
        // Compute MSE and per-group max relative error.
        // Relative error is measured against the group's scale (max |v|),
        // which is the meaningful reference for quantization quality.
        let mut sum_sq_err = 0.0f64;
        let mut max_rel_err = 0.0f64;
        let mut count_rel = 0usize;
        for (group_idx, chunk) in data.chunks(GROUP_LEN).enumerate() {
            // Group max magnitude (the reference for relative error)
            let group_max: f32 = chunk.iter().map(|v| v.abs()).fold(0.0f32, f32::max);
            if group_max < 1e-10 {
                // All-zero group: no meaningful relative-error reference.
                // Unreachable with MIN_MAG > 0; kept as a defensive guard.
                continue;
            }
            let offset = group_idx * GROUP_LEN;
            for (j, &orig) in chunk.iter().enumerate() {
                let dec = decoded[offset + j];
                let err = (orig - dec) as f64;
                sum_sq_err += err * err;
                // Relative error versus group max (the scale reference)
                let rel = err.abs() / group_max as f64;
                if rel > max_rel_err {
                    max_rel_err = rel;
                }
                count_rel += 1;
            }
        }
        // NOTE(review): MSE divides by ELEM_COUNT even though skipped
        // groups would contribute no error terms; with MIN_MAG > 0 no
        // group is ever skipped, so count_rel == ELEM_COUNT here.
        let mse = sum_sq_err / ELEM_COUNT as f64;
        let pass = max_rel_err < max_rel_err_bound;
        let status = if pass { "PASS" } else { "FAIL" };
        if !pass {
            all_pass = false;
        }
        eprintln!(
            " {}-bit MSE: {:.6} max_rel_err: {:.2}% (bound: {}%) {} (samples: {})",
            bits,
            mse,
            max_rel_err * 100.0,
            label_pct,
            status,
            count_rel,
        );
    }
    eprintln!();
    // Assert only after printing every width so one failure still shows
    // the complete quality table.
    assert!(
        all_pass,
        "One or more quality checks failed -- see output above"
    );
}
// ---------------------------------------------------------------------------
// 7. Adversarial Access Pattern Test
// ---------------------------------------------------------------------------
/// Test graceful degradation under adversarial access using the `tiering`
/// module's hysteresis and minimum-residency guards.
///
/// Simulates blocks whose access scores hover near the Tier1/Tier2 boundary.
/// Without hysteresis, small noise would cause continuous oscillation.
/// With hysteresis + min_residency, the flip rate should stay below threshold.
///
/// The test runs two configurations:
/// 1. Noisy-boundary: scores jitter around the t1 threshold (0.7)
/// 2. Burst-noise: stable cold blocks hit by brief access bursts
///
/// Both should have tier flips < 0.1/block/min.
///
/// NOTE(review): only configuration 1 (noisy-boundary) is actually
/// implemented below; the burst-noise scenario described above has no
/// corresponding code -- either implement it or trim this doc comment.
#[test]
fn adversarial_access_test() {
    const NUM_BLOCKS: usize = 100;
    const TOTAL_TICKS: u64 = 10_000;
    // Pin hysteresis and min_residency explicitly so the test does not
    // silently change behavior if the library defaults move.
    let config = TierConfig {
        hysteresis: 0.05,
        min_residency: 10,
        ..TierConfig::default()
    };
    let mut rng = SimpleRng::new(0xCAFE);
    // Minimal per-block record: tiering metadata plus flip bookkeeping.
    struct AdversarialBlock {
        meta: BlockMeta,
        flip_count: u32,
        last_tier: Tier,
    }
    let mut blocks: Vec<AdversarialBlock> = (0..NUM_BLOCKS)
        .map(|_| {
            let meta = BlockMeta::new(0);
            let last_tier = meta.current_tier;
            AdversarialBlock {
                meta,
                flip_count: 0,
                last_tier,
            }
        })
        .collect();
    // Warm up blocks so their scores sit near the Tier1/Tier2 boundary.
    // The t1 threshold is 0.7. We want ema_rate to hover near a value
    // where the composite score is close to 0.7.
    for block in blocks.iter_mut() {
        block.meta.ema_rate = 0.65;
        block.meta.access_window = 0xFFFF_FFFF_0000_0000; // half bits set
        block.meta.last_access = 0;
        block.meta.current_tier = Tier::Tier2;
        block.meta.tier_since = 0;
    }
    for tick in 1..=TOTAL_TICKS {
        for block in blocks.iter_mut() {
            // Adversarial pattern: randomly touch ~50% of blocks each tick,
            // creating a noisy signal near the boundary. Some blocks will
            // have their score bump above t1, others below -- the noise
            // should be absorbed by hysteresis.
            let pseudo_rand = rng.next_u64();
            if pseudo_rand % 2 == 0 {
                tiering::touch(&config, tick, &mut block.meta);
            } else {
                tiering::tick_decay(&config, &mut block.meta);
            }
            // Attempt tier migration (hysteresis should absorb boundary noise)
            if let Some(new_tier) = tiering::choose_tier(&config, tick, &block.meta) {
                block.meta.current_tier = new_tier;
                block.meta.tier_since = tick;
                if new_tier != block.last_tier {
                    block.flip_count += 1;
                    block.last_tier = new_tier;
                }
            }
        }
    }
    let total_flips: u32 = blocks.iter().map(|b| b.flip_count).sum();
    let max_flips_per_block = blocks.iter().map(|b| b.flip_count).max().unwrap_or(0);
    // Scale: 1000 ticks = 1 simulated minute
    let sim_minutes = TOTAL_TICKS as f64 / 1000.0;
    let flip_rate = if sim_minutes > 0.0 && NUM_BLOCKS > 0 {
        total_flips as f64 / NUM_BLOCKS as f64 / sim_minutes
    } else {
        0.0
    };
    eprintln!("--- Adversarial Access Test ---");
    eprintln!();
    eprintln!(
        " Blocks: {} Ticks: {} ({:.1} sim minutes)",
        NUM_BLOCKS, TOTAL_TICKS, sim_minutes
    );
    eprintln!(
        " Total flips: {} max/block: {}",
        total_flips, max_flips_per_block
    );
    eprintln!(
        " Flip rate: {:.4}/block/min (threshold: 0.1) {}",
        flip_rate,
        if flip_rate < 0.1 { "PASS" } else { "FAIL" }
    );
    // Also report tier distribution at end
    let tier1 = blocks
        .iter()
        .filter(|b| b.meta.current_tier == Tier::Tier1)
        .count();
    let tier2 = blocks
        .iter()
        .filter(|b| b.meta.current_tier == Tier::Tier2)
        .count();
    let tier3 = blocks
        .iter()
        .filter(|b| b.meta.current_tier == Tier::Tier3)
        .count();
    eprintln!(" Final tiers: T1={} T2={} T3={}", tier1, tier2, tier3);
    eprintln!();
    assert!(
        flip_rate < 0.1,
        "Adversarial flip rate {:.4}/block/min exceeds 0.1 threshold \
        (total_flips={}, max/block={})",
        flip_rate,
        total_flips,
        max_flips_per_block
    );
}
// ---------------------------------------------------------------------------
// 8. Segment encode/decode round-trip benchmark
// ---------------------------------------------------------------------------
/// Benchmark full segment encode + decode cycle.
///
/// For each target bit width the compressor is first steered into the
/// matching tier via its access statistics, then 16 frames are encoded
/// into one segment and the decode of that segment is timed.
#[test]
fn bench_segment_roundtrip() {
    const TENSOR_LEN: u32 = 256;
    const FRAME_COUNT: usize = 16;
    const ITERS: u32 = 500;
    let policy = TierPolicy::default();
    let mut rng = SimpleRng::new(777);
    let frames: Vec<Vec<f32>> = (0..FRAME_COUNT)
        .map(|_| generate_f32_data(&mut rng, TENSOR_LEN as usize))
        .collect();
    eprintln!(
        "Segment round-trip ({} frames x {} elements, {} iters):",
        FRAME_COUNT, TENSOR_LEN, ITERS
    );
    for &bits in &[8u8, 7, 5, 3] {
        let mut comp = TemporalTensorCompressor::new(policy, TENSOR_LEN, 0);
        // Steer the compressor into the tier that encodes at `bits`:
        // a high access count selects hot (8-bit), a moderate one warm
        // (7-bit by default), and for 5-bit the warm width is overridden
        // in a cloned policy. NOTE(review): the counts 1000/10 mirror the
        // TierPolicy default thresholds -- confirm against tier_policy
        // if those defaults ever change.
        if bits == 8 {
            comp.set_access(1000, 0);
        } else if bits == 7 {
            comp.set_access(10, 0);
        } else if bits == 5 {
            let p5 = TierPolicy {
                warm_bits: 5,
                ..policy
            };
            comp = TemporalTensorCompressor::new(p5, TENSOR_LEN, 0);
            comp.set_access(10, 0);
        }
        // bits==3: default (cold)
        // All frames append into a single segment buffer.
        let mut seg = Vec::new();
        for (i, frame) in frames.iter().enumerate() {
            comp.push_frame(frame, (i + 1) as u32, &mut seg);
        }
        comp.flush(&mut seg);
        if seg.is_empty() {
            eprintln!(" {}-bit: (no segment produced, skipping)", bits);
            continue;
        }
        let seg_bytes = seg.len();
        let raw_bytes = TENSOR_LEN as usize * FRAME_COUNT * 4;
        let mut decoded = Vec::with_capacity(TENSOR_LEN as usize * FRAME_COUNT);
        let (_total, per_iter) = bench_loop(ITERS, || {
            decoded.clear();
            segment::decode(&seg, &mut decoded);
            std::hint::black_box(&decoded);
        });
        let ns = per_iter.as_nanos();
        // Compression ratio: raw f32 bytes versus encoded segment bytes.
        let ratio = raw_bytes as f64 / seg_bytes as f64;
        let throughput_gbs = raw_bytes as f64 / (ns as f64);
        eprintln!(
            " {}-bit decode: {:>7} ns/iter ({:.2} GB/s) ratio: {:.2}x seg: {} bytes",
            bits, ns, throughput_gbs, ratio, seg_bytes
        );
    }
    eprintln!();
}
// ---------------------------------------------------------------------------
// 9. Compressor throughput benchmark
// ---------------------------------------------------------------------------
/// Benchmark the full compressor push_frame path.
///
/// Counts emitted segments by watching the output buffer grow. The
/// previous version tested `!seg.is_empty()` on every frame, but `seg`
/// is append-only (it is never cleared between frames -- the round-trip
/// test relies on that accumulation), so once the first segment was
/// emitted every subsequent frame was miscounted as a new segment.
#[test]
fn bench_compressor_throughput() {
    const TENSOR_LEN: u32 = 256;
    const FRAMES: usize = 10_000;
    let policy = TierPolicy::default();
    let mut rng = SimpleRng::new(0xBEEF);
    let frame = generate_f32_data(&mut rng, TENSOR_LEN as usize);
    eprintln!(
        "Compressor throughput ({} elements x {} frames):",
        TENSOR_LEN, FRAMES
    );
    for &(label, access_count) in &[("hot/8-bit", 1000u32), ("cold/3-bit", 0)] {
        let mut comp = TemporalTensorCompressor::new(policy, TENSOR_LEN, 0);
        comp.set_access(access_count, 0);
        let mut seg = Vec::new();
        let mut total_segments = 0usize;
        // The encoder appends into `seg`; a length increase marks exactly
        // one newly emitted segment.
        let mut prev_len = 0usize;
        let start = Instant::now();
        for i in 0..FRAMES {
            comp.push_frame(&frame, (i + 1) as u32, &mut seg);
            if seg.len() > prev_len {
                total_segments += 1;
                prev_len = seg.len();
            }
        }
        comp.flush(&mut seg);
        if seg.len() > prev_len {
            total_segments += 1;
        }
        let elapsed = start.elapsed();
        let raw_bytes = TENSOR_LEN as usize * 4 * FRAMES;
        let ns_total = elapsed.as_nanos();
        let ns_per_frame = ns_total / FRAMES as u128;
        let throughput_gbs = raw_bytes as f64 / (ns_total as f64);
        eprintln!(
            " {}: {} ns/frame ({:.2} GB/s) segments: {}",
            label, ns_per_frame, throughput_gbs, total_segments
        );
    }
    eprintln!();
}
// ---------------------------------------------------------------------------
// 10. Single-frame random-access decode benchmark
// ---------------------------------------------------------------------------
/// Microbenchmark: random-access decode of one frame out of a multi-frame
/// segment, probed at the first, middle, and last frame positions.
#[test]
fn bench_single_frame_decode() {
    const TENSOR_LEN: u32 = 256;
    const FRAME_COUNT: usize = 64;
    const ITERS: u32 = 2000;
    let policy = TierPolicy::default();
    let mut rng = SimpleRng::new(0xF00D);
    let mut comp = TemporalTensorCompressor::new(policy, TENSOR_LEN, 0);
    // High access count steers the compressor into its hot path.
    comp.set_access(1000, 0);
    let frame = generate_f32_data(&mut rng, TENSOR_LEN as usize);
    // Build one segment containing all frames.
    let mut seg = Vec::new();
    for i in 0..FRAME_COUNT {
        comp.push_frame(&frame, (i + 1) as u32, &mut seg);
    }
    comp.flush(&mut seg);
    if seg.is_empty() {
        eprintln!("Single-frame decode: no segment produced, skipping");
        return;
    }
    eprintln!(
        "Single-frame decode ({} frames in segment, {} iters):",
        FRAME_COUNT, ITERS
    );
    let probe_points = [0usize, FRAME_COUNT / 2, FRAME_COUNT - 1];
    for &frame_idx in probe_points.iter() {
        let (_total, avg) = bench_loop(ITERS, || {
            let result = segment::decode_single_frame(&seg, frame_idx);
            std::hint::black_box(&result);
        });
        eprintln!(" frame[{}]: {} ns/iter", frame_idx, avg.as_nanos());
    }
    eprintln!();
}
// ---------------------------------------------------------------------------
// 11. Tiering candidate selection benchmark
// ---------------------------------------------------------------------------
/// Microbenchmark: tiering candidate selection over a large, mixed-tier
/// block population.
#[test]
fn bench_tiering_candidate_selection() {
    const NUM_BLOCKS: usize = 10_000;
    const ITERS: u32 = 100;
    let config = TierConfig::default();
    let mut rng = SimpleRng::new(0xABCD);
    // Build metadata with varied rates, windows, recency, and tiers.
    let metas: Vec<BlockMeta> = (0..NUM_BLOCKS)
        .map(|_| {
            let mut m = BlockMeta::new(0);
            m.ema_rate = rng.next_f64() as f32;
            m.access_window = rng.next_u64();
            m.last_access = (rng.next_u64() % 500) as u64;
            m.current_tier = match rng.next_u64() % 3 {
                0 => Tier::Tier1,
                1 => Tier::Tier2,
                _ => Tier::Tier3,
            };
            m.tier_since = 0;
            m
        })
        .collect();
    // select_candidates consumes (key, &meta) pairs.
    let block_refs: Vec<(BlockKey, &BlockMeta)> = metas
        .iter()
        .enumerate()
        .map(|(i, m)| (BlockKey(i as u64), m))
        .collect();
    let now = 1000u64;
    let mut candidate_total = 0usize;
    let (_total, avg) = bench_loop(ITERS, || {
        let candidates = tiering::select_candidates(&config, now, &block_refs);
        candidate_total += candidates.len();
        std::hint::black_box(&candidates);
    });
    eprintln!(
        "Tiering candidate selection ({} blocks, {} iters):",
        NUM_BLOCKS, ITERS
    );
    eprintln!(
        " {} ns/iter ({} avg candidates)",
        avg.as_nanos(),
        candidate_total / ITERS as usize
    );
    eprintln!();
}
// ---------------------------------------------------------------------------
// Summary printer (runs last alphabetically)
// ---------------------------------------------------------------------------
/// Emit a closing banner so `--nocapture` output ends cleanly. The `z_`
/// prefix sorts this test last under the harness's alphabetical ordering.
#[test]
fn z_summary() {
    eprintln!();
    eprintln!("=== All temporal tensor benchmarks complete ===");
    eprintln!();
}