feat: Docker images, RVF export, and README update

- Add docker/ folder with Dockerfile.rust (132MB), Dockerfile.python (569MB),
  and docker-compose.yml
- Remove stale root-level Dockerfile and docker-compose files
- Implement --export-rvf CLI flag for standalone RVF package generation
- Generate wifi-densepose-v1.rvf (13KB) with model weights, vital config,
  SONA profile, and training provenance
- Update README with Docker pull/run commands and RVF export instructions
- Update test count to 542+ and fix Docker port mappings
- Reply to issues #43, #44, #45 with Docker/RVF availability

Co-Authored-By: claude-flow <ruv@ruv.net>
commit add9f192aa (parent fc409dfd6a)
Author: ruv
Date: 2026-02-28 23:44:30 -05:00
14 changed files with 533 additions and 701 deletions

@@ -260,16 +260,45 @@ struct ModelLayer {
sparse: Option<SparseLinear>,
profiler: NeuronProfiler,
is_sparse: bool,
/// Quantized weights per row (populated by apply_quantization).
quantized: Option<Vec<QuantizedWeights>>,
/// Whether to use quantized weights for forward pass.
use_quantized: bool,
}
impl ModelLayer {
fn new(name: &str, weights: Vec<Vec<f32>>, bias: Vec<f32>) -> Self {
let n = weights.len();
Self {
name: name.into(), weights, bias, sparse: None,
profiler: NeuronProfiler::new(n), is_sparse: false,
quantized: None, use_quantized: false,
}
}
fn forward_dense(&self, input: &[f32]) -> Vec<f32> {
if self.use_quantized {
if let Some(ref qrows) = self.quantized {
return self.forward_quantized(input, qrows);
}
}
self.weights.iter().enumerate().map(|(r, row)| dot_bias(row, input, self.bias[r])).collect()
}
/// Forward using dequantized weights: val = (q - zero_point) * scale.
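/// Illustrative example (assumed values, not from the source): with scale = 0.02
/// and zero_point = 0 (symmetric), a stored value of 64 dequantizes to
/// (64 - 0) * 0.02 = 1.28 before being multiplied by the matching input element.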
fn forward_quantized(&self, input: &[f32], qrows: &[QuantizedWeights]) -> Vec<f32> {
let n_out = qrows.len().min(self.bias.len());
let mut out = vec![0.0f32; n_out];
for r in 0..n_out {
let qw = &qrows[r];
let len = qw.data.len().min(input.len());
let mut s = self.bias[r];
for i in 0..len {
let w = (qw.data[i] as f32 - qw.zero_point as f32) * qw.scale;
s += w * input[i];
}
out[r] = s;
}
out
}
fn forward(&self, input: &[f32]) -> Vec<f32> {
if self.is_sparse { if let Some(ref s) = self.sparse { return s.forward(input); } }
self.forward_dense(input)
@@ -327,11 +356,20 @@ impl SparseModel {
}
}
/// Quantize weights using INT8 codebook per the config. After this call,
/// forward() uses dequantized weights (val = (q - zero_point) * scale).
pub fn apply_quantization(&mut self) {
for layer in &mut self.layers {
let qrows: Vec<QuantizedWeights> = layer.weights.iter().map(|row| {
match self.config.quant_mode {
QuantMode::Int8Symmetric => Quantizer::quantize_symmetric(row),
QuantMode::Int8Asymmetric => Quantizer::quantize_asymmetric(row),
_ => Quantizer::quantize_symmetric(row),
}
}).collect();
layer.quantized = Some(qrows);
layer.use_quantized = true;
}
}
/// Forward pass through all layers with ReLU activation.
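
Note: QuantizedWeights and Quantizer are not shown in this diff. The sketch below is a hypothetical minimal definition, consistent with the dequantization used in forward_quantized (val = (q - zero_point) * scale) and with the two QuantMode branches in apply_quantization; the field names, the i8 storage type, and the scale/zero_point formulas are assumptions, not the project's actual implementation.

// Hypothetical sketch of the types referenced above. Only the call sites in the
// diff (data, scale, zero_point, quantize_symmetric, quantize_asymmetric) are
// taken from the source; everything else is assumed for illustration.
#[derive(Clone, Debug)]
pub struct QuantizedWeights {
    pub data: Vec<i8>,   // quantized row values
    pub scale: f32,      // dequantization scale
    pub zero_point: i8,  // 0 for symmetric quantization
}

pub struct Quantizer;

impl Quantizer {
    /// INT8 symmetric: q = round(w / scale), zero_point = 0, scale = max(|w|) / 127.
    pub fn quantize_symmetric(row: &[f32]) -> QuantizedWeights {
        let max_abs = row.iter().fold(0.0f32, |m, &w| m.max(w.abs()));
        let scale = if max_abs > 0.0 { max_abs / 127.0 } else { 1.0 };
        let data = row
            .iter()
            .map(|&w| (w / scale).round().clamp(-127.0, 127.0) as i8)
            .collect();
        QuantizedWeights { data, scale, zero_point: 0 }
    }

    /// INT8 asymmetric: q = round(w / scale) + zero_point, mapping [min, max]
    /// onto [-128, 127].
    pub fn quantize_asymmetric(row: &[f32]) -> QuantizedWeights {
        let (min, max) = row
            .iter()
            .fold((f32::MAX, f32::MIN), |(lo, hi), &w| (lo.min(w), hi.max(w)));
        let range = (max - min).max(f32::EPSILON);
        let scale = range / 255.0;
        let zero_point = (-128.0 - min / scale).round().clamp(-128.0, 127.0) as i8;
        let data = row
            .iter()
            .map(|&w| ((w / scale).round() + zero_point as f32).clamp(-128.0, 127.0) as i8)
            .collect();
        QuantizedWeights { data, scale, zero_point }
    }
}

fn main() {
    // Round-trip check: dequantizing with (q - zero_point) * scale should
    // approximately recover the original weights.
    let row = [0.5f32, -0.25, 0.125, 0.0];
    let q = Quantizer::quantize_symmetric(&row);
    for (i, &orig) in row.iter().enumerate() {
        let deq = (q.data[i] as f32 - q.zero_point as f32) * q.scale;
        assert!((deq - orig).abs() < 0.01, "row[{i}]: {orig} vs {deq}");
    }
}

Under these assumptions the symmetric path always stores zero_point = 0, so the zero_point subtraction in forward_quantized only changes the result when the config selects QuantMode::Int8Asymmetric.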