Squashed 'vendor/ruvector/' content from commit b64c2172

git-subtree-dir: vendor/ruvector
git-subtree-split: b64c21726f2bb37286d9ee36a7869fef60cc6900
This commit is contained in:
ruv
2026-02-28 14:39:40 -05:00
commit d803bfe2b1
7854 changed files with 3522914 additions and 0 deletions

404
crates/ruvllm/tests/fixtures/mod.rs vendored Normal file
View File

@@ -0,0 +1,404 @@
//! Test Fixtures for RuvLTRA-Small
//!
//! This module provides test fixtures including sample prompts, expected patterns,
//! and perplexity baselines for validating the RuvLTRA-Small inference engine.
use std::collections::HashMap;
// ============================================================================
// Sample Prompts
// ============================================================================
/// Collection of test prompts organized by category.
///
/// Each submodule groups string constants for one evaluation scenario so
/// integration tests can reference prompts without allocating at runtime.
pub mod prompts {
    /// Simple text completion prompts.
    pub mod completion {
        pub const QUICK_BROWN_FOX: &str = "The quick brown fox";
        pub const ONCE_UPON_A_TIME: &str = "Once upon a time";
        pub const IN_THE_BEGINNING: &str = "In the beginning";
        pub const IT_WAS_A_DARK: &str = "It was a dark and stormy night";
    }

    /// Question-answering prompts in "Q: ... A:" format.
    pub mod qa {
        pub const CAPITAL_FRANCE: &str = "Q: What is the capital of France?\nA:";
        pub const TWO_PLUS_TWO: &str = "Q: What is 2 + 2?\nA:";
        pub const COLOR_SKY: &str = "Q: What color is the sky?\nA:";
        pub const LARGEST_PLANET: &str = "Q: What is the largest planet in our solar system?\nA:";
    }

    /// Instruction-following prompts.
    pub mod instruction {
        pub const WRITE_HAIKU: &str = "Write a haiku about programming:";
        pub const EXPLAIN_GRAVITY: &str = "Explain gravity in simple terms:";
        pub const LIST_PLANETS: &str = "List the planets in our solar system:";
        pub const DESCRIBE_OCEAN: &str = "Describe the ocean in three sentences:";
    }

    /// Code-generation prompts (Python function stubs).
    pub mod code {
        pub const FIBONACCI: &str = "def fibonacci(n):\n '''Return the nth Fibonacci number.'''\n";
        pub const HELLO_WORLD: &str = "# Python function to print hello world\ndef hello():";
        pub const FACTORIAL: &str = "def factorial(n):\n '''Return n factorial.'''\n";
        pub const SORT_LIST: &str = "def sort_list(items):\n '''Sort a list in ascending order.'''\n";
    }

    /// Conversation/chat prompts in "User: ... Assistant:" format.
    pub mod conversation {
        pub const GREETING: &str = "User: Hello!\nAssistant:";
        pub const TELL_JOKE: &str = "User: Tell me a joke.\nAssistant:";
        pub const WEATHER: &str = "User: What's the weather like today?\nAssistant:";
        pub const HELP: &str = "User: Can you help me?\nAssistant:";
    }

    /// Edge-case prompts exercising unusual or degenerate inputs.
    pub mod edge_cases {
        pub const EMPTY: &str = "";
        pub const SINGLE_CHAR: &str = "A";
        pub const SINGLE_WORD: &str = "Hello";
        pub const SPECIAL_CHARS: &str = "Translate: \"Hello, world!\" ->";
        // "Hello world" in Chinese, written with escapes to keep the source ASCII.
        pub const UNICODE: &str = "\u{4f60}\u{597d}\u{4e16}\u{754c}"; // 你好世界
        pub const NUMBERS_ONLY: &str = "1 2 3 4 5";
        // Five repetitions of the pangram followed by a continuation cue;
        // the trailing backslashes splice the lines into one string literal.
        pub const VERY_LONG: &str = "The quick brown fox jumps over the lazy dog. \
            The quick brown fox jumps over the lazy dog. \
            The quick brown fox jumps over the lazy dog. \
            The quick brown fox jumps over the lazy dog. \
            The quick brown fox jumps over the lazy dog. \
            Continue:";
    }
}
// ============================================================================
// Expected Output Patterns
// ============================================================================
/// Expected patterns in generated outputs.
///
/// Pattern slices are matched case-insensitively against generated text;
/// a single hit counts as success.
pub mod expected_patterns {
    /// Plausible continuations after "The quick brown fox".
    pub const FOX_COMPLETION: &[&str] = &[
        "jumps", "jumped", "runs", "ran", "over", "the", "lazy", "dog",
    ];
    /// Words likely to appear in a programming haiku.
    pub const HAIKU_PATTERNS: &[&str] = &[
        "code", "bug", "compile", "debug", "screen", "night", "lines", "function",
    ];
    /// Expected answer for the capital-of-France QA prompt.
    pub const FRANCE_CAPITAL: &str = "Paris";
    /// Expected answer for the 2 + 2 QA prompt.
    pub const TWO_PLUS_TWO: &str = "4";
    /// Structural tokens expected in generated Fibonacci code.
    pub const FIBONACCI_PATTERNS: &[&str] = &[
        "return", "if", "else", "n", "<=", "1", "+", "fibonacci",
    ];
    /// Words likely to appear in a greeting response.
    pub const GREETING_PATTERNS: &[&str] = &[
        "hello", "hi", "hey", "how", "help", "assist", "welcome",
    ];
    /// Structural tokens expected in generated factorial code.
    pub const FACTORIAL_PATTERNS: &[&str] = &[
        "return", "if", "n", "<=", "1", "*", "factorial",
    ];
}
// ============================================================================
// Perplexity Baselines
// ============================================================================
/// Perplexity baseline values for quality validation.
pub mod perplexity {
    /// Hard upper bound: output above this perplexity is treated as incoherent.
    pub const MAX_ACCEPTABLE: f32 = 50.0;
    /// Soft bound: perplexity above this is flagged as elevated.
    pub const WARNING_THRESHOLD: f32 = 30.0;
    /// Perplexity at or below this indicates high-quality output.
    pub const EXCELLENT: f32 = 15.0;

    /// Expected perplexity (min, max) ranges by task type.
    pub mod task_ranges {
        /// Simple completion: low perplexity expected.
        pub const COMPLETION: (f32, f32) = (5.0, 20.0);
        /// Code generation: moderate perplexity.
        pub const CODE: (f32, f32) = (8.0, 30.0);
        /// Creative writing: higher perplexity acceptable.
        pub const CREATIVE: (f32, f32) = (15.0, 45.0);
        /// Factual QA: low perplexity (confident answers).
        pub const FACTUAL: (f32, f32) = (3.0, 15.0);
    }

    /// Limits on quality loss introduced by quantization.
    pub mod degradation {
        /// Maximum allowed perplexity increase from quantization, in percent.
        pub const MAX_INCREASE_PCT: f32 = 20.0;
        /// Expected Q4_K degradation relative to F16, in percent.
        pub const Q4K_EXPECTED: f32 = 15.0;
        /// Expected Q8_0 degradation relative to F16, in percent.
        pub const Q8_EXPECTED: f32 = 3.0;
    }
}
// ============================================================================
// Token Probability Thresholds
// ============================================================================
/// Thresholds for token probability validation.
///
/// Used to detect garbled or degenerate sampling distributions.
pub mod probability_thresholds {
    /// Smallest acceptable probability for the single most likely token.
    pub const MIN_TOP1: f32 = 0.01;
    /// Smallest acceptable cumulative probability over the top-5 tokens.
    pub const MIN_TOP5_CUMULATIVE: f32 = 0.1;
    /// Largest entropy compatible with non-degenerate output.
    pub const MAX_ENTROPY: f32 = 10.0;
    /// Smallest confidence accepted for factual answers.
    pub const MIN_FACTUAL_CONFIDENCE: f32 = 0.5;
}
// ============================================================================
// Coherence Metrics
// ============================================================================
/// Coherence validation thresholds.
pub mod coherence {
    /// Longest run of identical consecutive words tolerated.
    pub const MAX_CONSECUTIVE_REPEATS: usize = 3;
    /// Largest tolerated ratio of repeated n-grams.
    pub const MAX_NGRAM_REPETITION: f32 = 0.3;
    /// Smallest alphanumeric-character ratio accepted as valid text.
    pub const MIN_ALPHANUMERIC_RATIO: f32 = 0.7;
    /// Largest tolerated special-character ratio.
    pub const MAX_SPECIAL_CHAR_RATIO: f32 = 0.2;
    /// Lower bound on sentence length.
    pub const MIN_SENTENCE_LENGTH: usize = 3;
    /// Upper bound on sentence length.
    pub const MAX_SENTENCE_LENGTH: usize = 200;
}
// ============================================================================
// Performance Baselines
// ============================================================================
/// Performance baseline values.
pub mod performance {
    /// Generation throughput baselines (tokens/second) by device.
    pub mod tokens_per_second {
        /// M4 Pro with ANE acceleration.
        pub const M4_PRO_ANE: f32 = 60.0;
        /// M4 Pro, NEON-only fallback.
        pub const M4_PRO_NEON: f32 = 45.0;
        /// M1 with ANE acceleration.
        pub const M1_ANE: f32 = 40.0;
        /// x86 CPU baseline (AVX2).
        pub const X86_AVX2: f32 = 15.0;
    }

    /// Latency thresholds, in milliseconds.
    pub mod latency_ms {
        /// Maximum acceptable time to first token.
        pub const MAX_FIRST_TOKEN: u64 = 500;
        /// Maximum acceptable inter-token latency.
        pub const MAX_INTER_TOKEN: u64 = 100;
        /// Target inter-token latency.
        pub const TARGET_INTER_TOKEN: u64 = 20;
    }

    /// Memory thresholds, in bytes.
    pub mod memory {
        /// Maximum model memory for the Q4_K quantization.
        pub const MAX_MODEL_Q4K: usize = 1_500_000_000;
        /// Maximum KV cache memory.
        pub const MAX_KV_CACHE: usize = 500_000_000;
        /// Maximum additional working memory during inference.
        pub const MAX_WORKING: usize = 200_000_000;
    }
}
// ============================================================================
// Test Data Generators
// ============================================================================
/// Generate a long prompt of specified length.
///
/// Cycles through a fixed 16-word vocabulary and joins exactly
/// `word_count` words with single spaces. Returns an empty string when
/// `word_count` is 0.
pub fn generate_long_prompt(word_count: usize) -> String {
    const WORDS: [&str; 16] = [
        "the", "quick", "brown", "fox", "jumps", "over", "lazy", "dog",
        "and", "then", "runs", "around", "park", "with", "great", "joy",
    ];
    let mut selected: Vec<&str> = Vec::with_capacity(word_count);
    selected.extend(WORDS.iter().cycle().take(word_count));
    selected.join(" ")
}
/// Generate a sequence of numbers for pattern completion tests.
///
/// Produces `count` consecutive integers beginning at `start`, joined by
/// ", " (e.g. `generate_number_sequence(1, 3)` yields `"1, 2, 3"`).
pub fn generate_number_sequence(start: i32, count: usize) -> String {
    let mut parts: Vec<String> = Vec::with_capacity(count);
    for offset in 0..count as i32 {
        parts.push((start + offset).to_string());
    }
    parts.join(", ")
}
/// Generate a repeated pattern prompt.
///
/// Repeats `word` exactly `count` times, separated by single spaces;
/// returns an empty string when `count` is 0.
pub fn generate_repetition_prompt(word: &str, count: usize) -> String {
    std::iter::repeat(word)
        .take(count)
        .collect::<Vec<_>>()
        .join(" ")
}
// ============================================================================
// Validation Helpers
// ============================================================================
/// Check if output contains any of the expected patterns.
///
/// Matching is case-insensitive on both sides; a single hit is enough.
/// Returns `false` for an empty `patterns` slice.
pub fn contains_expected_pattern(output: &str, patterns: &[&str]) -> bool {
    let haystack = output.to_lowercase();
    for pattern in patterns {
        if haystack.contains(&pattern.to_lowercase()) {
            return true;
        }
    }
    false
}
/// Calculate repetition ratio for n-grams.
///
/// Splits `text` on whitespace, counts every length-`n` word window, and
/// returns (occurrences of n-grams that appear more than once, counting
/// every occurrence) / (total n-grams). Returns 0.0 when the text has
/// fewer than `n` words.
pub fn calculate_ngram_repetition(text: &str, n: usize) -> f32 {
    let tokens: Vec<&str> = text.split_whitespace().collect();
    if tokens.len() < n {
        return 0.0;
    }
    let mut counts: HashMap<Vec<&str>, usize> = HashMap::new();
    for gram in tokens.windows(n) {
        *counts.entry(gram.to_vec()).or_insert(0) += 1;
    }
    let duplicated: usize = counts.values().filter(|&&c| c > 1).sum();
    duplicated as f32 / (tokens.len() - n + 1) as f32
}
/// Count consecutive word repetitions.
///
/// Returns the length of the longest run of identical adjacent words,
/// measured in extra occurrences: "x x x" reports 2, and text with no
/// adjacent duplicates (or fewer than two words) reports 0.
pub fn count_consecutive_repeats(text: &str) -> usize {
    let tokens: Vec<&str> = text.split_whitespace().collect();
    let mut longest = 0;
    let mut run = 0;
    for pair in tokens.windows(2) {
        if pair[0] == pair[1] {
            run += 1;
            longest = longest.max(run);
        } else {
            run = 0;
        }
    }
    longest
}
/// Calculate the fraction of characters in `text` that are alphanumeric.
///
/// Returns 0.0 for an empty string. Both the numerator and the
/// denominator count `char`s: the previous implementation divided the
/// `char` count by `text.len()` (a *byte* count), which under-reported
/// the ratio for any non-ASCII text — e.g. a purely-CJK string scored
/// ~0.33 instead of 1.0 and would wrongly fail `is_coherent`.
pub fn alphanumeric_ratio(text: &str) -> f32 {
    let mut total = 0usize;
    let mut alphanumeric = 0usize;
    for c in text.chars() {
        total += 1;
        if c.is_alphanumeric() {
            alphanumeric += 1;
        }
    }
    // Empty input: avoid 0/0 and keep the original contract of returning 0.0.
    if total == 0 {
        return 0.0;
    }
    alphanumeric as f32 / total as f32
}
/// Check if text passes basic coherence checks.
///
/// A text is coherent when it simultaneously satisfies all thresholds in
/// the `coherence` module: a sufficient alphanumeric ratio, no excessive
/// consecutive word repeats, and limited 3-gram repetition.
pub fn is_coherent(text: &str) -> bool {
    let ratio_ok = alphanumeric_ratio(text) >= coherence::MIN_ALPHANUMERIC_RATIO;
    let repeats_ok = count_consecutive_repeats(text) <= coherence::MAX_CONSECUTIVE_REPEATS;
    let ngrams_ok = calculate_ngram_repetition(text, 3) <= coherence::MAX_NGRAM_REPETITION;
    ratio_ok && repeats_ok && ngrams_ok
}
// ============================================================================
// Tests for Fixtures Module
// ============================================================================
#[cfg(test)]
mod tests {
    use super::*;

    // generate_long_prompt must emit exactly the requested word count.
    #[test]
    fn test_generate_long_prompt() {
        let prompt = generate_long_prompt(100);
        let word_count = prompt.split_whitespace().count();
        assert_eq!(word_count, 100);
    }

    // The sequence generator formats consecutive integers with ", " separators.
    #[test]
    fn test_generate_number_sequence() {
        let seq = generate_number_sequence(1, 5);
        assert_eq!(seq, "1, 2, 3, 4, 5");
    }

    // A single pattern hit ("jumps") is enough for a match.
    #[test]
    fn test_contains_expected_pattern() {
        let output = "The fox jumps over the lazy dog";
        assert!(contains_expected_pattern(output, expected_patterns::FOX_COMPLETION));
    }

    // Varied text scores near zero; a degenerate word loop scores high.
    #[test]
    fn test_ngram_repetition() {
        let no_repeat = "the quick brown fox jumps over";
        assert!(calculate_ngram_repetition(no_repeat, 2) < 0.1);
        let high_repeat = "the the the the the the";
        assert!(calculate_ngram_repetition(high_repeat, 2) > 0.5);
    }

    // A run of k identical adjacent words yields k - 1 repeats.
    #[test]
    fn test_consecutive_repeats() {
        assert_eq!(count_consecutive_repeats("hello world"), 0);
        assert_eq!(count_consecutive_repeats("hello hello world"), 1);
        assert_eq!(count_consecutive_repeats("hello hello hello"), 2);
    }

    // Mostly-letter text scores high; pure punctuation scores near zero.
    #[test]
    fn test_alphanumeric_ratio() {
        assert!(alphanumeric_ratio("Hello World") > 0.8);
        assert!(alphanumeric_ratio("!@#$%^&*()") < 0.1);
    }

    // The coherence gate rejects symbol soup and degenerate repetition.
    #[test]
    fn test_coherence_check() {
        assert!(is_coherent("The quick brown fox jumps over the lazy dog."));
        assert!(!is_coherent("!@#$%^&*()!@#$%^&*()!@#$%^&*()"));
        assert!(!is_coherent("the the the the the the the"));
    }
}

View File

@@ -0,0 +1,161 @@
{
"metadata": {
"version": "1.0.0",
"description": "Perplexity baselines for RuvLTRA-Small quality validation",
"model": "ruvltra-small",
"quantization_tested": ["Q4_K", "Q5_K", "Q8_0", "F16"],
"last_updated": "2024-01-19"
},
"quality_thresholds": {
"max_acceptable_perplexity": 50.0,
"warning_perplexity": 30.0,
"excellent_perplexity": 15.0,
"notes": "Perplexity values vary by dataset and prompt type"
},
"baselines": {
"wikitext": {
"description": "WikiText-2 test set perplexity",
"dataset_url": "https://huggingface.co/datasets/wikitext",
"values": {
"F16": {
"perplexity": 8.5,
"tokens_evaluated": 250000,
"notes": "Full precision baseline"
},
"Q8_0": {
"perplexity": 8.7,
"degradation_pct": 2.4,
"notes": "8-bit quantization, minimal quality loss"
},
"Q5_K": {
"perplexity": 9.2,
"degradation_pct": 8.2,
"notes": "5-bit k-quant, good balance"
},
"Q4_K": {
"perplexity": 9.8,
"degradation_pct": 15.3,
"notes": "4-bit k-quant, most common deployment format"
},
"Q2_K": {
"perplexity": 14.5,
"degradation_pct": 70.6,
"notes": "2-bit extreme quantization, noticeable degradation"
}
}
},
"lambada": {
"description": "LAMBADA last-word prediction accuracy",
"metric": "accuracy",
"values": {
"F16": {
"accuracy": 0.72,
"notes": "Full precision accuracy"
},
"Q4_K": {
"accuracy": 0.68,
"degradation_pct": 5.6,
"notes": "Slight accuracy drop acceptable"
}
}
},
"hellaswag": {
"description": "HellaSwag commonsense reasoning",
"metric": "accuracy",
"values": {
"F16": {
"accuracy": 0.68
},
"Q4_K": {
"accuracy": 0.65,
"degradation_pct": 4.4
}
}
},
"custom_prompts": {
"description": "Perplexity on custom test prompts",
"values": {
"simple_completion": {
"expected_ppl_range": [5.0, 20.0],
"notes": "Common phrase continuation should have low perplexity"
},
"code_generation": {
"expected_ppl_range": [8.0, 30.0],
"notes": "Code has higher entropy but should still be coherent"
},
"creative_writing": {
"expected_ppl_range": [15.0, 45.0],
"notes": "Creative tasks have higher acceptable perplexity"
},
"factual_qa": {
"expected_ppl_range": [3.0, 15.0],
"notes": "Factual responses should be confident"
}
}
}
},
"degradation_limits": {
"max_perplexity_increase_pct": 20.0,
"max_accuracy_decrease_pct": 10.0,
"notes": "Quantization should not degrade quality beyond these limits"
},
"token_probability_thresholds": {
"min_top1_probability": 0.01,
"min_top5_cumulative": 0.1,
"max_entropy": 10.0,
"notes": "Thresholds for detecting garbled or degenerate output"
},
"repetition_metrics": {
"max_ngram_repetition_ratio": 0.3,
"max_consecutive_repeats": 3,
"ngram_window_sizes": [2, 3, 4],
"notes": "Detect excessive repetition in generated text"
},
"coherence_metrics": {
"min_sentence_length": 3,
"max_sentence_length": 200,
"punctuation_ratio_range": [0.01, 0.15],
"alphanumeric_ratio_min": 0.7,
"notes": "Basic structural coherence checks"
},
"speed_baselines": {
"description": "Token generation speed baselines (tokens/second)",
"device_baselines": {
"m4_pro_ane": {
"prompt_processing": 2000,
"generation": 60,
"notes": "M4 Pro with ANE acceleration"
},
"m4_pro_neon": {
"prompt_processing": 1500,
"generation": 45,
"notes": "M4 Pro NEON-only fallback"
},
"m1_ane": {
"prompt_processing": 1200,
"generation": 40,
"notes": "M1 with ANE"
},
"cpu_x86": {
"prompt_processing": 500,
"generation": 15,
"notes": "x86 CPU baseline (AVX2)"
}
}
},
"memory_baselines": {
"model_sizes_mb": {
"F16": 4000,
"Q8_0": 2200,
"Q4_K": 1200,
"Q2_K": 700
},
"kv_cache_per_token_bytes": {
"F16": 1100,
"Q8_0": 1100,
"notes": "KV cache typically stays in F16 for accuracy"
},
"peak_memory_multiplier": 1.5,
"notes": "Peak memory = model_size * multiplier during inference"
}
}

View File

@@ -0,0 +1,191 @@
{
"metadata": {
"version": "1.0.0",
"description": "Test prompts for RuvLTRA-Small validation",
"model": "ruvltra-small",
"last_updated": "2024-01-19"
},
"prompts": {
"simple_completion": {
"id": "simple_001",
"category": "completion",
"prompt": "The quick brown fox",
"expected_patterns": ["jumps", "jumped", "runs", "ran", "over", "lazy"],
"max_tokens": 50,
"temperature": 0.7,
"notes": "Classic completion test for basic language modeling"
},
"instruction_haiku": {
"id": "instruction_001",
"category": "instruction",
"prompt": "Write a haiku about programming:",
"expected_patterns": ["code", "bug", "compile", "debug", "screen", "night", "lines", "function"],
"max_tokens": 100,
"temperature": 0.8,
"notes": "Tests instruction-following ability"
},
"qa_capital": {
"id": "qa_001",
"category": "question_answering",
"prompt": "Q: What is the capital of France?\nA:",
"expected_output": "Paris",
"max_tokens": 20,
"temperature": 0.1,
"notes": "Simple factual QA with deterministic expected output"
},
"qa_math": {
"id": "qa_002",
"category": "question_answering",
"prompt": "Q: What is 2 + 2?\nA:",
"expected_output": "4",
"max_tokens": 10,
"temperature": 0.0,
"notes": "Simple math QA"
},
"code_fibonacci": {
"id": "code_001",
"category": "code_generation",
"prompt": "def fibonacci(n):\n '''Return the nth Fibonacci number.'''\n",
"expected_patterns": ["return", "if", "else", "n", "<=", "1", "+", "fibonacci"],
"max_tokens": 150,
"temperature": 0.3,
"notes": "Code generation with expected structural patterns"
},
"code_hello_world": {
"id": "code_002",
"category": "code_generation",
"prompt": "# Python function to print hello world\ndef",
"expected_patterns": ["print", "hello", "world", "def"],
"max_tokens": 50,
"temperature": 0.2,
"notes": "Simple code generation"
},
"conversation_greeting": {
"id": "conv_001",
"category": "conversation",
"prompt": "User: Hello!\nAssistant:",
"expected_patterns": ["hello", "hi", "how", "help", "can", "assist"],
"max_tokens": 50,
"temperature": 0.7,
"notes": "Basic conversation response"
},
"conversation_joke": {
"id": "conv_002",
"category": "conversation",
"prompt": "User: Tell me a joke.\nAssistant:",
"expected_patterns": ["why", "what", "because", "knock", "chicken"],
"max_tokens": 100,
"temperature": 0.9,
"notes": "Creative response generation"
},
"summarization": {
"id": "summary_001",
"category": "summarization",
"prompt": "Summarize the following in one sentence:\nMachine learning is a subset of artificial intelligence that enables systems to learn and improve from experience without being explicitly programmed.\nSummary:",
"expected_patterns": ["machine learning", "AI", "artificial intelligence", "learn", "data"],
"max_tokens": 50,
"temperature": 0.3,
"notes": "Tests summarization capability"
},
"translation": {
"id": "translation_001",
"category": "translation",
"prompt": "Translate to French: Hello, how are you?\nFrench:",
"expected_patterns": ["bonjour", "comment", "allez", "vous"],
"max_tokens": 30,
"temperature": 0.1,
"notes": "Basic translation test"
},
"sentiment": {
"id": "sentiment_001",
"category": "classification",
"prompt": "Classify the sentiment of this review as positive, negative, or neutral:\n\"This product is amazing! Best purchase I've ever made.\"\nSentiment:",
"expected_output": "positive",
"max_tokens": 10,
"temperature": 0.0,
"notes": "Sentiment classification"
},
"reasoning_chain": {
"id": "reasoning_001",
"category": "reasoning",
"prompt": "Question: If I have 3 apples and give away 1, how many do I have left?\nLet's think step by step:",
"expected_patterns": ["3", "1", "2", "subtract", "minus", "left", "remaining"],
"max_tokens": 100,
"temperature": 0.1,
"notes": "Chain-of-thought reasoning"
}
},
"edge_cases": {
"empty_prompt": {
"id": "edge_001",
"prompt": "",
"expected_behavior": "Should handle gracefully, may produce empty output or generic response",
"max_tokens": 20
},
"single_char": {
"id": "edge_002",
"prompt": "A",
"expected_behavior": "Should produce coherent completion",
"max_tokens": 30
},
"special_characters": {
"id": "edge_003",
"prompt": "Translate: \"Hello, world!\" ->",
"expected_behavior": "Should handle quotes and punctuation correctly",
"max_tokens": 30
},
"very_long_prompt": {
"id": "edge_004",
"prompt": "The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog. Continue:",
"expected_behavior": "Should handle long context without issues",
"max_tokens": 50
},
"unicode": {
"id": "edge_005",
"prompt": "Translate to English: \u4f60\u597d\u4e16\u754c",
"expected_patterns": ["hello", "world"],
"max_tokens": 20
},
"mixed_language": {
"id": "edge_006",
"prompt": "English and \u65e5\u672c\u8a9e mixed:",
"expected_behavior": "Should handle multilingual input",
"max_tokens": 50
},
"numbers": {
"id": "edge_007",
"prompt": "Continue the sequence: 1, 2, 3, 4,",
"expected_patterns": ["5", "6", "7"],
"max_tokens": 20
},
"repetitive": {
"id": "edge_008",
"prompt": "Hello hello hello hello hello",
"expected_behavior": "Should not amplify repetition excessively",
"max_tokens": 30
}
},
"stress_tests": {
"max_context": {
"id": "stress_001",
"description": "Test with maximum context length",
"prompt_length": 8192,
"max_tokens": 100,
"notes": "Generate prompt programmatically to fill context"
},
"long_generation": {
"id": "stress_002",
"description": "Generate many tokens",
"prompt": "Once upon a time",
"max_tokens": 2000,
"notes": "Test stability over long generation"
},
"rapid_requests": {
"id": "stress_003",
"description": "Many rapid sequential requests",
"num_requests": 100,
"prompt": "Hello",
"max_tokens": 10
}
}
}