# RuvLLM Example Configuration
# Copy this file to ruvllm.toml and customize

[system]
device_class = "server"        # edge, mobile, server, gpu
max_memory_mb = 8192
max_concurrent_requests = 10
data_dir = "./data"

[embedding]
dimension = 768                # Embedding vector size
max_tokens = 512               # Max tokens per input
batch_size = 8                 # Batch size for embedding

[memory]
db_path = "./data/memory.db"
hnsw_m = 16                    # Connections per node
hnsw_ef_construction = 100     # Build quality
hnsw_ef_search = 64            # Search quality
max_nodes = 1_000_000          # Max memory nodes
writeback_batch_size = 100     # Batch size for writes
writeback_interval_ms = 1000   # Write interval

[router]
input_dim = 128                # Input feature dimension
hidden_dim = 64                # Hidden state size
sparsity = 0.9                 # Weight matrix sparsity
rank = 8                       # Low-rank decomposition rank
confidence_threshold = 0.7     # Fallback threshold

[inference]
models = ["tiny", "small", "medium", "large"]
quantization = "q4"            # Quantization type
max_context = 8192             # Max context length
max_loaded_models = 2          # Max concurrent models
kv_cache_size = 1024           # KV cache entries

[learning]
enabled = true                 # Enable self-learning
quality_threshold = 0.7        # Min quality for writeback
replay_capacity = 10_000       # Replay buffer size
batch_size = 32                # Training batch size
learning_rate = 0.001          # Learning rate
ewc_lambda = 0.4               # EWC regularization
training_interval_ms = 3_600_000 # Training interval (1 hour)
min_samples = 100              # Min samples before training