git-subtree-dir: vendor/ruvector git-subtree-split: b64c21726f2bb37286d9ee36a7869fef60cc6900
377 lines
11 KiB
Markdown
377 lines
11 KiB
Markdown
# ADR-012: LLM Provider Integration
|
|
|
|
## Status
|
|
Accepted (Implemented)
|
|
|
|
## Date
|
|
2026-01-27
|
|
|
|
## Context
|
|
|
|
RuvBot requires LLM capabilities for:
|
|
- Conversational AI responses
|
|
- Reasoning and analysis tasks
|
|
- Tool/function calling
|
|
- Streaming responses for real-time UX
|
|
|
|
The system needs to support multiple providers to:
|
|
- Allow cost optimization (use cheaper models for simple tasks)
|
|
- Provide fallback options
|
|
- Access specialized models (reasoning models like QwQ, O1, DeepSeek R1)
|
|
- Support both direct API access and unified gateways
|
|
|
|
## Decision
|
|
|
|
### Provider Architecture
|
|
|
|
```
|
|
┌─────────────────────────────────────────────────────────────────┐
|
|
│ RuvBot LLM Provider Layer │
|
|
├─────────────────────────────────────────────────────────────────┤
|
|
│ Provider Interface │
|
|
│ └─ LLMProvider (abstract interface) │
|
|
│ ├─ complete() - Single completion │
|
|
│ ├─ stream() - Streaming completion (AsyncGenerator) │
|
|
│ ├─ countTokens() - Token estimation │
|
|
│ ├─ getModel() - Model info │
|
|
│ └─ isHealthy() - Health check │
|
|
├─────────────────────────────────────────────────────────────────┤
|
|
│ Implementations │
|
|
│ ├─ AnthropicProvider : Direct Anthropic API │
|
|
│ │ └─ Claude 4, 3.5, 3 models │
|
|
│ └─ OpenRouterProvider : Multi-model gateway │
|
|
│ ├─ Qwen QwQ (reasoning) │
|
|
│ ├─ DeepSeek R1 (reasoning) │
|
|
│ ├─ Claude via OpenRouter │
|
|
│ ├─ GPT-4, O1 via OpenRouter │
|
|
│ └─ Gemini, Llama via OpenRouter │
|
|
├─────────────────────────────────────────────────────────────────┤
|
|
│ Features │
|
|
│ ├─ Tool/Function calling │
|
|
│ ├─ Streaming with token callbacks │
|
|
│ ├─ Automatic retry with backoff │
|
|
│ └─ Token counting │
|
|
└─────────────────────────────────────────────────────────────────┘
|
|
```
|
|
|
|
### Implementation
|
|
|
|
Located in `/npm/packages/ruvbot/src/integration/providers/`:
|
|
- `index.ts` - Interface definitions and exports
|
|
- `AnthropicProvider.ts` - Anthropic Claude integration
|
|
- `OpenRouterProvider.ts` - OpenRouter multi-model gateway
|
|
|
|
### LLMProvider Interface
|
|
|
|
```typescript
|
|
interface LLMProvider {
|
|
complete(messages: Message[], options?: CompletionOptions): Promise<Completion>;
|
|
stream(messages: Message[], options?: StreamOptions): AsyncGenerator<Token, Completion, void>;
|
|
countTokens(text: string): Promise<number>;
|
|
getModel(): ModelInfo;
|
|
isHealthy(): Promise<boolean>;
|
|
}
|
|
|
|
interface Message {
|
|
role: 'user' | 'assistant' | 'system';
|
|
content: string;
|
|
}
|
|
|
|
interface CompletionOptions {
|
|
maxTokens?: number;
|
|
temperature?: number; // 0.0-2.0
|
|
topP?: number; // 0.0-1.0
|
|
stopSequences?: string[];
|
|
tools?: Tool[];
|
|
}
|
|
|
|
interface StreamOptions extends CompletionOptions {
|
|
onToken?: (token: string) => void;
|
|
}
|
|
|
|
interface Completion {
|
|
content: string;
|
|
finishReason: 'stop' | 'length' | 'tool_use';
|
|
usage: {
|
|
inputTokens: number;
|
|
outputTokens: number;
|
|
};
|
|
toolCalls?: ToolCall[];
|
|
}
|
|
|
|
interface Token {
|
|
type: 'text' | 'tool_use';
|
|
text?: string;
|
|
toolUse?: ToolCall;
|
|
}
|
|
```
|
|
|
|
### Tool/Function Calling
|
|
|
|
```typescript
|
|
interface Tool {
|
|
name: string;
|
|
description: string;
|
|
parameters: Record<string, unknown>; // JSON Schema
|
|
}
|
|
|
|
interface ToolCall {
|
|
id: string;
|
|
name: string;
|
|
input: Record<string, unknown>;
|
|
}
|
|
```
|
|
|
|
### AnthropicProvider
|
|
|
|
Direct integration with Anthropic's Claude API.
|
|
|
|
```typescript
|
|
interface AnthropicConfig {
|
|
apiKey: string;
|
|
baseUrl?: string; // default: 'https://api.anthropic.com'
|
|
model?: string; // default: 'claude-3-5-sonnet-20241022'
|
|
maxRetries?: number; // default: 3
|
|
timeout?: number; // default: 60000ms
|
|
}
|
|
|
|
type AnthropicModel =
|
|
| 'claude-opus-4-20250514'
|
|
| 'claude-sonnet-4-20250514'
|
|
| 'claude-3-5-sonnet-20241022'
|
|
| 'claude-3-5-haiku-20241022'
|
|
| 'claude-3-opus-20240229'
|
|
| 'claude-3-sonnet-20240229'
|
|
| 'claude-3-haiku-20240307';
|
|
```
|
|
|
|
**Model Specifications:**
|
|
|
|
| Model | Max Tokens | Context Window | Best For |
|
|
|-------|------------|----------------|----------|
|
|
| claude-opus-4-20250514 | 32,768 | 200,000 | Complex reasoning, analysis |
|
|
| claude-sonnet-4-20250514 | 16,384 | 200,000 | Balanced performance |
|
|
| claude-3-5-sonnet-20241022 | 8,192 | 200,000 | General purpose |
|
|
| claude-3-5-haiku-20241022 | 8,192 | 200,000 | Fast, cost-effective |
|
|
| claude-3-opus-20240229 | 4,096 | 200,000 | Complex tasks |
|
|
| claude-3-sonnet-20240229 | 4,096 | 200,000 | Balanced |
|
|
| claude-3-haiku-20240307 | 4,096 | 200,000 | Fast responses |
|
|
|
|
**Usage:**
|
|
|
|
```typescript
|
|
import { createAnthropicProvider } from './integration/providers';
|
|
|
|
const provider = createAnthropicProvider({
|
|
apiKey: process.env.ANTHROPIC_API_KEY!,
|
|
model: 'claude-3-5-sonnet-20241022',
|
|
});
|
|
|
|
// Simple completion
|
|
const response = await provider.complete([
|
|
{ role: 'user', content: 'Hello!' }
|
|
]);
|
|
|
|
// Streaming
|
|
for await (const token of provider.stream(messages)) {
|
|
if (token.type === 'text') {
|
|
process.stdout.write(token.text!);
|
|
}
|
|
}
|
|
|
|
// With tools
|
|
const toolResponse = await provider.complete(messages, {
|
|
tools: [{
|
|
name: 'get_weather',
|
|
description: 'Get weather for a location',
|
|
parameters: {
|
|
type: 'object',
|
|
properties: {
|
|
location: { type: 'string' }
|
|
}
|
|
}
|
|
}]
|
|
});
|
|
```
|
|
|
|
### OpenRouterProvider
|
|
|
|
Access to 100+ models through OpenRouter's unified API.
|
|
|
|
```typescript
|
|
interface OpenRouterConfig {
|
|
apiKey: string;
|
|
baseUrl?: string; // default: 'https://openrouter.ai/api'
|
|
model?: string; // default: 'qwen/qwq-32b'
|
|
siteUrl?: string; // For attribution
|
|
siteName?: string; // default: 'RuvBot'
|
|
maxRetries?: number; // default: 3
|
|
timeout?: number; // default: 120000ms (longer for reasoning)
|
|
}
|
|
|
|
type OpenRouterModel =
|
|
// Reasoning Models
|
|
| 'qwen/qwq-32b'
|
|
| 'qwen/qwq-32b:free'
|
|
| 'openai/o1-preview'
|
|
| 'openai/o1-mini'
|
|
| 'deepseek/deepseek-r1'
|
|
// Standard Models
|
|
| 'anthropic/claude-3.5-sonnet'
|
|
| 'openai/gpt-4o'
|
|
| 'google/gemini-pro-1.5'
|
|
| 'meta-llama/llama-3.1-405b-instruct'
|
|
| string; // Any OpenRouter model
|
|
```
|
|
|
|
**Reasoning Model Specifications:**
|
|
|
|
| Model | Max Tokens | Context | Special Features |
|
|
|-------|------------|---------|------------------|
|
|
| qwen/qwq-32b | 16,384 | 32,768 | Chain-of-thought reasoning |
|
|
| qwen/qwq-32b:free | 16,384 | 32,768 | Free tier available |
|
|
| openai/o1-preview | 32,768 | 128,000 | Advanced reasoning |
|
|
| openai/o1-mini | 65,536 | 128,000 | Faster reasoning |
|
|
| deepseek/deepseek-r1 | 8,192 | 64,000 | Open-source reasoning |
|
|
|
|
**Usage:**
|
|
|
|
```typescript
|
|
import {
|
|
createOpenRouterProvider,
|
|
createQwQProvider,
|
|
createDeepSeekR1Provider
|
|
} from './integration/providers';
|
|
|
|
// General OpenRouter
|
|
const provider = createOpenRouterProvider({
|
|
apiKey: process.env.OPENROUTER_API_KEY!,
|
|
model: 'qwen/qwq-32b',
|
|
});
|
|
|
|
// Convenience: QwQ reasoning model
|
|
const qwq = createQwQProvider(process.env.OPENROUTER_API_KEY!, false);
|
|
|
|
// Convenience: Free QwQ
|
|
const qwqFree = createQwQProvider(process.env.OPENROUTER_API_KEY!, true);
|
|
|
|
// Convenience: DeepSeek R1
|
|
const deepseek = createDeepSeekR1Provider(process.env.OPENROUTER_API_KEY!);
|
|
|
|
// List available models
|
|
const models = await provider.listModels();
|
|
```
|
|
|
|
### Configuration Options
|
|
|
|
**Environment Variables:**
|
|
|
|
```bash
|
|
# Anthropic
|
|
ANTHROPIC_API_KEY=sk-ant-...
|
|
|
|
# OpenRouter
|
|
OPENROUTER_API_KEY=sk-or-...
|
|
```
|
|
|
|
**Rate Limiting:**
|
|
- Both providers use native fetch with `AbortSignal.timeout()`
|
|
- Anthropic: 60s default timeout
|
|
- OpenRouter: 120s default timeout (for reasoning models)
|
|
|
|
**Retry Strategy:**
|
|
- Default: 3 retries
|
|
- Backoff: Not implemented in base (use with retry libraries)
|
|
|
|
### Performance Benchmarks
|
|
|
|
| Operation | Anthropic | OpenRouter |
|
|
|-----------|-----------|------------|
|
|
| Cold start | ~500ms | ~800ms |
|
|
| Token latency (first) | ~200ms | ~300ms |
|
|
| Throughput (tokens/s) | ~50 | ~40 |
|
|
| Tool call parsing | <10ms | <10ms |
|
|
|
|
### Error Handling
|
|
|
|
```typescript
|
|
try {
|
|
const response = await provider.complete(messages);
|
|
} catch (error) {
|
|
if (error.message.includes('API error: 429')) {
|
|
// Rate limited - implement backoff
|
|
} else if (error.message.includes('API error: 401')) {
|
|
// Invalid API key
|
|
} else if (error.message.includes('timeout')) {
|
|
// Request timed out
|
|
}
|
|
}
|
|
```
|
|
|
|
### Usage Patterns
|
|
|
|
**Model Routing by Task Complexity:**
|
|
|
|
```typescript
|
|
function selectProvider(taskComplexity: 'simple' | 'medium' | 'complex' | 'reasoning') {
|
|
switch (taskComplexity) {
|
|
case 'simple':
|
|
return createAnthropicProvider({ apiKey, model: 'claude-3-5-haiku-20241022' });
|
|
case 'medium':
|
|
return createAnthropicProvider({ apiKey, model: 'claude-3-5-sonnet-20241022' });
|
|
case 'complex':
|
|
return createAnthropicProvider({ apiKey, model: 'claude-opus-4-20250514' });
|
|
case 'reasoning':
|
|
return createQwQProvider(openRouterApiKey);
|
|
}
|
|
}
|
|
```
|
|
|
|
**Fallback Chain:**
|
|
|
|
```typescript
|
|
async function completeWithFallback(messages: Message[]) {
|
|
const providers = [
|
|
createAnthropicProvider({ apiKey, model: 'claude-3-5-sonnet-20241022' }),
|
|
createOpenRouterProvider({ apiKey: orKey, model: 'anthropic/claude-3.5-sonnet' }),
|
|
createQwQProvider(orKey, true), // Free fallback
|
|
];
|
|
|
|
for (const provider of providers) {
|
|
try {
|
|
if (await provider.isHealthy()) {
|
|
return await provider.complete(messages);
|
|
}
|
|
} catch (error) {
|
|
console.warn(`Provider failed, trying next:`, error);
|
|
}
|
|
}
|
|
throw new Error('All providers failed');
|
|
}
|
|
```
|
|
|
|
## Consequences
|
|
|
|
### Positive
|
|
- Unified interface for multiple LLM providers
|
|
- Access to 100+ models through OpenRouter
|
|
- Native streaming support with token callbacks
|
|
- Tool/function calling support
|
|
- Easy provider switching for cost optimization
|
|
|
|
### Negative
|
|
- Token counting is approximate (not tiktoken-based)
|
|
- No built-in retry with exponential backoff
|
|
- System messages handled differently by providers
|
|
|
|
### Trade-offs
|
|
- OpenRouter adds latency vs direct API calls
|
|
- Reasoning models (QwQ, O1) have longer timeouts
|
|
- Free tiers have rate limits and quotas
|
|
|
|
### RuvBot Advantages
|
|
- Multi-provider support vs single provider
|
|
- Reasoning model access (QwQ, DeepSeek R1, O1)
|
|
- Factory functions for common configurations
|
|
- Streaming with async generators
|