Merge commit 'd803bfe2b1fe7f5e219e50ac20d6801a0a58ac75' as 'vendor/ruvector'
This commit is contained in:
568
vendor/ruvector/npm/packages/cloud-run/streaming-service.ts
vendored
Normal file
568
vendor/ruvector/npm/packages/cloud-run/streaming-service.ts
vendored
Normal file
@@ -0,0 +1,568 @@
|
||||
/**
|
||||
* Cloud Run Streaming Service - Main Entry Point
|
||||
*
|
||||
* High-performance HTTP/2 + WebSocket server for massive concurrent connections.
|
||||
* Optimized for 500M concurrent learning streams with adaptive scaling.
|
||||
*/
|
||||
|
||||
import Fastify, { FastifyInstance, FastifyRequest, FastifyReply } from 'fastify';
|
||||
import fastifyWebsocket from '@fastify/websocket';
|
||||
import fastifyCompress from '@fastify/compress';
|
||||
import fastifyHelmet from '@fastify/helmet';
|
||||
import fastifyRateLimit from '@fastify/rate-limit';
|
||||
import { WebSocket } from 'ws';
|
||||
import { VectorClient } from './vector-client';
|
||||
import { LoadBalancer } from './load-balancer';
|
||||
import { trace, context, SpanStatusCode } from '@opentelemetry/api';
|
||||
import { register as metricsRegister, Counter, Histogram, Gauge } from 'prom-client';
|
||||
|
||||
// Environment configuration
|
||||
const CONFIG = {
|
||||
port: parseInt(process.env.PORT || '8080', 10),
|
||||
host: process.env.HOST || '0.0.0.0',
|
||||
nodeEnv: process.env.NODE_ENV || 'production',
|
||||
maxConnections: parseInt(process.env.MAX_CONNECTIONS || '100000', 10),
|
||||
requestTimeout: parseInt(process.env.REQUEST_TIMEOUT || '30000', 10),
|
||||
keepAliveTimeout: parseInt(process.env.KEEP_ALIVE_TIMEOUT || '65000', 10),
|
||||
headersTimeout: parseInt(process.env.HEADERS_TIMEOUT || '66000', 10),
|
||||
maxRequestsPerSocket: parseInt(process.env.MAX_REQUESTS_PER_SOCKET || '1000', 10),
|
||||
ruvectorHost: process.env.RUVECTOR_HOST || 'localhost:50051',
|
||||
enableTracing: process.env.ENABLE_TRACING === 'true',
|
||||
enableMetrics: process.env.ENABLE_METRICS !== 'false',
|
||||
gracefulShutdownTimeout: parseInt(process.env.GRACEFUL_SHUTDOWN_TIMEOUT || '10000', 10),
|
||||
};
|
||||
|
||||
// Prometheus metrics
|
||||
const metrics = {
|
||||
httpRequests: new Counter({
|
||||
name: 'http_requests_total',
|
||||
help: 'Total number of HTTP requests',
|
||||
labelNames: ['method', 'path', 'status_code'],
|
||||
}),
|
||||
httpDuration: new Histogram({
|
||||
name: 'http_request_duration_seconds',
|
||||
help: 'HTTP request duration in seconds',
|
||||
labelNames: ['method', 'path', 'status_code'],
|
||||
buckets: [0.01, 0.05, 0.1, 0.5, 1, 2.5, 5, 10],
|
||||
}),
|
||||
activeConnections: new Gauge({
|
||||
name: 'active_connections',
|
||||
help: 'Number of active connections',
|
||||
labelNames: ['type'],
|
||||
}),
|
||||
streamingQueries: new Counter({
|
||||
name: 'streaming_queries_total',
|
||||
help: 'Total number of streaming queries',
|
||||
labelNames: ['protocol', 'status'],
|
||||
}),
|
||||
vectorOperations: new Histogram({
|
||||
name: 'vector_operations_duration_seconds',
|
||||
help: 'Vector operation duration in seconds',
|
||||
labelNames: ['operation', 'status'],
|
||||
buckets: [0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1],
|
||||
}),
|
||||
batchSize: new Histogram({
|
||||
name: 'batch_size',
|
||||
help: 'Size of batched requests',
|
||||
buckets: [1, 5, 10, 25, 50, 100, 250, 500],
|
||||
}),
|
||||
};
|
||||
|
||||
// OpenTelemetry tracer scoped to this service; the name/version pair
// appears as the instrumentation scope on every span started below.
const tracer = trace.getTracer('streaming-service', '1.0.0');
|
||||
|
||||
// Connection manager
|
||||
class ConnectionManager {
|
||||
private httpConnections = new Set<FastifyReply>();
|
||||
private wsConnections = new Set<WebSocket>();
|
||||
private batchQueue: Map<string, Array<{ query: any; callback: Function }>> = new Map();
|
||||
private batchTimer: NodeJS.Timeout | null = null;
|
||||
private readonly BATCH_INTERVAL = 10; // 10ms batching window
|
||||
private readonly MAX_BATCH_SIZE = 100;
|
||||
|
||||
constructor(
|
||||
private vectorClient: VectorClient,
|
||||
private loadBalancer: LoadBalancer
|
||||
) {}
|
||||
|
||||
// HTTP connection tracking
|
||||
registerHttpConnection(reply: FastifyReply): void {
|
||||
this.httpConnections.add(reply);
|
||||
metrics.activeConnections.inc({ type: 'http' });
|
||||
}
|
||||
|
||||
unregisterHttpConnection(reply: FastifyReply): void {
|
||||
this.httpConnections.delete(reply);
|
||||
metrics.activeConnections.dec({ type: 'http' });
|
||||
}
|
||||
|
||||
// WebSocket connection tracking
|
||||
registerWsConnection(ws: WebSocket): void {
|
||||
this.wsConnections.add(ws);
|
||||
metrics.activeConnections.inc({ type: 'websocket' });
|
||||
|
||||
ws.on('close', () => {
|
||||
this.unregisterWsConnection(ws);
|
||||
});
|
||||
}
|
||||
|
||||
unregisterWsConnection(ws: WebSocket): void {
|
||||
this.wsConnections.delete(ws);
|
||||
metrics.activeConnections.dec({ type: 'websocket' });
|
||||
}
|
||||
|
||||
// Request batching for efficiency
|
||||
async batchQuery(query: any): Promise<any> {
|
||||
return new Promise((resolve, reject) => {
|
||||
const batchKey = this.getBatchKey(query);
|
||||
|
||||
if (!this.batchQueue.has(batchKey)) {
|
||||
this.batchQueue.set(batchKey, []);
|
||||
}
|
||||
|
||||
const batch = this.batchQueue.get(batchKey)!;
|
||||
batch.push({ query, callback: (err: Error | null, result: any) => {
|
||||
if (err) reject(err);
|
||||
else resolve(result);
|
||||
}});
|
||||
|
||||
metrics.batchSize.observe(batch.length);
|
||||
|
||||
// Process batch when full or after timeout
|
||||
if (batch.length >= this.MAX_BATCH_SIZE) {
|
||||
this.processBatch(batchKey);
|
||||
} else if (!this.batchTimer) {
|
||||
this.batchTimer = setTimeout(() => {
|
||||
this.processAllBatches();
|
||||
}, this.BATCH_INTERVAL);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
private getBatchKey(query: any): string {
|
||||
// Group similar queries for batching
|
||||
return `${query.collection || 'default'}_${query.operation || 'search'}`;
|
||||
}
|
||||
|
||||
private async processBatch(batchKey: string): Promise<void> {
|
||||
const batch = this.batchQueue.get(batchKey);
|
||||
if (!batch || batch.length === 0) return;
|
||||
|
||||
this.batchQueue.delete(batchKey);
|
||||
|
||||
const span = tracer.startSpan('process-batch', {
|
||||
attributes: { batchKey, batchSize: batch.length },
|
||||
});
|
||||
|
||||
try {
|
||||
const queries = batch.map(item => item.query);
|
||||
const results = await this.vectorClient.batchQuery(queries);
|
||||
|
||||
results.forEach((result, index) => {
|
||||
batch[index].callback(null, result);
|
||||
});
|
||||
|
||||
span.setStatus({ code: SpanStatusCode.OK });
|
||||
} catch (error) {
|
||||
span.setStatus({ code: SpanStatusCode.ERROR, message: (error as Error).message });
|
||||
batch.forEach(item => item.callback(error, null));
|
||||
} finally {
|
||||
span.end();
|
||||
}
|
||||
}
|
||||
|
||||
private async processAllBatches(): Promise<void> {
|
||||
this.batchTimer = null;
|
||||
const batchKeys = Array.from(this.batchQueue.keys());
|
||||
await Promise.all(batchKeys.map(key => this.processBatch(key)));
|
||||
}
|
||||
|
||||
// Graceful shutdown
|
||||
async shutdown(): Promise<void> {
|
||||
console.log('Starting graceful shutdown...');
|
||||
|
||||
// Stop accepting new connections
|
||||
this.httpConnections.forEach(reply => {
|
||||
if (!reply.sent) {
|
||||
reply.code(503).send({ error: 'Service shutting down' });
|
||||
}
|
||||
});
|
||||
|
||||
// Close WebSocket connections gracefully
|
||||
this.wsConnections.forEach(ws => {
|
||||
if (ws.readyState === WebSocket.OPEN) {
|
||||
ws.send(JSON.stringify({ type: 'shutdown', message: 'Service shutting down' }));
|
||||
ws.close(1001, 'Service shutting down');
|
||||
}
|
||||
});
|
||||
|
||||
// Process remaining batches
|
||||
await this.processAllBatches();
|
||||
|
||||
console.log(`Closed ${this.httpConnections.size} HTTP and ${this.wsConnections.size} WebSocket connections`);
|
||||
}
|
||||
|
||||
getStats() {
|
||||
return {
|
||||
httpConnections: this.httpConnections.size,
|
||||
wsConnections: this.wsConnections.size,
|
||||
pendingBatches: this.batchQueue.size,
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
// Main application setup
|
||||
export class StreamingService {
|
||||
private app: FastifyInstance;
|
||||
private vectorClient: VectorClient;
|
||||
private loadBalancer: LoadBalancer;
|
||||
private connectionManager: ConnectionManager;
|
||||
private isShuttingDown = false;
|
||||
|
||||
constructor() {
|
||||
this.app = Fastify({
|
||||
logger: {
|
||||
level: CONFIG.nodeEnv === 'production' ? 'info' : 'debug',
|
||||
serializers: {
|
||||
req(request) {
|
||||
return {
|
||||
method: request.method,
|
||||
url: request.url,
|
||||
headers: request.headers,
|
||||
remoteAddress: request.ip,
|
||||
};
|
||||
},
|
||||
},
|
||||
},
|
||||
trustProxy: true,
|
||||
http2: true,
|
||||
connectionTimeout: CONFIG.requestTimeout,
|
||||
keepAliveTimeout: CONFIG.keepAliveTimeout,
|
||||
requestIdHeader: 'x-request-id',
|
||||
requestIdLogLabel: 'requestId',
|
||||
});
|
||||
|
||||
this.vectorClient = new VectorClient({
|
||||
host: CONFIG.ruvectorHost,
|
||||
maxConnections: 100,
|
||||
enableMetrics: CONFIG.enableMetrics,
|
||||
});
|
||||
|
||||
this.loadBalancer = new LoadBalancer({
|
||||
maxRequestsPerSecond: 10000,
|
||||
circuitBreakerThreshold: 0.5,
|
||||
circuitBreakerTimeout: 30000,
|
||||
});
|
||||
|
||||
this.connectionManager = new ConnectionManager(this.vectorClient, this.loadBalancer);
|
||||
|
||||
this.setupMiddleware();
|
||||
this.setupRoutes();
|
||||
this.setupShutdownHandlers();
|
||||
}
|
||||
|
||||
private setupMiddleware(): void {
|
||||
// Security headers
|
||||
this.app.register(fastifyHelmet, {
|
||||
contentSecurityPolicy: false,
|
||||
});
|
||||
|
||||
// Compression
|
||||
this.app.register(fastifyCompress, {
|
||||
global: true,
|
||||
encodings: ['gzip', 'deflate', 'br'],
|
||||
});
|
||||
|
||||
// Rate limiting
|
||||
this.app.register(fastifyRateLimit, {
|
||||
max: 1000,
|
||||
timeWindow: '1 minute',
|
||||
cache: 10000,
|
||||
allowList: ['127.0.0.1'],
|
||||
redis: process.env.REDIS_URL ? { url: process.env.REDIS_URL } : undefined,
|
||||
});
|
||||
|
||||
// WebSocket support
|
||||
this.app.register(fastifyWebsocket, {
|
||||
options: {
|
||||
maxPayload: 1024 * 1024, // 1MB
|
||||
perMessageDeflate: true,
|
||||
},
|
||||
});
|
||||
|
||||
// Request tracking
|
||||
this.app.addHook('onRequest', async (request, reply) => {
|
||||
const startTime = Date.now();
|
||||
reply.raw.on('finish', () => {
|
||||
const duration = (Date.now() - startTime) / 1000;
|
||||
const labels = {
|
||||
method: request.method,
|
||||
path: request.routerPath || request.url,
|
||||
status_code: reply.statusCode.toString(),
|
||||
};
|
||||
metrics.httpRequests.inc(labels);
|
||||
metrics.httpDuration.observe(labels, duration);
|
||||
});
|
||||
});
|
||||
|
||||
// Shutdown check
|
||||
this.app.addHook('onRequest', async (request, reply) => {
|
||||
if (this.isShuttingDown) {
|
||||
reply.code(503).send({ error: 'Service shutting down' });
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
private setupRoutes(): void {
|
||||
// Health check endpoint
|
||||
this.app.get('/health', async (request, reply) => {
|
||||
const isHealthy = await this.vectorClient.healthCheck();
|
||||
const stats = this.connectionManager.getStats();
|
||||
|
||||
if (isHealthy) {
|
||||
return {
|
||||
status: 'healthy',
|
||||
timestamp: new Date().toISOString(),
|
||||
connections: stats,
|
||||
version: process.env.SERVICE_VERSION || '1.0.0',
|
||||
};
|
||||
} else {
|
||||
reply.code(503);
|
||||
return {
|
||||
status: 'unhealthy',
|
||||
timestamp: new Date().toISOString(),
|
||||
error: 'Vector client unhealthy',
|
||||
};
|
||||
}
|
||||
});
|
||||
|
||||
// Readiness check
|
||||
this.app.get('/ready', async (request, reply) => {
|
||||
if (this.isShuttingDown) {
|
||||
reply.code(503);
|
||||
return { status: 'not ready', reason: 'shutting down' };
|
||||
}
|
||||
|
||||
const stats = this.connectionManager.getStats();
|
||||
if (stats.httpConnections + stats.wsConnections >= CONFIG.maxConnections) {
|
||||
reply.code(503);
|
||||
return { status: 'not ready', reason: 'max connections reached' };
|
||||
}
|
||||
|
||||
return { status: 'ready', connections: stats };
|
||||
});
|
||||
|
||||
// Metrics endpoint
|
||||
this.app.get('/metrics', async (request, reply) => {
|
||||
reply.type('text/plain');
|
||||
return metricsRegister.metrics();
|
||||
});
|
||||
|
||||
// SSE streaming endpoint
|
||||
this.app.get('/stream/sse/:collection', async (request, reply) => {
|
||||
const { collection } = request.params as { collection: string };
|
||||
const query = request.query as any;
|
||||
|
||||
reply.raw.writeHead(200, {
|
||||
'Content-Type': 'text/event-stream',
|
||||
'Cache-Control': 'no-cache',
|
||||
'Connection': 'keep-alive',
|
||||
'X-Accel-Buffering': 'no', // Disable nginx buffering
|
||||
});
|
||||
|
||||
this.connectionManager.registerHttpConnection(reply);
|
||||
|
||||
const span = tracer.startSpan('sse-stream', {
|
||||
attributes: { collection, queryType: query.type || 'search' },
|
||||
});
|
||||
|
||||
try {
|
||||
// Heartbeat to keep connection alive
|
||||
const heartbeat = setInterval(() => {
|
||||
if (!reply.raw.destroyed) {
|
||||
reply.raw.write(': heartbeat\n\n');
|
||||
} else {
|
||||
clearInterval(heartbeat);
|
||||
}
|
||||
}, 30000);
|
||||
|
||||
// Stream results
|
||||
await this.vectorClient.streamQuery(collection, query, (chunk) => {
|
||||
if (!reply.raw.destroyed) {
|
||||
const data = JSON.stringify(chunk);
|
||||
reply.raw.write(`data: ${data}\n\n`);
|
||||
}
|
||||
});
|
||||
|
||||
clearInterval(heartbeat);
|
||||
reply.raw.write('event: done\ndata: {}\n\n');
|
||||
reply.raw.end();
|
||||
|
||||
metrics.streamingQueries.inc({ protocol: 'sse', status: 'success' });
|
||||
span.setStatus({ code: SpanStatusCode.OK });
|
||||
} catch (error) {
|
||||
this.app.log.error({ error, collection }, 'SSE stream error');
|
||||
metrics.streamingQueries.inc({ protocol: 'sse', status: 'error' });
|
||||
span.setStatus({ code: SpanStatusCode.ERROR, message: (error as Error).message });
|
||||
reply.raw.end();
|
||||
} finally {
|
||||
this.connectionManager.unregisterHttpConnection(reply);
|
||||
span.end();
|
||||
}
|
||||
});
|
||||
|
||||
// WebSocket streaming endpoint
|
||||
this.app.get('/stream/ws/:collection', { websocket: true }, (connection, request) => {
|
||||
const { collection } = request.params as { collection: string };
|
||||
const ws = connection.socket;
|
||||
|
||||
this.connectionManager.registerWsConnection(ws);
|
||||
|
||||
const span = tracer.startSpan('websocket-stream', {
|
||||
attributes: { collection },
|
||||
});
|
||||
|
||||
ws.on('message', async (message) => {
|
||||
try {
|
||||
const query = JSON.parse(message.toString());
|
||||
|
||||
if (query.type === 'ping') {
|
||||
ws.send(JSON.stringify({ type: 'pong', timestamp: Date.now() }));
|
||||
return;
|
||||
}
|
||||
|
||||
// Route through load balancer
|
||||
const routed = await this.loadBalancer.route(collection, query);
|
||||
if (!routed) {
|
||||
ws.send(JSON.stringify({ type: 'error', error: 'Load balancer rejected request' }));
|
||||
return;
|
||||
}
|
||||
|
||||
// Stream results
|
||||
await this.vectorClient.streamQuery(collection, query, (chunk) => {
|
||||
if (ws.readyState === WebSocket.OPEN) {
|
||||
ws.send(JSON.stringify({ type: 'data', data: chunk }));
|
||||
}
|
||||
});
|
||||
|
||||
ws.send(JSON.stringify({ type: 'done' }));
|
||||
metrics.streamingQueries.inc({ protocol: 'websocket', status: 'success' });
|
||||
} catch (error) {
|
||||
this.app.log.error({ error, collection }, 'WebSocket message error');
|
||||
ws.send(JSON.stringify({ type: 'error', error: (error as Error).message }));
|
||||
metrics.streamingQueries.inc({ protocol: 'websocket', status: 'error' });
|
||||
}
|
||||
});
|
||||
|
||||
ws.on('error', (error) => {
|
||||
this.app.log.error({ error }, 'WebSocket error');
|
||||
span.setStatus({ code: SpanStatusCode.ERROR, message: error.message });
|
||||
});
|
||||
|
||||
ws.on('close', () => {
|
||||
span.setStatus({ code: SpanStatusCode.OK });
|
||||
span.end();
|
||||
});
|
||||
});
|
||||
|
||||
// Batch query endpoint
|
||||
this.app.post('/query/batch', async (request, reply) => {
|
||||
const { queries } = request.body as { queries: any[] };
|
||||
|
||||
if (!Array.isArray(queries) || queries.length === 0) {
|
||||
reply.code(400);
|
||||
return { error: 'queries must be a non-empty array' };
|
||||
}
|
||||
|
||||
const span = tracer.startSpan('batch-query', {
|
||||
attributes: { queryCount: queries.length },
|
||||
});
|
||||
|
||||
try {
|
||||
const results = await Promise.all(
|
||||
queries.map(query => this.connectionManager.batchQuery(query))
|
||||
);
|
||||
|
||||
span.setStatus({ code: SpanStatusCode.OK });
|
||||
return { results };
|
||||
} catch (error) {
|
||||
this.app.log.error({ error }, 'Batch query error');
|
||||
span.setStatus({ code: SpanStatusCode.ERROR, message: (error as Error).message });
|
||||
reply.code(500);
|
||||
return { error: (error as Error).message };
|
||||
} finally {
|
||||
span.end();
|
||||
}
|
||||
});
|
||||
|
||||
// Single query endpoint
|
||||
this.app.post('/query/:collection', async (request, reply) => {
|
||||
const { collection } = request.params as { collection: string };
|
||||
const query = request.body as any;
|
||||
|
||||
const span = tracer.startSpan('single-query', {
|
||||
attributes: { collection, queryType: query.type || 'search' },
|
||||
});
|
||||
|
||||
try {
|
||||
const result = await this.connectionManager.batchQuery({ collection, ...query });
|
||||
span.setStatus({ code: SpanStatusCode.OK });
|
||||
return result;
|
||||
} catch (error) {
|
||||
this.app.log.error({ error, collection }, 'Query error');
|
||||
span.setStatus({ code: SpanStatusCode.ERROR, message: (error as Error).message });
|
||||
reply.code(500);
|
||||
return { error: (error as Error).message };
|
||||
} finally {
|
||||
span.end();
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
private setupShutdownHandlers(): void {
|
||||
const shutdown = async (signal: string) => {
|
||||
console.log(`Received ${signal}, starting graceful shutdown...`);
|
||||
this.isShuttingDown = true;
|
||||
|
||||
const timeout = setTimeout(() => {
|
||||
console.error('Graceful shutdown timeout, forcing exit');
|
||||
process.exit(1);
|
||||
}, CONFIG.gracefulShutdownTimeout);
|
||||
|
||||
try {
|
||||
await this.connectionManager.shutdown();
|
||||
await this.vectorClient.close();
|
||||
await this.app.close();
|
||||
clearTimeout(timeout);
|
||||
console.log('Graceful shutdown completed');
|
||||
process.exit(0);
|
||||
} catch (error) {
|
||||
console.error('Error during shutdown:', error);
|
||||
clearTimeout(timeout);
|
||||
process.exit(1);
|
||||
}
|
||||
};
|
||||
|
||||
process.on('SIGTERM', () => shutdown('SIGTERM'));
|
||||
process.on('SIGINT', () => shutdown('SIGINT'));
|
||||
}
|
||||
|
||||
async start(): Promise<void> {
|
||||
try {
|
||||
await this.vectorClient.initialize();
|
||||
await this.app.listen({ port: CONFIG.port, host: CONFIG.host });
|
||||
console.log(`Streaming service running on ${CONFIG.host}:${CONFIG.port}`);
|
||||
console.log(`Environment: ${CONFIG.nodeEnv}`);
|
||||
console.log(`Max connections: ${CONFIG.maxConnections}`);
|
||||
} catch (error) {
|
||||
this.app.log.error(error);
|
||||
process.exit(1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Start service if run directly
|
||||
if (require.main === module) {
|
||||
const service = new StreamingService();
|
||||
service.start();
|
||||
}
|
||||
Reference in New Issue
Block a user