Merge commit 'd803bfe2b1fe7f5e219e50ac20d6801a0a58ac75' as 'vendor/ruvector'
This commit is contained in:
675
vendor/ruvector/examples/edge-net/pkg/monitor.js
vendored
Normal file
675
vendor/ruvector/examples/edge-net/pkg/monitor.js
vendored
Normal file
@@ -0,0 +1,675 @@
|
||||
/**
|
||||
* @ruvector/edge-net Monitoring and Metrics System
|
||||
*
|
||||
* Real-time monitoring for distributed compute network:
|
||||
* - System metrics collection
|
||||
* - Network health monitoring
|
||||
* - Performance tracking
|
||||
* - Alert system
|
||||
* - Metrics aggregation
|
||||
*
|
||||
* @module @ruvector/edge-net/monitor
|
||||
*/
|
||||
|
||||
import { EventEmitter } from 'events';
|
||||
import { randomBytes } from 'crypto';
|
||||
import { cpus, totalmem, freemem, loadavg } from 'os';
|
||||
|
||||
// ============================================
|
||||
// METRICS COLLECTOR
|
||||
// ============================================
|
||||
|
||||
/**
 * Bounded, in-memory time series of `{ value, timestamp }` samples with
 * simple statistics computed over a trailing time window.
 */
class MetricsSeries {
  /**
   * @param {object} [options]
   * @param {string} [options.name] - Name reported by `toJSON()`.
   * @param {number} [options.maxPoints=1000] - Maximum number of samples kept;
   *   a falsy value (including 0) falls back to 1000.
   */
  constructor(options = {}) {
    this.name = options.name;
    this.maxPoints = options.maxPoints || 1000;
    this.points = [];
  }

  /**
   * Append a sample, discarding the oldest ones beyond `maxPoints`.
   * @param {number} value
   * @param {number} [timestamp=Date.now()] - Sample time in epoch milliseconds.
   */
  add(value, timestamp = Date.now()) {
    this.points.push({ value, timestamp });

    if (this.points.length > this.maxPoints) {
      this.points = this.points.slice(-this.maxPoints);
    }
  }

  /**
   * @returns {{value: number, timestamp: number}|null} Most recently added
   *   sample, or `null` when the series is empty.
   */
  latest() {
    return this.points.length > 0 ? this.points[this.points.length - 1] : null;
  }

  /**
   * @param {number} [duration=60000] - Trailing window in milliseconds.
   * @returns {number} Mean of the values in the window (0 when empty).
   */
  avg(duration = 60000) {
    const recent = this.#recent(duration);
    if (recent.length === 0) return 0;
    return recent.reduce((sum, point) => sum + point.value, 0) / recent.length;
  }

  /**
   * @param {number} [duration=60000] - Trailing window in milliseconds.
   * @returns {number} Smallest value in the window (0 when empty).
   */
  min(duration = 60000) {
    const recent = this.#recent(duration);
    if (recent.length === 0) return 0;
    // Fold instead of Math.min(...values): spreading a very large window as
    // call arguments can exceed the engine's argument limit and throw.
    return recent.reduce((lowest, point) => Math.min(lowest, point.value), Infinity);
  }

  /**
   * @param {number} [duration=60000] - Trailing window in milliseconds.
   * @returns {number} Largest value in the window (0 when empty).
   */
  max(duration = 60000) {
    const recent = this.#recent(duration);
    if (recent.length === 0) return 0;
    return recent.reduce((highest, point) => Math.max(highest, point.value), -Infinity);
  }

  /**
   * Change in value per second between the first and last sample of the
   * window. Only meaningful when the recorded values are cumulative.
   * @param {number} [duration=60000] - Trailing window in milliseconds.
   * @returns {number} 0 with fewer than two samples or no elapsed time.
   */
  rate(duration = 60000) {
    const recent = this.#recent(duration);
    if (recent.length < 2) return 0;

    const first = recent[0];
    const last = recent[recent.length - 1];
    const seconds = (last.timestamp - first.timestamp) / 1000;

    return seconds > 0 ? (last.value - first.value) / seconds : 0;
  }

  /**
   * Nearest-rank percentile of the values in the window.
   * @param {number} p - Percentile; ranks outside 0-100 are clamped to the
   *   smallest / largest sample.
   * @param {number} [duration=60000] - Trailing window in milliseconds.
   * @returns {number} 0 when the window is empty.
   */
  percentile(p, duration = 60000) {
    const recent = this.#recent(duration);
    if (recent.length === 0) return 0;

    const sorted = recent.map((point) => point.value).sort((a, b) => a - b);
    const rank = Math.ceil((p / 100) * sorted.length) - 1;
    // Clamp both ends so p > 100 yields the maximum instead of `undefined`.
    const index = Math.min(sorted.length - 1, Math.max(0, rank));
    return sorted[index];
  }

  /**
   * @returns {object} Snapshot with name, sample count, latest sample and the
   *   default-window (60 s) avg/min/max.
   */
  toJSON() {
    return {
      name: this.name,
      count: this.points.length,
      latest: this.latest(),
      avg: this.avg(),
      min: this.min(),
      max: this.max(),
    };
  }

  /**
   * Samples whose timestamp lies within the last `duration` milliseconds.
   * @param {number} duration
   * @returns {Array<{value: number, timestamp: number}>}
   */
  #recent(duration) {
    const cutoff = Date.now() - duration;
    return this.points.filter((point) => point.timestamp >= cutoff);
  }
}
|
||||
|
||||
/**
 * Counter metric: a value that only ever increases until it is reset.
 */
class Counter {
  /**
   * @param {string} name - Metric name reported by `toJSON()`.
   */
  constructor(name) {
    this.name = name;
    this.value = 0;
    this.lastReset = Date.now();
  }

  /**
   * Increase the counter.
   * @param {number} [amount=1] - Finite, non-negative increment.
   * @throws {RangeError} When `amount` is negative, NaN, infinite or not a
   *   number; accepting it would break the monotonic guarantee or leave the
   *   counter permanently NaN.
   */
  inc(amount = 1) {
    if (!Number.isFinite(amount) || amount < 0) {
      throw new RangeError(
        `Counter "${this.name}" requires a finite, non-negative increment (received ${String(amount)})`,
      );
    }
    this.value += amount;
  }

  /**
   * @returns {number} Current count.
   */
  get() {
    return this.value;
  }

  /**
   * Set the count back to zero and record the reset time.
   */
  reset() {
    this.value = 0;
    this.lastReset = Date.now();
  }

  /**
   * @returns {{name: string, value: number, lastReset: number}}
   */
  toJSON() {
    return {
      name: this.name,
      value: this.value,
      lastReset: this.lastReset,
    };
  }
}
|
||||
|
||||
/**
 * Gauge metric: a single numeric reading that may rise or fall freely.
 */
class Gauge {
  /**
   * @param {string} name - Metric name reported by `toJSON()`.
   */
  constructor(name) {
    this.name = name;
    this.value = 0;
  }

  /**
   * Overwrite the current reading.
   * @param {number} value
   */
  set(value) {
    this.value = value;
  }

  /**
   * Raise the reading.
   * @param {number} [amount=1]
   */
  inc(amount = 1) {
    this.value = this.value + amount;
  }

  /**
   * Lower the reading.
   * @param {number} [amount=1]
   */
  dec(amount = 1) {
    this.value = this.value - amount;
  }

  /**
   * @returns {number} Current reading.
   */
  get() {
    return this.value;
  }

  /**
   * @returns {{name: string, value: number}}
   */
  toJSON() {
    const { name, value } = this;
    return { name, value };
  }
}
|
||||
|
||||
/**
 * Histogram metric with cumulative buckets: each bucket counts every
 * observation less than or equal to its upper bound, and the `Infinity`
 * bucket counts all observations.
 */
class Histogram {
  /**
   * @param {string} name - Metric name reported by `toJSON()`.
   * @param {number[]} [buckets] - Upper bounds of the buckets. The array is
   *   copied, de-duplicated and sorted; the caller's array is not modified.
   */
  constructor(name, buckets = [5, 10, 25, 50, 100, 250, 500, 1000, 2500, 5000, 10000]) {
    this.name = name;
    // Non-finite bounds are dropped: the Infinity bucket is always added
    // below, and a user-supplied one would be counted twice per observation.
    this.buckets = [...new Set(buckets)]
      .filter((bound) => Number.isFinite(bound))
      .sort((a, b) => a - b);
    this.counts = new Map(this.buckets.map((bound) => [bound, 0]));
    this.counts.set(Infinity, 0);
    this.sum = 0;
    this.count = 0;
  }

  /**
   * Record one observation.
   * @param {number} value - Non-finite or non-numeric values are ignored so a
   *   single bad sample cannot turn `sum` and `avg()` into NaN for good.
   */
  observe(value) {
    if (!Number.isFinite(value)) return;

    this.sum += value;
    this.count++;

    for (const bound of this.buckets) {
      if (value <= bound) {
        this.counts.set(bound, this.counts.get(bound) + 1);
      }
    }
    this.counts.set(Infinity, this.counts.get(Infinity) + 1);
  }

  /**
   * @returns {number} Mean of all observations (0 when there are none).
   */
  avg() {
    return this.count > 0 ? this.sum / this.count : 0;
  }

  /**
   * @returns {object} Snapshot with name, count, sum, avg and the cumulative
   *   bucket counts keyed by upper bound (keys become strings).
   */
  toJSON() {
    return {
      name: this.name,
      count: this.count,
      sum: this.sum,
      avg: this.avg(),
      buckets: Object.fromEntries(this.counts),
    };
  }
}
|
||||
|
||||
// ============================================
|
||||
// SYSTEM MONITOR
|
||||
// ============================================
|
||||
|
||||
/**
 * System resource monitor.
 *
 * Periodically samples load average and memory through the `os` module,
 * stores the readings in time series and emits a `'metrics'` event with the
 * result of `getMetrics()` after every sample.
 */
export class SystemMonitor extends EventEmitter {
  /**
   * @param {object} [options]
   * @param {number} [options.interval=5000] - Sampling period in milliseconds.
   */
  constructor(options = {}) {
    super();
    this.interval = options.interval || 5000;
    this.timer = null;

    // Metrics
    this.cpu = new MetricsSeries({ name: 'cpu_usage' });
    this.memory = new MetricsSeries({ name: 'memory_usage' });
    this.load = new MetricsSeries({ name: 'load_avg' });
  }

  /**
   * Take a sample immediately and then on every `interval`.
   * Calling it while already running is a no-op, so a second call cannot
   * orphan the first interval timer.
   */
  start() {
    if (this.timer) return;

    this.collect();
    this.timer = setInterval(() => this.collect(), this.interval);
  }

  /**
   * Stop periodic sampling. Safe to call when not running.
   */
  stop() {
    if (this.timer) {
      clearInterval(this.timer);
      this.timer = null;
    }
  }

  /**
   * Take one sample of CPU, memory and load, then emit `'metrics'`.
   */
  collect() {
    // CPU usage (simplified): 1-minute load average relative to core count.
    const load = loadavg()[0];
    // cpus() may return an empty array when CPU information is unavailable;
    // treat that as one core instead of dividing by zero.
    const cores = Math.max(1, cpus().length);
    const cpuUsage = Math.min(100, Math.max(0, (load / cores) * 100));
    this.cpu.add(cpuUsage);

    // Memory usage as a percentage of total memory.
    const total = totalmem();
    const free = freemem();
    const memUsage = total > 0 ? ((total - free) / total) * 100 : 0;
    this.memory.add(memUsage);

    // Load average
    this.load.add(load);

    this.emit('metrics', this.getMetrics());
  }

  /**
   * @returns {object} Snapshot with the latest CPU/memory/load readings,
   *   1- and 5-minute CPU averages, raw memory totals, the three load
   *   averages reported by the OS and the core count.
   */
  getMetrics() {
    return {
      timestamp: Date.now(),
      cpu: {
        usage: this.cpu.latest()?.value ?? 0,
        avg1m: this.cpu.avg(60000),
        avg5m: this.cpu.avg(300000),
      },
      memory: {
        usage: this.memory.latest()?.value ?? 0,
        total: totalmem(),
        free: freemem(),
      },
      load: {
        current: this.load.latest()?.value ?? 0,
        avg: loadavg(),
      },
      cores: cpus().length,
    };
  }
}
|
||||
|
||||
// ============================================
|
||||
// NETWORK MONITOR
|
||||
// ============================================
|
||||
|
||||
/**
 * Network health and performance monitor.
 *
 * Tracks connected peers, message/error counts, peer latency and message
 * sizes, and runs a periodic health check.
 *
 * Events: `'peer-connected'`, `'peer-disconnected'`, `'alert'`,
 * `'health-check'` and `'error'` (the last only when a listener is attached).
 */
export class NetworkMonitor extends EventEmitter {
  /**
   * @param {object} [options]
   * @param {string} [options.nodeId] - Identifier of the local node.
   * @param {number} [options.checkInterval=30000] - Health-check period in ms.
   * @param {number} [options.latencyHigh=1000] - Average latency (ms) above
   *   which `check()` raises a `high_latency` alert.
   */
  constructor(options = {}) {
    super();
    this.nodeId = options.nodeId;
    this.checkInterval = options.checkInterval || 30000;
    this.latencyHigh = options.latencyHigh ?? 1000;
    this.timer = null;

    // Metrics
    this.peers = new Gauge('connected_peers');
    this.messages = new Counter('messages_total');
    this.errors = new Counter('errors_total');
    this.latency = new Histogram('peer_latency_ms');

    // Series
    this.bandwidth = new MetricsSeries({ name: 'bandwidth_bps' });
    this.peerCount = new MetricsSeries({ name: 'peer_count' });

    // Peer tracking
    this.peerLatencies = new Map(); // peerId -> latency ms
    this.peerStatus = new Map(); // peerId -> { status, lastSeen }
  }

  /**
   * Begin periodic health checks. A no-op while already running, so a second
   * call cannot orphan the first interval timer.
   */
  start() {
    if (this.timer) return;
    this.timer = setInterval(() => this.check(), this.checkInterval);
  }

  /**
   * Stop periodic health checks. Safe to call when not running.
   */
  stop() {
    if (this.timer) {
      clearInterval(this.timer);
      this.timer = null;
    }
  }

  /**
   * Record peer connection.
   * The peer gauge only moves when the peer was not already connected, so a
   * duplicate notification cannot inflate the count.
   * @param {string} peerId
   */
  peerConnected(peerId) {
    if (this.peerStatus.get(peerId)?.status !== 'connected') {
      this.peers.inc();
    }
    this.peerStatus.set(peerId, { status: 'connected', lastSeen: Date.now() });
    this.peerCount.add(this.peers.get());
    this.emit('peer-connected', { peerId });
  }

  /**
   * Record peer disconnection.
   * The peer gauge only moves when the peer was tracked as connected, so an
   * unknown or repeated disconnect cannot drive the count below zero.
   * @param {string} peerId
   */
  peerDisconnected(peerId) {
    if (this.peerStatus.get(peerId)?.status === 'connected') {
      this.peers.dec();
    }
    this.peerStatus.set(peerId, { status: 'disconnected', lastSeen: Date.now() });
    this.peerCount.add(this.peers.get());
    this.emit('peer-disconnected', { peerId });
  }

  /**
   * Record message.
   * @param {string} [peerId] - Sender/receiver; refreshes its `lastSeen` when tracked.
   * @param {number} bytes - Message size; a non-finite size still counts the
   *   message but is kept out of the bandwidth series.
   */
  recordMessage(peerId, bytes) {
    this.messages.inc();
    if (Number.isFinite(bytes)) {
      this.bandwidth.add(bytes);
    }

    if (peerId && this.peerStatus.has(peerId)) {
      this.peerStatus.get(peerId).lastSeen = Date.now();
    }
  }

  /**
   * Record latency measurement.
   * @param {string} peerId
   * @param {number} latencyMs - Non-finite measurements are ignored so they
   *   cannot turn the latency average into NaN.
   */
  recordLatency(peerId, latencyMs) {
    if (!Number.isFinite(latencyMs)) return;
    this.latency.observe(latencyMs);
    this.peerLatencies.set(peerId, latencyMs);
  }

  /**
   * Record error.
   * @param {string} type - Error category.
   */
  recordError(type) {
    this.errors.inc();
    // EventEmitter throws when 'error' is emitted without a listener; a
    // metrics call must not crash the process, so only emit when observed.
    if (this.listenerCount('error') > 0) {
      this.emit('error', { type });
    }
  }

  /**
   * Periodic health check: raises `'alert'` events for detected problems and
   * always emits `'health-check'` with the current metrics.
   */
  check() {
    const metrics = this.getMetrics();

    // Check for issues
    if (metrics.peers.current === 0) {
      this.emit('alert', { type: 'no_peers', message: 'No connected peers' });
    }

    if (metrics.latency.avg > this.latencyHigh) {
      this.emit('alert', { type: 'high_latency', message: 'High network latency', value: metrics.latency.avg });
    }

    this.emit('health-check', metrics);
  }

  /**
   * @returns {object} Snapshot of peer, message, error, latency and bandwidth
   *   metrics. `latency.p50`/`p99` are bucket-resolution estimates in ms;
   *   `bandwidth.current` is bytes per second over the last minute and
   *   `bandwidth.avg1m` is the mean recorded message size over the last minute.
   */
  getMetrics() {
    return {
      timestamp: Date.now(),
      peers: {
        current: this.peers.get(),
        avg1h: this.peerCount.avg(3600000),
      },
      messages: this.messages.get(),
      errors: this.errors.get(),
      latency: {
        avg: this.latency.avg(),
        p50: NetworkMonitor.#quantile(this.latency, 0.5),
        p99: NetworkMonitor.#quantile(this.latency, 0.99),
      },
      bandwidth: {
        current: this.#bytesPerSecond(60000),
        avg1m: this.bandwidth.avg(60000),
      },
    };
  }

  /**
   * Estimate a quantile from a histogram's cumulative bucket counts: the
   * smallest bucket upper bound that covers at least `q` of all observations.
   * Falls back to the largest finite bound when the quantile lies beyond it.
   * @param {Histogram} histogram
   * @param {number} q - Quantile in the range 0-1.
   * @returns {number} 0 when nothing has been observed.
   */
  static #quantile(histogram, q) {
    const total = histogram.count;
    if (!total) return 0;

    const bounds = [...histogram.counts.keys()]
      .filter((bound) => Number.isFinite(bound))
      .sort((a, b) => a - b);
    const target = q * total;

    for (const bound of bounds) {
      if (histogram.counts.get(bound) >= target) return bound;
    }
    return bounds.length > 0 ? bounds[bounds.length - 1] : 0;
  }

  /**
   * Bytes per second between the first and last message recorded in the
   * window. The series stores per-message sizes, so the sizes are summed
   * (excluding the first sample, which marks the start of the interval).
   * @param {number} duration - Trailing window in milliseconds.
   * @returns {number} 0 with fewer than two samples or no elapsed time.
   */
  #bytesPerSecond(duration) {
    const cutoff = Date.now() - duration;
    const recent = this.bandwidth.points.filter((point) => point.timestamp >= cutoff);
    if (recent.length < 2) return 0;

    const seconds = (recent[recent.length - 1].timestamp - recent[0].timestamp) / 1000;
    if (seconds <= 0) return 0;

    const bytes = recent.slice(1).reduce((sum, point) => sum + point.value, 0);
    return bytes / seconds;
  }
}
|
||||
|
||||
// ============================================
|
||||
// TASK MONITOR
|
||||
// ============================================
|
||||
|
||||
/**
 * Task execution monitor.
 *
 * Counts submitted/completed/failed/retried tasks, tracks how many are
 * pending or running, and records wait and execution times.
 */
export class TaskMonitor extends EventEmitter {
  /**
   * @param {object} [options] - Currently unused; accepted for symmetry with
   *   the other monitors.
   */
  constructor(options = {}) {
    super();

    // Counters
    this.submitted = new Counter('tasks_submitted');
    this.completed = new Counter('tasks_completed');
    this.failed = new Counter('tasks_failed');
    this.retried = new Counter('tasks_retried');

    // Gauges
    this.pending = new Gauge('tasks_pending');
    this.running = new Gauge('tasks_running');

    // Histograms
    this.waitTime = new Histogram('task_wait_time_ms');
    this.execTime = new Histogram('task_exec_time_ms');

    // Series: one sample (value 1) per completed task.
    this.throughput = new MetricsSeries({ name: 'tasks_per_second' });
  }

  /**
   * A task entered the queue.
   */
  taskSubmitted() {
    this.submitted.inc();
    this.pending.inc();
  }

  /**
   * A queued task began executing.
   */
  taskStarted() {
    TaskMonitor.#lower(this.pending);
    this.running.inc();
  }

  /**
   * A running task finished successfully.
   * @param {number} waitTimeMs - Time the task spent queued.
   * @param {number} execTimeMs - Time the task spent executing.
   */
  taskCompleted(waitTimeMs, execTimeMs) {
    TaskMonitor.#lower(this.running);
    this.completed.inc();
    this.waitTime.observe(waitTimeMs);
    this.execTime.observe(execTimeMs);
    this.throughput.add(1);
  }

  /**
   * A running task failed.
   */
  taskFailed() {
    TaskMonitor.#lower(this.running);
    this.failed.inc();
  }

  /**
   * A task was scheduled for another attempt.
   */
  taskRetried() {
    this.retried.inc();
  }

  /**
   * @returns {object} Snapshot of task counters and gauges, the success rate
   *   (completed / (completed + failed), 1 when nothing has finished),
   *   bucket-resolution p50/p99 estimates in ms for wait and execution time,
   *   and completions per second over the last minute.
   */
  getMetrics() {
    const completed = this.completed.get();
    const failed = this.failed.get();
    const finished = completed + failed;
    const successRate = finished > 0 ? completed / finished : 1;

    return {
      timestamp: Date.now(),
      submitted: this.submitted.get(),
      completed,
      failed,
      retried: this.retried.get(),
      pending: this.pending.get(),
      running: this.running.get(),
      successRate,
      waitTime: {
        avg: this.waitTime.avg(),
        p50: TaskMonitor.#quantile(this.waitTime, 0.5),
        p99: TaskMonitor.#quantile(this.waitTime, 0.99),
      },
      execTime: {
        avg: this.execTime.avg(),
        p50: TaskMonitor.#quantile(this.execTime, 0.5),
        p99: TaskMonitor.#quantile(this.execTime, 0.99),
      },
      throughput: this.#completionsPerSecond(60000),
    };
  }

  /**
   * Decrement a gauge without letting it drop below zero, so an unmatched
   * notification cannot produce a negative task count.
   * @param {Gauge} gauge
   */
  static #lower(gauge) {
    if (gauge.get() > 0) {
      gauge.dec();
    }
  }

  /**
   * Estimate a quantile from a histogram's cumulative bucket counts: the
   * smallest bucket upper bound that covers at least `q` of all observations.
   * Falls back to the largest finite bound when the quantile lies beyond it.
   * @param {Histogram} histogram
   * @param {number} q - Quantile in the range 0-1.
   * @returns {number} 0 when nothing has been observed.
   */
  static #quantile(histogram, q) {
    const total = histogram.count;
    if (!total) return 0;

    const bounds = [...histogram.counts.keys()]
      .filter((bound) => Number.isFinite(bound))
      .sort((a, b) => a - b);
    const target = q * total;

    for (const bound of bounds) {
      if (histogram.counts.get(bound) >= target) return bound;
    }
    return bounds.length > 0 ? bounds[bounds.length - 1] : 0;
  }

  /**
   * Completions per second between the first and last completion recorded
   * in the window. Every sample has value 1, so the series' own `rate()`
   * (last value minus first value) would always be 0; the samples after the
   * first are summed instead.
   * @param {number} duration - Trailing window in milliseconds.
   * @returns {number} 0 with fewer than two samples or no elapsed time.
   */
  #completionsPerSecond(duration) {
    const cutoff = Date.now() - duration;
    const recent = this.throughput.points.filter((point) => point.timestamp >= cutoff);
    if (recent.length < 2) return 0;

    const seconds = (recent[recent.length - 1].timestamp - recent[0].timestamp) / 1000;
    if (seconds <= 0) return 0;

    const completions = recent.slice(1).reduce((sum, point) => sum + point.value, 0);
    return completions / seconds;
  }
}
|
||||
|
||||
// ============================================
|
||||
// MONITORING DASHBOARD
|
||||
// ============================================
|
||||
|
||||
/**
 * Unified monitoring dashboard.
 *
 * Owns a system, network and task monitor, tracks active alerts, computes an
 * overall health score and periodically emits a combined report.
 *
 * Events: `'started'`, `'stopped'`, `'system-metrics'`, `'network-metrics'`,
 * `'network-error'`, `'alert'`, `'alert-cleared'`, `'report'`.
 */
export class Monitor extends EventEmitter {
  /**
   * @param {object} [options]
   * @param {string} [options.nodeId] - Node identifier; a random
   *   `monitor-<hex>` id is generated when omitted.
   * @param {object} [options.system] - Options passed to the system monitor.
   * @param {object} [options.network] - Options passed to the network monitor.
   * @param {object} [options.tasks] - Options passed to the task monitor.
   * @param {number} [options.cpuHigh=90] - CPU usage (%) considered high.
   * @param {number} [options.memoryHigh=90] - Memory usage (%) considered high.
   * @param {number} [options.latencyHigh=1000] - Average latency (ms) considered high.
   * @param {number} [options.errorRateHigh=0.1] - Task failure ratio considered high.
   * @param {object} [options.thresholds] - Overrides for any of the four thresholds.
   * @param {number} [options.maxAlertHistory=1000] - Maximum alert records
   *   kept in `alertHistory`.
   * @param {number} [options.reportInterval=60000] - Report period in ms.
   */
  constructor(options = {}) {
    super();
    this.nodeId = options.nodeId || `monitor-${randomBytes(8).toString('hex')}`;

    // Sub-monitors
    this.system = new SystemMonitor(options.system);
    this.network = new NetworkMonitor({ ...options.network, nodeId: this.nodeId });
    this.tasks = new TaskMonitor(options.tasks);

    // Alert thresholds. `??` rather than `||` so an explicit 0 (for example
    // "no task failures tolerated") is honoured instead of being replaced.
    this.thresholds = {
      cpuHigh: options.cpuHigh ?? 90,
      memoryHigh: options.memoryHigh ?? 90,
      latencyHigh: options.latencyHigh ?? 1000,
      errorRateHigh: options.errorRateHigh ?? 0.1,
      ...options.thresholds,
    };

    // Alert state
    this.alerts = new Map();
    this.alertHistory = [];
    this.maxAlertHistory = options.maxAlertHistory ?? 1000;

    // Reporting
    this.reportInterval = options.reportInterval || 60000;
    this.reportTimer = null;

    // Forward events
    this.system.on('metrics', (metrics) => this.emit('system-metrics', metrics));
    this.network.on('health-check', (metrics) => this.emit('network-metrics', metrics));
    this.network.on('alert', (alert) => this.handleAlert(alert));
    // An 'error' event without a listener makes EventEmitter throw. Listening
    // here keeps a recorded network error from crashing the process, and it
    // is re-emitted under a name that is safe to leave unobserved.
    this.network.on('error', (error) => this.emit('network-error', error));
  }

  /**
   * Start all monitors and the periodic report. A no-op while already
   * running, so a second call cannot orphan the report timer.
   */
  start() {
    if (this.reportTimer) return;

    this.system.start();
    this.network.start();

    this.reportTimer = setInterval(() => {
      this.generateReport();
    }, this.reportInterval);

    this.emit('started');
  }

  /**
   * Stop all monitors and the periodic report.
   */
  stop() {
    this.system.stop();
    this.network.stop();

    if (this.reportTimer) {
      clearInterval(this.reportTimer);
      this.reportTimer = null;
    }

    this.emit('stopped');
  }

  /**
   * Handle alert: record it as active (one record per alert type) and
   * re-emit it as `'alert'`.
   * @param {{type: string, message?: string, value?: number}} alert
   */
  handleAlert(alert) {
    const key = `${alert.type}`;
    const existing = this.alerts.get(key);
    const now = Date.now();

    if (existing) {
      existing.count++;
      existing.lastSeen = now;
      // Keep the most recent reading rather than the first one seen.
      if ('value' in alert) {
        existing.value = alert.value;
      }
    } else {
      const newAlert = {
        ...alert,
        id: `alert-${randomBytes(4).toString('hex')}`,
        count: 1,
        firstSeen: now,
        lastSeen: now,
      };
      this.alerts.set(key, newAlert);
      this.alertHistory.push(newAlert);

      // Bound the history so a long-running node does not grow it forever.
      const overflow = this.alertHistory.length - this.maxAlertHistory;
      if (overflow > 0) {
        this.alertHistory.splice(0, overflow);
      }
    }

    this.emit('alert', alert);
  }

  /**
   * Clear alert: remove the active alert of the given type and emit
   * `'alert-cleared'`.
   * @param {string} type
   */
  clearAlert(type) {
    this.alerts.delete(type);
    this.emit('alert-cleared', { type });
  }

  /**
   * Generate comprehensive report and emit it as `'report'`.
   * @returns {object} Report with node id, sub-monitor metrics, active
   *   alerts and the health summary.
   */
  generateReport() {
    const report = {
      timestamp: Date.now(),
      nodeId: this.nodeId,
      system: this.system.getMetrics(),
      network: this.network.getMetrics(),
      tasks: this.tasks.getMetrics(),
      alerts: Array.from(this.alerts.values()),
      health: this.calculateHealth(),
    };

    this.emit('report', report);
    return report;
  }

  /**
   * Calculate overall health score (0-100).
   *
   * Starts at 100 and subtracts a penalty per detected issue: high CPU (20),
   * high memory (20), no peers (30), high latency (15), high task error
   * rate (15).
   * @returns {{score: number, status: string, issues: string[]}} `status` is
   *   `'healthy'` at 80 or above, `'degraded'` at 50 or above, otherwise
   *   `'unhealthy'`.
   */
  calculateHealth() {
    let score = 100;
    const issues = [];

    // System health
    const sysMetrics = this.system.getMetrics();
    if (sysMetrics.cpu.usage > this.thresholds.cpuHigh) {
      score -= 20;
      issues.push('high_cpu');
    }
    if (sysMetrics.memory.usage > this.thresholds.memoryHigh) {
      score -= 20;
      issues.push('high_memory');
    }

    // Network health
    const netMetrics = this.network.getMetrics();
    if (netMetrics.peers.current === 0) {
      score -= 30;
      issues.push('no_peers');
    }
    if (netMetrics.latency.avg > this.thresholds.latencyHigh) {
      score -= 15;
      issues.push('high_latency');
    }

    // Task health
    const taskMetrics = this.tasks.getMetrics();
    if (taskMetrics.successRate < (1 - this.thresholds.errorRateHigh)) {
      score -= 15;
      issues.push('high_error_rate');
    }

    return {
      score: Math.max(0, score),
      status: score >= 80 ? 'healthy' : score >= 50 ? 'degraded' : 'unhealthy',
      issues,
    };
  }

  /**
   * Get current metrics summary.
   * @returns {{system: object, network: object, tasks: object}}
   */
  getMetrics() {
    return {
      system: this.system.getMetrics(),
      network: this.network.getMetrics(),
      tasks: this.tasks.getMetrics(),
    };
  }

  /**
   * Get active alerts.
   * @returns {object[]}
   */
  getAlerts() {
    return Array.from(this.alerts.values());
  }
}
|
||||
|
||||
// ============================================
|
||||
// EXPORTS
|
||||
// ============================================
|
||||
|
||||
// Default export: the unified Monitor dashboard (also exported by name above).
export default Monitor;
|
||||
Reference in New Issue
Block a user