384 lines
12 KiB
JavaScript
384 lines
12 KiB
JavaScript
#!/usr/bin/env node
|
|
/**
|
|
* RuVector Intelligence Metrics
|
|
*
|
|
* Tracks effectiveness of the learning system:
|
|
* - Prediction accuracy (did suggestions help?)
|
|
* - Command success rate trends
|
|
* - Agent routing accuracy
|
|
* - Time-series analysis
|
|
*/
|
|
|
|
import { readFileSync, writeFileSync, existsSync } from 'fs';
|
|
import { join, dirname } from 'path';
|
|
import { fileURLToPath } from 'url';
|
|
|
|
const __dirname = dirname(fileURLToPath(import.meta.url));
|
|
const DATA_DIR = join(__dirname, 'data');
|
|
const METRICS_FILE = join(DATA_DIR, 'metrics.json');
|
|
|
|
/**
|
|
* Load or initialize metrics
|
|
*/
|
|
function loadMetrics() {
|
|
if (existsSync(METRICS_FILE)) {
|
|
return JSON.parse(readFileSync(METRICS_FILE, 'utf-8'));
|
|
}
|
|
return {
|
|
created: new Date().toISOString(),
|
|
predictions: [], // { predicted, actual, correct, timestamp }
|
|
commandOutcomes: [], // { type, success, hadWarning, timestamp }
|
|
agentRoutings: [], // { recommended, used, success, timestamp }
|
|
dailyStats: {}, // { "2025-01-15": { commands: 10, successes: 8, ... } }
|
|
calibration: {}, // { bucket: { predicted: 0.8, actual: 0.75 } }
|
|
};
|
|
}
|
|
|
|
/**
|
|
* Save metrics
|
|
*/
|
|
function saveMetrics(metrics) {
|
|
metrics.lastUpdated = new Date().toISOString();
|
|
writeFileSync(METRICS_FILE, JSON.stringify(metrics, null, 2));
|
|
}
|
|
|
|
/**
|
|
* Record a prediction outcome
|
|
*/
|
|
export function recordPrediction(predicted, actual, metadata = {}) {
|
|
const metrics = loadMetrics();
|
|
const correct = predicted === actual;
|
|
|
|
metrics.predictions.push({
|
|
predicted,
|
|
actual,
|
|
correct,
|
|
confidence: metadata.confidence || 0,
|
|
timestamp: new Date().toISOString(),
|
|
...metadata
|
|
});
|
|
|
|
// Keep last 1000 predictions
|
|
if (metrics.predictions.length > 1000) {
|
|
metrics.predictions = metrics.predictions.slice(-1000);
|
|
}
|
|
|
|
// Update calibration buckets
|
|
const bucket = Math.floor((metadata.confidence || 0) * 10) / 10; // 0.0, 0.1, ..., 0.9
|
|
if (!metrics.calibration[bucket]) {
|
|
metrics.calibration[bucket] = { total: 0, correct: 0 };
|
|
}
|
|
metrics.calibration[bucket].total++;
|
|
if (correct) metrics.calibration[bucket].correct++;
|
|
|
|
saveMetrics(metrics);
|
|
return correct;
|
|
}
|
|
|
|
/**
|
|
* Record command outcome with context
|
|
*/
|
|
export function recordCommandOutcome(cmdType, success, context = {}) {
|
|
const metrics = loadMetrics();
|
|
const today = new Date().toISOString().split('T')[0];
|
|
|
|
metrics.commandOutcomes.push({
|
|
type: cmdType,
|
|
success,
|
|
hadWarning: context.hadWarning || false,
|
|
followedAdvice: context.followedAdvice,
|
|
timestamp: new Date().toISOString()
|
|
});
|
|
|
|
// Keep last 2000 outcomes
|
|
if (metrics.commandOutcomes.length > 2000) {
|
|
metrics.commandOutcomes = metrics.commandOutcomes.slice(-2000);
|
|
}
|
|
|
|
// Update daily stats
|
|
if (!metrics.dailyStats[today]) {
|
|
metrics.dailyStats[today] = {
|
|
commands: 0,
|
|
successes: 0,
|
|
withWarning: 0,
|
|
warningHeeded: 0,
|
|
warningHeededSuccess: 0
|
|
};
|
|
}
|
|
metrics.dailyStats[today].commands++;
|
|
if (success) metrics.dailyStats[today].successes++;
|
|
if (context.hadWarning) {
|
|
metrics.dailyStats[today].withWarning++;
|
|
if (context.followedAdvice) {
|
|
metrics.dailyStats[today].warningHeeded++;
|
|
if (success) metrics.dailyStats[today].warningHeededSuccess++;
|
|
}
|
|
}
|
|
|
|
saveMetrics(metrics);
|
|
}
|
|
|
|
/**
|
|
* Record agent routing outcome
|
|
*/
|
|
export function recordAgentRouting(recommended, actualUsed, success) {
|
|
const metrics = loadMetrics();
|
|
|
|
metrics.agentRoutings.push({
|
|
recommended,
|
|
used: actualUsed,
|
|
followed: recommended === actualUsed,
|
|
success,
|
|
timestamp: new Date().toISOString()
|
|
});
|
|
|
|
// Keep last 500 routings
|
|
if (metrics.agentRoutings.length > 500) {
|
|
metrics.agentRoutings = metrics.agentRoutings.slice(-500);
|
|
}
|
|
|
|
saveMetrics(metrics);
|
|
}
|
|
|
|
/**
|
|
* Calculate effectiveness metrics
|
|
*/
|
|
export function calculateEffectiveness() {
|
|
const metrics = loadMetrics();
|
|
const results = {
|
|
generated: new Date().toISOString(),
|
|
summary: {},
|
|
trends: {},
|
|
calibration: {},
|
|
recommendations: []
|
|
};
|
|
|
|
// === Prediction Accuracy ===
|
|
if (metrics.predictions.length > 0) {
|
|
const correct = metrics.predictions.filter(p => p.correct).length;
|
|
results.summary.predictionAccuracy = {
|
|
total: metrics.predictions.length,
|
|
correct,
|
|
rate: (correct / metrics.predictions.length).toFixed(3)
|
|
};
|
|
}
|
|
|
|
// === Command Success Rates ===
|
|
if (metrics.commandOutcomes.length > 0) {
|
|
const outcomes = metrics.commandOutcomes;
|
|
const successes = outcomes.filter(o => o.success).length;
|
|
|
|
// Overall
|
|
results.summary.commandSuccess = {
|
|
total: outcomes.length,
|
|
successes,
|
|
rate: (successes / outcomes.length).toFixed(3)
|
|
};
|
|
|
|
// With vs without warnings
|
|
const withWarning = outcomes.filter(o => o.hadWarning);
|
|
const withoutWarning = outcomes.filter(o => !o.hadWarning);
|
|
|
|
if (withWarning.length > 10 && withoutWarning.length > 10) {
|
|
const warningSuccessRate = withWarning.filter(o => o.success).length / withWarning.length;
|
|
const noWarningSuccessRate = withoutWarning.filter(o => o.success).length / withoutWarning.length;
|
|
|
|
results.summary.warningImpact = {
|
|
withWarning: { total: withWarning.length, rate: warningSuccessRate.toFixed(3) },
|
|
withoutWarning: { total: withoutWarning.length, rate: noWarningSuccessRate.toFixed(3) },
|
|
delta: (noWarningSuccessRate - warningSuccessRate).toFixed(3),
|
|
interpretation: warningSuccessRate < noWarningSuccessRate
|
|
? "Warnings correctly identify risky commands"
|
|
: "Warnings may be too aggressive"
|
|
};
|
|
}
|
|
|
|
// Heeded vs ignored warnings
|
|
const heeded = withWarning.filter(o => o.followedAdvice);
|
|
const ignored = withWarning.filter(o => o.followedAdvice === false);
|
|
|
|
if (heeded.length > 5 && ignored.length > 5) {
|
|
const heededSuccess = heeded.filter(o => o.success).length / heeded.length;
|
|
const ignoredSuccess = ignored.filter(o => o.success).length / ignored.length;
|
|
|
|
results.summary.adviceValue = {
|
|
heeded: { total: heeded.length, successRate: heededSuccess.toFixed(3) },
|
|
ignored: { total: ignored.length, successRate: ignoredSuccess.toFixed(3) },
|
|
delta: (heededSuccess - ignoredSuccess).toFixed(3),
|
|
interpretation: heededSuccess > ignoredSuccess
|
|
? "Following advice improves outcomes"
|
|
: "Advice may not be helpful"
|
|
};
|
|
}
|
|
}
|
|
|
|
// === Agent Routing Accuracy ===
|
|
if (metrics.agentRoutings.length > 0) {
|
|
const routings = metrics.agentRoutings;
|
|
const followed = routings.filter(r => r.followed);
|
|
const notFollowed = routings.filter(r => !r.followed);
|
|
|
|
results.summary.agentRouting = {
|
|
total: routings.length,
|
|
followedRecommendation: followed.length,
|
|
followRate: (followed.length / routings.length).toFixed(3)
|
|
};
|
|
|
|
if (followed.length > 5 && notFollowed.length > 5) {
|
|
const followedSuccess = followed.filter(r => r.success).length / followed.length;
|
|
const notFollowedSuccess = notFollowed.filter(r => r.success).length / notFollowed.length;
|
|
|
|
results.summary.agentRouting.followedSuccessRate = followedSuccess.toFixed(3);
|
|
results.summary.agentRouting.notFollowedSuccessRate = notFollowedSuccess.toFixed(3);
|
|
results.summary.agentRouting.delta = (followedSuccess - notFollowedSuccess).toFixed(3);
|
|
results.summary.agentRouting.interpretation = followedSuccess > notFollowedSuccess
|
|
? "Agent recommendations improve task success"
|
|
: "Agent routing needs improvement";
|
|
}
|
|
}
|
|
|
|
// === Calibration Analysis ===
|
|
for (const [bucket, data] of Object.entries(metrics.calibration)) {
|
|
if (data.total >= 5) {
|
|
const actualRate = data.correct / data.total;
|
|
const expectedRate = parseFloat(bucket) + 0.05; // midpoint of bucket
|
|
results.calibration[bucket] = {
|
|
predicted: expectedRate.toFixed(2),
|
|
actual: actualRate.toFixed(3),
|
|
samples: data.total,
|
|
calibrationError: Math.abs(expectedRate - actualRate).toFixed(3)
|
|
};
|
|
}
|
|
}
|
|
|
|
// === Trend Analysis ===
|
|
const days = Object.keys(metrics.dailyStats).sort();
|
|
if (days.length >= 3) {
|
|
const recentDays = days.slice(-7);
|
|
const olderDays = days.slice(-14, -7);
|
|
|
|
const recentRate = recentDays.reduce((sum, d) => {
|
|
const s = metrics.dailyStats[d];
|
|
return sum + (s.commands > 0 ? s.successes / s.commands : 0);
|
|
}, 0) / recentDays.length;
|
|
|
|
if (olderDays.length > 0) {
|
|
const olderRate = olderDays.reduce((sum, d) => {
|
|
const s = metrics.dailyStats[d];
|
|
return sum + (s.commands > 0 ? s.successes / s.commands : 0);
|
|
}, 0) / olderDays.length;
|
|
|
|
results.trends.successRateTrend = {
|
|
recent7Days: recentRate.toFixed(3),
|
|
previous7Days: olderRate.toFixed(3),
|
|
change: (recentRate - olderRate).toFixed(3),
|
|
improving: recentRate > olderRate
|
|
};
|
|
}
|
|
}
|
|
|
|
// === Recommendations ===
|
|
if (results.summary.adviceValue?.delta < 0) {
|
|
results.recommendations.push({
|
|
priority: 'high',
|
|
issue: 'Advice not helping',
|
|
action: 'Review Q-table thresholds and warning triggers'
|
|
});
|
|
}
|
|
|
|
if (results.summary.agentRouting?.delta < 0) {
|
|
results.recommendations.push({
|
|
priority: 'medium',
|
|
issue: 'Agent routing not improving outcomes',
|
|
action: 'Retrain with more agent assignment data'
|
|
});
|
|
}
|
|
|
|
const avgCalibrationError = Object.values(results.calibration)
|
|
.reduce((sum, c) => sum + parseFloat(c.calibrationError), 0) /
|
|
Math.max(1, Object.keys(results.calibration).length);
|
|
|
|
if (avgCalibrationError > 0.15) {
|
|
results.recommendations.push({
|
|
priority: 'medium',
|
|
issue: `Confidence poorly calibrated (avg error: ${avgCalibrationError.toFixed(2)})`,
|
|
action: 'Adjust Q-value scaling or add temperature parameter'
|
|
});
|
|
}
|
|
|
|
if (results.recommendations.length === 0) {
|
|
results.recommendations.push({
|
|
priority: 'info',
|
|
issue: 'None detected',
|
|
action: 'Continue collecting data for more insights'
|
|
});
|
|
}
|
|
|
|
return results;
|
|
}
|
|
|
|
/**
|
|
* CLI
|
|
*/
|
|
const command = process.argv[2];
|
|
|
|
switch (command) {
|
|
case 'record-prediction': {
|
|
const [,, , predicted, actual, confidence] = process.argv;
|
|
const correct = recordPrediction(predicted, actual, { confidence: parseFloat(confidence) || 0 });
|
|
console.log(JSON.stringify({ recorded: true, correct }));
|
|
break;
|
|
}
|
|
|
|
case 'record-command': {
|
|
const [,, , cmdType, success, hadWarning, followedAdvice] = process.argv;
|
|
recordCommandOutcome(cmdType, success === 'true', {
|
|
hadWarning: hadWarning === 'true',
|
|
followedAdvice: followedAdvice === 'true' ? true : followedAdvice === 'false' ? false : undefined
|
|
});
|
|
console.log(JSON.stringify({ recorded: true }));
|
|
break;
|
|
}
|
|
|
|
case 'record-routing': {
|
|
const [,, , recommended, used, success] = process.argv;
|
|
recordAgentRouting(recommended, used, success === 'true');
|
|
console.log(JSON.stringify({ recorded: true }));
|
|
break;
|
|
}
|
|
|
|
case 'effectiveness':
|
|
case 'report': {
|
|
const report = calculateEffectiveness();
|
|
console.log(JSON.stringify(report, null, 2));
|
|
break;
|
|
}
|
|
|
|
case 'reset': {
|
|
if (existsSync(METRICS_FILE)) {
|
|
const backup = METRICS_FILE + '.backup';
|
|
writeFileSync(backup, readFileSync(METRICS_FILE));
|
|
console.log(`Backed up to ${backup}`);
|
|
}
|
|
saveMetrics(loadMetrics()); // Creates fresh metrics
|
|
console.log('Metrics reset');
|
|
break;
|
|
}
|
|
|
|
default:
|
|
console.log(`
|
|
📊 RuVector Intelligence Metrics
|
|
|
|
Commands:
|
|
effectiveness Show effectiveness report
|
|
record-prediction <predicted> <actual> [confidence]
|
|
record-command <type> <success> [hadWarning] [followedAdvice]
|
|
record-routing <recommended> <used> <success>
|
|
reset Reset metrics (backs up existing)
|
|
|
|
Example:
|
|
node metrics.js effectiveness
|
|
node metrics.js record-command cargo true true true
|
|
`);
|
|
}
|