wifi-densepose/vendor/ruvector/.claude/intelligence/metrics.js

#!/usr/bin/env node
/**
 * RuVector Intelligence Metrics
 *
 * Tracks effectiveness of the learning system:
 * - Prediction accuracy (did suggestions help?)
 * - Command success rate trends
 * - Agent routing accuracy
 * - Time-series analysis
 */

import { existsSync, mkdirSync, readFileSync, renameSync, writeFileSync } from 'fs';
import { join, dirname } from 'path';
import { fileURLToPath } from 'url';

const __dirname = dirname(fileURLToPath(import.meta.url));
const DATA_DIR = join(__dirname, 'data');
const METRICS_FILE = join(DATA_DIR, 'metrics.json');

/**
 * Load the metrics store from METRICS_FILE, or build an empty one.
 *
 * Stored data is merged over a default skeleton so that a file written by an
 * older schema (or edited by hand) still exposes every collection the
 * recording and reporting functions index into.
 *
 * @returns {object} Metrics with `predictions`, `commandOutcomes` and
 *   `agentRoutings` arrays plus `dailyStats` and `calibration` objects.
 * @throws {SyntaxError} If the file exists but does not contain valid JSON.
 */
function loadMetrics() {
  const defaults = {
    created: new Date().toISOString(),
    predictions: [],        // { predicted, actual, correct, timestamp }
    commandOutcomes: [],    // { type, success, hadWarning, timestamp }
    agentRoutings: [],      // { recommended, used, success, timestamp }
    dailyStats: {},         // { "2025-01-15": { commands: 10, successes: 8, ... } }
    calibration: {},        // { bucket: { total: 10, correct: 8 } }
  };

  if (!existsSync(METRICS_FILE)) {
    return defaults;
  }

  const stored = JSON.parse(readFileSync(METRICS_FILE, 'utf-8'));
  const isRecord = (value) =>
    value !== null && typeof value === 'object' && !Array.isArray(value);

  // Valid JSON that is not an object (null, a list, a scalar) cannot be a metrics store.
  if (!isRecord(stored)) {
    return defaults;
  }

  const metrics = { ...defaults, ...stored };

  // Repair collections whose stored value has the wrong type, so callers can
  // push into / iterate them without guarding.
  for (const key of ['predictions', 'commandOutcomes', 'agentRoutings']) {
    if (!Array.isArray(metrics[key])) metrics[key] = [];
  }
  for (const key of ['dailyStats', 'calibration']) {
    if (!isRecord(metrics[key])) metrics[key] = {};
  }

  return metrics;
}

/**
 * Persist the metrics store to METRICS_FILE as pretty-printed JSON.
 *
 * Stamps `metrics.lastUpdated` (mutating the argument) before writing, and
 * creates the containing directory when it does not exist yet so the very
 * first save does not fail with ENOENT.
 *
 * @param {object} metrics - Metrics store to write.
 */
function saveMetrics(metrics) {
  metrics.lastUpdated = new Date().toISOString();
  mkdirSync(dirname(METRICS_FILE), { recursive: true });
  writeFileSync(METRICS_FILE, JSON.stringify(metrics, null, 2));
}

/**
 * Record a prediction outcome and update the calibration histogram.
 *
 * @param {*} predicted - Predicted value.
 * @param {*} actual - Observed value; compared to `predicted` with `===`.
 * @param {object} [metadata] - Extra fields stored on the record. They are
 *   spread last, so caller-supplied keys take precedence over the computed
 *   ones. `metadata.confidence` (expected in [0, 1]) selects the calibration
 *   bucket.
 * @returns {boolean} Whether the prediction was correct.
 */
export function recordPrediction(predicted, actual, metadata = {}) {
  const metrics = loadMetrics();
  const correct = predicted === actual;

  metrics.predictions.push({
    predicted,
    actual,
    correct,
    confidence: metadata.confidence || 0,
    timestamp: new Date().toISOString(),
    ...metadata
  });

  // Keep last 1000 predictions
  if (metrics.predictions.length > 1000) {
    metrics.predictions = metrics.predictions.slice(-1000);
  }

  // Update calibration buckets: 0, 0.1, ..., 0.9.
  // Confidence is clamped to [0, 1] and the index capped at 9 so that a
  // confidence of exactly 1.0 joins the top bucket instead of creating a
  // "1" bucket whose midpoint would exceed 100%.
  const confidence = Number(metadata.confidence) || 0;
  const clamped = Math.min(Math.max(confidence, 0), 1);
  const bucket = Math.min(Math.floor(clamped * 10), 9) / 10;
  if (!metrics.calibration[bucket]) {
    metrics.calibration[bucket] = { total: 0, correct: 0 };
  }
  metrics.calibration[bucket].total++;
  if (correct) metrics.calibration[bucket].correct++;

  saveMetrics(metrics);
  return correct;
}

/**
 * Record the outcome of a command and fold it into the per-day statistics.
 *
 * @param {string} cmdType - Command category stored as `type`.
 * @param {boolean} success - Whether the command succeeded.
 * @param {object} [context] - Optional flags: `hadWarning` (a warning was
 *   attached to the command) and `followedAdvice` (stored as given).
 */
export function recordCommandOutcome(cmdType, success, context = {}) {
  const metrics = loadMetrics();
  // Daily stats are keyed by the date part of the ISO (UTC) timestamp.
  const dayKey = new Date().toISOString().split('T')[0];
  const { hadWarning, followedAdvice } = context;

  const outcome = {
    type: cmdType,
    success,
    hadWarning: hadWarning || false,
    followedAdvice,
    timestamp: new Date().toISOString()
  };
  metrics.commandOutcomes.push(outcome);

  // Retain only the 2000 most recent outcomes.
  const overflow = metrics.commandOutcomes.length - 2000;
  if (overflow > 0) {
    metrics.commandOutcomes = metrics.commandOutcomes.slice(overflow);
  }

  // Lazily create today's counters, then bump the ones that apply.
  const daily = metrics.dailyStats[dayKey] || (metrics.dailyStats[dayKey] = {
    commands: 0,
    successes: 0,
    withWarning: 0,
    warningHeeded: 0,
    warningHeededSuccess: 0
  });

  daily.commands++;
  if (success) {
    daily.successes++;
  }
  if (hadWarning) {
    daily.withWarning++;
  }
  if (hadWarning && followedAdvice) {
    daily.warningHeeded++;
    if (success) {
      daily.warningHeededSuccess++;
    }
  }

  saveMetrics(metrics);
}

/**
 * Record which agent was recommended, which one was actually used, and
 * whether the task succeeded.
 *
 * @param {string} recommended - Agent suggested by the router.
 * @param {string} actualUsed - Agent that was used; stored as `used`.
 * @param {boolean} success - Task outcome.
 */
export function recordAgentRouting(recommended, actualUsed, success) {
  const metrics = loadMetrics();

  const routing = {
    recommended,
    used: actualUsed,
    // The recommendation counts as followed only on an exact (===) match.
    followed: recommended === actualUsed,
    success,
    timestamp: new Date().toISOString()
  };
  metrics.agentRoutings.push(routing);

  // Retain only the 500 most recent routings.
  const overflow = metrics.agentRoutings.length - 500;
  if (overflow > 0) {
    metrics.agentRoutings = metrics.agentRoutings.slice(overflow);
  }

  saveMetrics(metrics);
}

/**
 * Fraction of `items` whose `success` flag is truthy.
 * Callers pass non-empty arrays (an empty one would yield NaN).
 */
function successFraction(items) {
  return items.filter(item => item.success).length / items.length;
}

/**
 * Unweighted mean of the per-day success rates for the given day keys.
 * A day with zero commands contributes a rate of 0.
 */
function meanDailySuccessRate(dailyStats, days) {
  const sumOfRates = days.reduce((sum, day) => {
    const stats = dailyStats[day];
    return sum + (stats.commands > 0 ? stats.successes / stats.commands : 0);
  }, 0);
  return sumOfRates / days.length;
}

/**
 * Calculate effectiveness metrics from the stored history.
 *
 * Sections are only emitted when enough data exists:
 * - `summary.predictionAccuracy` / `commandSuccess` / `agentRouting`: any data.
 * - `summary.warningImpact`: more than 10 outcomes both with and without a warning.
 * - `summary.adviceValue` and the agent-routing success split: more than 5
 *   samples on each side of the comparison.
 * - `calibration[bucket]`: at least 5 predictions in the bucket.
 * - `trends.successRateTrend`: at least 3 recorded days and a non-empty
 *   "previous" window (i.e. more than 7 recorded days).
 *
 * All rates and deltas are reported as fixed-precision strings.
 *
 * @returns {{generated: string, summary: object, trends: object,
 *   calibration: object, recommendations: Array<object>}} The report.
 */
export function calculateEffectiveness() {
  const metrics = loadMetrics();
  const results = {
    generated: new Date().toISOString(),
    summary: {},
    trends: {},
    calibration: {},
    recommendations: []
  };
  const { summary, recommendations } = results;

  // === Prediction Accuracy ===
  if (metrics.predictions.length > 0) {
    const correct = metrics.predictions.filter(p => p.correct).length;
    summary.predictionAccuracy = {
      total: metrics.predictions.length,
      correct,
      rate: (correct / metrics.predictions.length).toFixed(3)
    };
  }

  // === Command Success Rates ===
  if (metrics.commandOutcomes.length > 0) {
    const outcomes = metrics.commandOutcomes;
    const successes = outcomes.filter(o => o.success).length;

    // Overall
    summary.commandSuccess = {
      total: outcomes.length,
      successes,
      rate: (successes / outcomes.length).toFixed(3)
    };

    // With vs without warnings
    const withWarning = outcomes.filter(o => o.hadWarning);
    const withoutWarning = outcomes.filter(o => !o.hadWarning);

    if (withWarning.length > 10 && withoutWarning.length > 10) {
      const warningSuccessRate = successFraction(withWarning);
      const noWarningSuccessRate = successFraction(withoutWarning);

      summary.warningImpact = {
        withWarning: { total: withWarning.length, rate: warningSuccessRate.toFixed(3) },
        withoutWarning: { total: withoutWarning.length, rate: noWarningSuccessRate.toFixed(3) },
        delta: (noWarningSuccessRate - warningSuccessRate).toFixed(3),
        interpretation: warningSuccessRate < noWarningSuccessRate
          ? "Warnings correctly identify risky commands"
          : "Warnings may be too aggressive"
      };
    }

    // Heeded vs ignored warnings. Outcomes whose followedAdvice is undefined
    // (unknown) fall into neither group.
    const heeded = withWarning.filter(o => o.followedAdvice);
    const ignored = withWarning.filter(o => o.followedAdvice === false);

    if (heeded.length > 5 && ignored.length > 5) {
      const heededSuccess = successFraction(heeded);
      const ignoredSuccess = successFraction(ignored);

      summary.adviceValue = {
        heeded: { total: heeded.length, successRate: heededSuccess.toFixed(3) },
        ignored: { total: ignored.length, successRate: ignoredSuccess.toFixed(3) },
        delta: (heededSuccess - ignoredSuccess).toFixed(3),
        interpretation: heededSuccess > ignoredSuccess
          ? "Following advice improves outcomes"
          : "Advice may not be helpful"
      };
    }
  }

  // === Agent Routing Accuracy ===
  if (metrics.agentRoutings.length > 0) {
    const routings = metrics.agentRoutings;
    const followed = routings.filter(r => r.followed);
    const notFollowed = routings.filter(r => !r.followed);

    summary.agentRouting = {
      total: routings.length,
      followedRecommendation: followed.length,
      followRate: (followed.length / routings.length).toFixed(3)
    };

    if (followed.length > 5 && notFollowed.length > 5) {
      const followedSuccess = successFraction(followed);
      const notFollowedSuccess = successFraction(notFollowed);

      summary.agentRouting.followedSuccessRate = followedSuccess.toFixed(3);
      summary.agentRouting.notFollowedSuccessRate = notFollowedSuccess.toFixed(3);
      summary.agentRouting.delta = (followedSuccess - notFollowedSuccess).toFixed(3);
      summary.agentRouting.interpretation = followedSuccess > notFollowedSuccess
        ? "Agent recommendations improve task success"
        : "Agent routing needs improvement";
    }
  }

  // === Calibration Analysis ===
  for (const [bucket, data] of Object.entries(metrics.calibration)) {
    if (data.total >= 5) {
      const actualRate = data.correct / data.total;
      // Bucket midpoint, clamped to a valid probability: a stored "1" bucket
      // (confidence of exactly 1.0) must be compared against 1.00, not 1.05.
      const expectedRate = Math.min(Math.max(parseFloat(bucket) + 0.05, 0), 1);
      results.calibration[bucket] = {
        predicted: expectedRate.toFixed(2),
        actual: actualRate.toFixed(3),
        samples: data.total,
        calibrationError: Math.abs(expectedRate - actualRate).toFixed(3)
      };
    }
  }

  // === Trend Analysis ===
  // Windows are the last 7 / preceding 7 *recorded* days (ISO keys sort
  // chronologically), not calendar days.
  const days = Object.keys(metrics.dailyStats).sort();
  if (days.length >= 3) {
    const recentDays = days.slice(-7);
    const olderDays = days.slice(-14, -7);
    const recentRate = meanDailySuccessRate(metrics.dailyStats, recentDays);

    if (olderDays.length > 0) {
      const olderRate = meanDailySuccessRate(metrics.dailyStats, olderDays);

      results.trends.successRateTrend = {
        recent7Days: recentRate.toFixed(3),
        previous7Days: olderRate.toFixed(3),
        change: (recentRate - olderRate).toFixed(3),
        improving: recentRate > olderRate
      };
    }
  }

  // === Recommendations ===
  // Deltas are stored as rounded strings; convert explicitly before comparing.
  // A missing section/delta converts to NaN, which never compares below zero.
  if (Number(summary.adviceValue?.delta) < 0) {
    recommendations.push({
      priority: 'high',
      issue: 'Advice not helping',
      action: 'Review Q-table thresholds and warning triggers'
    });
  }

  if (Number(summary.agentRouting?.delta) < 0) {
    recommendations.push({
      priority: 'medium',
      issue: 'Agent routing not improving outcomes',
      action: 'Retrain with more agent assignment data'
    });
  }

  const calibrationRows = Object.values(results.calibration);
  const avgCalibrationError = calibrationRows
    .reduce((sum, row) => sum + parseFloat(row.calibrationError), 0) /
    Math.max(1, calibrationRows.length);

  if (avgCalibrationError > 0.15) {
    recommendations.push({
      priority: 'medium',
      issue: `Confidence poorly calibrated (avg error: ${avgCalibrationError.toFixed(2)})`,
      action: 'Adjust Q-value scaling or add temperature parameter'
    });
  }

  if (recommendations.length === 0) {
    recommendations.push({
      priority: 'info',
      issue: 'None detected',
      action: 'Continue collecting data for more insights'
    });
  }

  return results;
}

/**
 * CLI
 *
 * Dispatches on the first command-line argument:
 *   record-prediction, record-command, record-routing  - append one record
 *   effectiveness | report                             - print the JSON report
 *   reset                                              - back up and clear metrics
 * Anything else prints usage.
 *
 * NOTE(review): this dispatch runs at module load, so importing this file
 * from another module also executes it against that process's argv —
 * consider guarding it with an "is main module" check.
 */
const command = process.argv[2];

switch (command) {
  case 'record-prediction': {
    const [,, , predicted, actual, confidence] = process.argv;
    // A missing or non-numeric confidence is recorded as 0.
    const correct = recordPrediction(predicted, actual, { confidence: parseFloat(confidence) || 0 });
    console.log(JSON.stringify({ recorded: true, correct }));
    break;
  }

  case 'record-command': {
    const [,, , cmdType, success, hadWarning, followedAdvice] = process.argv;
    recordCommandOutcome(cmdType, success === 'true', {
      hadWarning: hadWarning === 'true',
      // Tri-state: anything other than the literals 'true'/'false' means "unknown".
      followedAdvice: followedAdvice === 'true' ? true : followedAdvice === 'false' ? false : undefined
    });
    console.log(JSON.stringify({ recorded: true }));
    break;
  }

  case 'record-routing': {
    const [,, , recommended, used, success] = process.argv;
    recordAgentRouting(recommended, used, success === 'true');
    console.log(JSON.stringify({ recorded: true }));
    break;
  }

  case 'effectiveness':
  case 'report': {
    const report = calculateEffectiveness();
    console.log(JSON.stringify(report, null, 2));
    break;
  }

  case 'reset': {
    if (existsSync(METRICS_FILE)) {
      const backup = METRICS_FILE + '.backup';
      // Move (not copy) the current file aside: loadMetrics() only builds a
      // fresh structure when METRICS_FILE is absent, so leaving the file in
      // place would just reload and re-save the old data.
      renameSync(METRICS_FILE, backup);
      console.log(`Backed up to ${backup}`);
    }
    saveMetrics(loadMetrics()); // File is gone now, so this writes fresh metrics
    console.log('Metrics reset');
    break;
  }

  default:
    console.log(`
📊 RuVector Intelligence Metrics

Commands:
  effectiveness     Show effectiveness report
  record-prediction <predicted> <actual> [confidence]
  record-command <type> <success> [hadWarning] [followedAdvice]
  record-routing <recommended> <used> <success>
  reset             Reset metrics (backs up existing)

Example:
  node metrics.js effectiveness
  node metrics.js record-command cargo true true true
`);
}