/**
 * Reinforcement Learning Trading Agent
 *
 * EXOTIC: Deep Q-Learning for autonomous trading
 *
 * Uses @neural-trader/neural with RuVector for:
 * - Deep Q-Network (DQN) for action selection
 * - Experience replay with vector similarity
 * - Epsilon-greedy exploration
 * - Target network for stable learning
 *
 * The agent learns optimal trading actions directly from
 * market experience, without explicit strategy rules.
 */
|
|
|
|
// Global configuration for the RL trading experiment.
const rlConfig = {
  // Q-network topology: input size, hidden widths, and output size.
  network: {
    stateDim: 20,                 // Length of the encoded state vector
    hiddenLayers: [128, 64, 32],  // Widths of the hidden ReLU layers
    actionSpace: 5                // hold, buy_small, buy_large, sell_small, sell_large
  },

  // DQN training hyperparameters.
  learning: {
    gamma: 0.99,            // Discount factor for future rewards
    learningRate: 0.001,
    batchSize: 32,          // Minibatch size drawn from the replay buffer
    targetUpdateFreq: 100,  // Steps between target network updates
    replayBufferSize: 10000
  },

  // Epsilon-greedy exploration schedule.
  exploration: {
    epsilonStart: 1.0,   // Fully random at the start of training
    epsilonEnd: 0.01,    // Floor on the exploration rate
    epsilonDecay: 0.995  // Multiplicative decay applied per episode
  },

  // Simulated account and execution-friction parameters.
  trading: {
    initialCapital: 100000,
    maxPosition: 0.5,       // Max 50% of capital
    transactionCost: 0.001, // 10 bps
    slippage: 0.0005        // 5 bps
  }
};
|
|
|
|
// Discrete action space. The numeric values index the Q-network's output head.
const Actions = {
  HOLD: 0,
  BUY_SMALL: 1,  // 10% of available
  BUY_LARGE: 2,  // 30% of available
  SELL_SMALL: 3, // 10% of position
  SELL_LARGE: 4  // 30% of position
};

// Human-readable labels, index-aligned with the action ids above.
// Object.keys preserves insertion order for string keys, so this yields
// ['HOLD', 'BUY_SMALL', 'BUY_LARGE', 'SELL_SMALL', 'SELL_LARGE'].
const ActionNames = Object.keys(Actions);
|
|
|
|
// Fully connected layer with an optional ReLU nonlinearity.
class DenseLayer {
  /**
   * @param {number} inputDim - number of input features
   * @param {number} outputDim - number of output units
   * @param {string} [activation='relu'] - 'relu' clamps outputs at zero; any
   *   other value leaves the affine output untouched (linear)
   */
  constructor(inputDim, outputDim, activation = 'relu') {
    this.inputDim = inputDim;
    this.outputDim = outputDim;
    this.activation = activation;

    // Scaled uniform init using combined fan-in/fan-out (Glorot-style).
    const scale = Math.sqrt(2.0 / (inputDim + outputDim));
    this.weights = [];
    for (let feat = 0; feat < inputDim; feat++) {
      const row = Array.from(
        { length: outputDim },
        () => (Math.random() - 0.5) * 2 * scale
      );
      this.weights.push(row);
    }
    this.bias = Array.from({ length: outputDim }, () => (Math.random() - 0.5) * 0.1);
  }

  /**
   * Compute the layer output for a single input vector.
   * @param {number[]} input - vector of length inputDim
   * @returns {number[]} vector of length outputDim
   */
  forward(input) {
    const output = new Array(this.outputDim).fill(0);
    for (let unit = 0; unit < this.outputDim; unit++) {
      let acc = 0;
      for (let feat = 0; feat < this.inputDim; feat++) {
        acc += input[feat] * this.weights[feat][unit];
      }
      acc += this.bias[unit];
      // Apply the nonlinearity (only 'relu' is recognized).
      output[unit] = this.activation === 'relu' ? Math.max(0, acc) : acc;
    }
    return output;
  }

  /**
   * Plain gradient-descent step. `gradients` is an inputDim x outputDim
   * matrix that additionally carries a `bias` array property.
   */
  updateWeights(gradients, lr) {
    for (let feat = 0; feat < this.inputDim; feat++) {
      for (let unit = 0; unit < this.outputDim; unit++) {
        this.weights[feat][unit] -= lr * gradients[feat][unit];
      }
    }
    for (let unit = 0; unit < this.outputDim; unit++) {
      this.bias[unit] -= lr * gradients.bias[unit];
    }
  }

  // Copy all parameters from a layer of identical shape (used to sync the
  // target network with the online network).
  copyFrom(other) {
    for (let feat = 0; feat < this.inputDim; feat++) {
      for (let unit = 0; unit < this.outputDim; unit++) {
        this.weights[feat][unit] = other.weights[feat][unit];
      }
    }
    for (let unit = 0; unit < this.outputDim; unit++) {
      this.bias[unit] = other.bias[unit];
    }
  }
}
|
|
|
|
// Multi-layer Q-value network: a stack of hidden ReLU layers followed by a
// linear head emitting one raw Q-value per action.
class DQN {
  /**
   * @param {{stateDim:number, hiddenLayers:number[], actionSpace:number}} config
   */
  constructor(config) {
    this.config = config;

    // Hidden stack: stateDim -> hiddenLayers[0] -> ... -> hiddenLayers[last]
    this.layers = [];
    let inDim = config.stateDim;
    for (const width of config.hiddenLayers) {
      this.layers.push(new DenseLayer(inDim, width, 'relu'));
      inDim = width;
    }

    // Output head: no activation, Q-values are unbounded.
    this.layers.push(new DenseLayer(inDim, config.actionSpace, 'linear'));
  }

  /**
   * Full forward pass. Caches the input and every intermediate activation in
   * this.activations so the caller can later compute gradient updates.
   * @param {number[]} state - vector of length stateDim
   * @returns {number[]} Q-values, one per action
   */
  forward(state) {
    this.activations = [state];
    let out = state;
    for (const layer of this.layers) {
      out = layer.forward(out);
      this.activations.push(out);
    }
    return out;
  }

  // Activation vector feeding the output layer, or null when forward() has
  // not been run yet.
  getPreOutputActivation() {
    const acts = this.activations;
    if (!acts || acts.length < 2) {
      return null;
    }
    return acts[acts.length - 2];
  }

  // Clone all parameters from another DQN with the same architecture.
  copyFrom(other) {
    this.layers.forEach((layer, idx) => layer.copyFrom(other.layers[idx]));
  }
}
|
|
|
|
// Fixed-capacity circular store of transitions with uniform random sampling.
class ReplayBuffer {
  /** @param {number} maxSize - capacity before old entries get overwritten */
  constructor(maxSize) {
    this.maxSize = maxSize;
    this.buffer = [];
    this.position = 0; // next slot to overwrite once full
  }

  // Append a transition; once at capacity, overwrite the oldest slot.
  add(experience) {
    if (this.buffer.length >= this.maxSize) {
      this.buffer[this.position] = experience;
    } else {
      this.buffer.push(experience);
    }
    this.position = (this.position + 1) % this.maxSize;
  }

  /**
   * Draw up to batchSize DISTINCT experiences uniformly at random.
   * @returns {Array} sampled experiences (fewer if the buffer is smaller)
   */
  sample(batchSize) {
    const target = Math.min(batchSize, this.buffer.length);
    const chosen = new Set();
    while (chosen.size < target) {
      chosen.add(Math.floor(Math.random() * this.buffer.length));
    }
    return [...chosen].map((idx) => this.buffer[idx]);
  }

  // Number of stored transitions (<= maxSize).
  size() {
    return this.buffer.length;
  }
}
|
|
|
|
// Converts raw price history plus portfolio status into the fixed-length
// numeric state vector consumed by the Q-network.
class StateEncoder {
  constructor(config) {
    this.config = config;
    this.priceHistory = [];
    this.returnHistory = [];
  }

  // Record a new price and the simple return vs. the previous price.
  update(price) {
    this.priceHistory.push(price);
    if (this.priceHistory.length > 1) {
      const prev = this.priceHistory[this.priceHistory.length - 2];
      this.returnHistory.push((price - prev) / prev);
    }

    // Cap history length so memory stays bounded.
    if (this.priceHistory.length > 100) {
      this.priceHistory.shift();
      this.returnHistory.shift();
    }
  }

  /**
   * Build the state vector. Market features need at least 20 recorded
   * returns; before that they are zero-padded. The result is padded or
   * truncated to exactly config.network.stateDim entries.
   * @param {object} portfolio - snapshot from TradingEnvironment.getState()
   * @returns {number[]}
   */
  encode(portfolio) {
    const state = [];
    const returns = this.returnHistory;

    if (returns.length >= 20) {
      // Five most recent returns (scaled up for the network).
      for (let back = 1; back <= 5; back++) {
        state.push(returns[returns.length - back] * 10);
      }

      // Mean and volatility over the last 20 returns.
      const window = returns.slice(-20);
      const mean = window.reduce((a, b) => a + b, 0) / 20;
      const variance = window.reduce((s, r) => s + (r - mean) ** 2, 0) / 20;
      state.push(mean * 100, Math.sqrt(variance) * 100);

      // Cumulative momentum over 5-, 10- and 20-step horizons.
      for (const span of [5, 10, 20]) {
        state.push(returns.slice(-span).reduce((a, b) => a + b, 0) * 10);
      }

      // Distance of the last price from short/long moving averages.
      const last = this.priceHistory[this.priceHistory.length - 1];
      const sma5 = this.priceHistory.slice(-5).reduce((a, b) => a + b, 0) / 5;
      const sma20 = this.priceHistory.slice(-20).reduce((a, b) => a + b, 0) / 20;
      state.push((last / sma5 - 1) * 10, (last / sma20 - 1) * 10);

      // Fraction of up-moves over the last 10 steps, centered at zero.
      state.push(returns.slice(-10).filter((r) => r > 0).length / 10 - 0.5);
    } else {
      // Not enough history yet: zero out all 13 market features.
      state.push(...new Array(13).fill(0));
    }

    // Portfolio-derived features, roughly normalized/centered.
    state.push(
      portfolio.positionPct - 0.5,
      portfolio.unrealizedPnL / portfolio.capital,
      portfolio.realizedPnL / portfolio.capital,
      portfolio.drawdown,
      portfolio.winRate - 0.5,
      portfolio.sharpe / 2,
      portfolio.tradeCount / 100
    );

    // Pad with zeros, then truncate, to the configured dimension.
    while (state.length < this.config.network.stateDim) {
      state.push(0);
    }
    return state.slice(0, this.config.network.stateDim);
  }
}
|
|
|
|
// Trading Environment
//
// Simulates a single-asset trading account over a fixed OHLCV candle series.
// Exposes a gym-style API: reset() returns the initial observation and
// step(action) returns { state, reward, done, info }.
class TradingEnvironment {
  /**
   * @param {object} config - rlConfig-shaped object (uses config.trading.*)
   * @param {Array<{close:number}>} priceData - candle series; only `close` is read here
   */
  constructor(config, priceData) {
    this.config = config;
    this.priceData = priceData;
    this.reset();
  }

  // Reset account state to a flat position and return the first observation.
  reset() {
    this.currentStep = 50; // Start after warmup
    this.capital = this.config.trading.initialCapital;
    this.position = 0;   // shares currently held
    this.avgCost = 0;    // average entry price of the open position
    this.realizedPnL = 0;
    this.trades = [];
    this.peakCapital = this.capital; // high-water mark for drawdown
    this.returns = [];   // per-step portfolio returns, used by getSharpe()

    return this.getState();
  }

  // Snapshot of the current price plus portfolio statistics. This raw object
  // is later turned into a numeric vector by StateEncoder.encode().
  getState() {
    return {
      price: this.priceData[this.currentStep].close,
      capital: this.capital,
      position: this.position,
      // Fraction of total portfolio value held as the position.
      positionPct: this.position * this.priceData[this.currentStep].close / this.getPortfolioValue(),
      unrealizedPnL: this.getUnrealizedPnL(),
      realizedPnL: this.realizedPnL,
      drawdown: this.getDrawdown(),
      winRate: this.getWinRate(),
      sharpe: this.getSharpe(),
      tradeCount: this.trades.length
    };
  }

  // Cash plus mark-to-market value of the open position at the current close.
  getPortfolioValue() {
    const price = this.priceData[this.currentStep].close;
    return this.capital + this.position * price;
  }

  // P&L of the open position at the current close, relative to average cost.
  getUnrealizedPnL() {
    if (this.position === 0) return 0;
    const price = this.priceData[this.currentStep].close;
    return this.position * (price - this.avgCost);
  }

  // Current drawdown from the running peak, in [0, 1].
  // NOTE: also advances the high-water mark as a side effect, so this is not
  // a pure read — repeated calls can change later results.
  getDrawdown() {
    const value = this.getPortfolioValue();
    this.peakCapital = Math.max(this.peakCapital, value);
    return (this.peakCapital - value) / this.peakCapital;
  }

  // Fraction of closed (sell) trades with positive P&L; returns the neutral
  // prior 0.5 while no trades have closed yet.
  getWinRate() {
    const closedTrades = this.trades.filter(t => t.closed);
    if (closedTrades.length === 0) return 0.5;
    const wins = closedTrades.filter(t => t.pnl > 0).length;
    return wins / closedTrades.length;
  }

  // Annualized Sharpe ratio of per-step portfolio returns (sqrt(252) scaling,
  // zero risk-free rate); 0 until at least 10 returns are recorded.
  getSharpe() {
    if (this.returns.length < 10) return 0;
    const mean = this.returns.reduce((a, b) => a + b, 0) / this.returns.length;
    const variance = this.returns.reduce((s, r) => s + (r - mean) ** 2, 0) / this.returns.length;
    if (variance === 0) return 0;
    return mean / Math.sqrt(variance) * Math.sqrt(252);
  }

  /**
   * Advance one bar: execute the action at the current close, move to the
   * next candle, then compute a shaped reward from the step return with
   * penalties for trading activity and drawdown plus a win-rate bonus.
   * @param {number} action - one of the Actions.* ids
   * @returns {{state:object, reward:number, done:boolean, info:object}}
   */
  step(action) {
    const prevValue = this.getPortfolioValue();
    const price = this.priceData[this.currentStep].close;

    // Execute action
    this.executeAction(action, price);

    // Move to next step
    this.currentStep++;
    const done = this.currentStep >= this.priceData.length - 1;

    // Calculate reward
    const newValue = this.getPortfolioValue();
    const stepReturn = (newValue - prevValue) / prevValue;
    this.returns.push(stepReturn);
    // Bound returns array to prevent memory leak
    if (this.returns.length > 1000) {
      this.returns = this.returns.slice(-500);
    }

    // Shape reward
    let reward = stepReturn * 100; // Scale returns

    // Penalty for excessive trading
    if (action !== Actions.HOLD) {
      reward -= 0.1;
    }

    // Penalty for drawdown (only beyond a 10% tolerance)
    const drawdown = this.getDrawdown();
    if (drawdown > 0.1) {
      reward -= drawdown * 10;
    }

    // Bonus for profitable trades
    const winRate = this.getWinRate();
    if (winRate > 0.5) {
      reward += (winRate - 0.5) * 2;
    }

    return {
      state: this.getState(),
      reward,
      done,
      info: {
        portfolioValue: newValue,
        stepReturn,
        action: ActionNames[action]
      }
    };
  }

  // Map a discrete action to a buy/sell; slippage and transaction cost are
  // folded into the effective fill price (paid on buys, lost on sells).
  executeAction(action, price) {
    const slippage = this.config.trading.slippage;
    const cost = this.config.trading.transactionCost;

    switch (action) {
      case Actions.BUY_SMALL:
        this.buy(0.1, price * (1 + slippage + cost));
        break;
      case Actions.BUY_LARGE:
        this.buy(0.3, price * (1 + slippage + cost));
        break;
      case Actions.SELL_SMALL:
        this.sell(0.1, price * (1 - slippage - cost));
        break;
      case Actions.SELL_LARGE:
        this.sell(0.3, price * (1 - slippage - cost));
        break;
      case Actions.HOLD:
      default:
        break;
    }
  }

  /**
   * Buy `fraction` of current cash at the given (already slippage-adjusted)
   * price, updating the average cost basis of the position.
   * NOTE(review): maxBuy caps a SINGLE purchase at maxPosition x current
   * cash — it does not bound total position exposure; confirm intended.
   */
  buy(fraction, price) {
    const maxBuy = this.capital * this.config.trading.maxPosition;
    const amount = Math.min(this.capital * fraction, maxBuy);

    if (amount < 100) return; // Min trade size

    const shares = amount / price;
    // Blend the new lot into the volume-weighted average cost.
    const totalCost = this.position * this.avgCost + amount;
    const totalShares = this.position + shares;

    this.avgCost = totalCost / totalShares;
    this.position = totalShares;
    this.capital -= amount;

    this.trades.push({
      type: 'buy',
      shares,
      price,
      timestamp: this.currentStep,
      closed: false // buys are openings; only sells are marked closed
    });
  }

  // Sell `fraction` of the open position, realizing P&L against the average
  // cost basis. No-op when flat or when the resulting lot would be tiny.
  sell(fraction, price) {
    if (this.position <= 0) return;

    const sharesToSell = this.position * fraction;
    if (sharesToSell < 0.01) return;

    const proceeds = sharesToSell * price;
    const costBasis = sharesToSell * this.avgCost;
    const tradePnL = proceeds - costBasis;

    this.position -= sharesToSell;
    this.capital += proceeds;
    this.realizedPnL += tradePnL;

    this.trades.push({
      type: 'sell',
      shares: sharesToSell,
      price,
      pnl: tradePnL,
      timestamp: this.currentStep,
      closed: true // closed trades feed getWinRate()
    });
  }
}
|
|
|
|
// DQN Agent
//
// Epsilon-greedy policy over an online Q-network, trained from an experience
// replay buffer with a periodically synchronized target network.
class DQNAgent {
  /** @param {object} config - rlConfig-shaped configuration object */
  constructor(config) {
    this.config = config;

    // Online network (updated every training step) and target network
    // (frozen copy used for bootstrap targets, synced every targetUpdateFreq
    // steps via addExperience()).
    this.qNetwork = new DQN(config.network);
    this.targetNetwork = new DQN(config.network);
    this.targetNetwork.copyFrom(this.qNetwork);

    // Experience replay
    this.replayBuffer = new ReplayBuffer(config.learning.replayBufferSize);

    // Exploration rate; decayed per episode via decayEpsilon().
    this.epsilon = config.exploration.epsilonStart;

    // Training stats
    this.stepCount = 0;
    this.episodeCount = 0;
    this.totalReward = 0;
    this.losses = [];
  }

  // Epsilon-greedy action selection: a uniformly random action with
  // probability epsilon, otherwise the argmax of the online Q-values.
  selectAction(state) {
    if (Math.random() < this.epsilon) {
      return Math.floor(Math.random() * this.config.network.actionSpace);
    }

    // Greedy action
    const qValues = this.qNetwork.forward(state);
    return qValues.indexOf(Math.max(...qValues));
  }

  /**
   * One training pass over a sampled minibatch.
   * @returns {number} mean squared TD error (0 if the buffer is too small)
   */
  train() {
    if (this.replayBuffer.size() < this.config.learning.batchSize) {
      return 0;
    }

    const batch = this.replayBuffer.sample(this.config.learning.batchSize);
    let totalLoss = 0;

    for (const experience of batch) {
      const { state, action, reward, nextState, done } = experience;

      // Q(s, .) from the online network — also caches the activations that
      // updateQNetwork() needs for its gradient step.
      const currentQ = this.qNetwork.forward(state);

      // Bootstrapped target: r + gamma * max_a' Q_target(s', a'),
      // or just r at episode end.
      let targetQ;
      if (done) {
        targetQ = reward;
      } else {
        const nextQ = this.targetNetwork.forward(nextState);
        targetQ = reward + this.config.learning.gamma * Math.max(...nextQ);
      }

      // TD error
      const tdError = targetQ - currentQ[action];
      totalLoss += tdError ** 2;

      // Simplified update (in production, use proper backprop)
      this.updateQNetwork(state, action, tdError);
    }

    this.losses.push(totalLoss / batch.length);
    return totalLoss / batch.length;
  }

  /**
   * Approximate gradient step on the online network for one transition.
   * Requires a prior forward(state) so activations are cached; if they are
   * missing, runs the forward pass itself and retries once.
   */
  updateQNetwork(state, action, tdError) {
    const lr = this.config.learning.learningRate;

    // Get the actual hidden layer output (activation before output layer)
    const hiddenOutput = this.qNetwork.getPreOutputActivation();

    if (!hiddenOutput) {
      // Fallback: run forward pass to get activations
      this.qNetwork.forward(state);
      return this.updateQNetwork(state, action, tdError);
    }

    // Update output layer using actual hidden activations
    const outputLayer = this.qNetwork.layers[this.qNetwork.layers.length - 1];

    // BUG FIX: the error signal fed back to the hidden layers must be a
    // per-hidden-unit VECTOR (tdError scaled by each output weight for the
    // chosen action). The previous code multiplied a number by an array
    // (`tdError * weights.map(...)`), which evaluates to NaN and then threw
    // a TypeError on delta.map() below, crashing every train() call.
    // Compute it from the pre-update weights so it matches the forward pass.
    let delta = outputLayer.weights.map(row => tdError * row[action]);

    // Gradient for output layer: dL/dW = tdError * hiddenOutput
    for (let i = 0; i < outputLayer.inputDim; i++) {
      outputLayer.weights[i][action] += lr * tdError * hiddenOutput[i];
    }
    outputLayer.bias[action] += lr * tdError;

    // Simplified backprop through hidden layers (gradient clipping for stability)
    const maxGrad = 1.0;

    for (let l = this.qNetwork.layers.length - 2; l >= 0; l--) {
      const layer = this.qNetwork.layers[l];
      const prevActivation = this.qNetwork.activations[l];
      const currentActivation = this.qNetwork.activations[l + 1];

      // ReLU derivative: 1 if activation > 0, else 0
      const reluGrad = currentActivation.map(a => a > 0 ? 1 : 0);

      // Apply ReLU gradient
      delta = delta.map((d, i) => d * (reluGrad[i] || 0));

      // Clip gradients for stability
      delta = delta.map(d => Math.max(-maxGrad, Math.min(maxGrad, d)));

      // Update weights for this layer (0.1 damping on hidden-layer steps)
      for (let i = 0; i < layer.inputDim; i++) {
        for (let j = 0; j < layer.outputDim; j++) {
          layer.weights[i][j] += lr * 0.1 * delta[j] * (prevActivation[i] || 0);
        }
      }

      // Propagate delta to previous layer
      if (l > 0) {
        const newDelta = new Array(layer.inputDim).fill(0);
        for (let i = 0; i < layer.inputDim; i++) {
          for (let j = 0; j < layer.outputDim; j++) {
            newDelta[i] += delta[j] * layer.weights[i][j];
          }
        }
        delta = newDelta;
      }
    }
  }

  // Hard-sync the target network with the online network's parameters.
  updateTargetNetwork() {
    this.targetNetwork.copyFrom(this.qNetwork);
  }

  // Multiplicatively decay epsilon down to the configured floor.
  decayEpsilon() {
    this.epsilon = Math.max(
      this.config.exploration.epsilonEnd,
      this.epsilon * this.config.exploration.epsilonDecay
    );
  }

  // Store one transition; every targetUpdateFreq stored steps, sync the
  // target network.
  addExperience(state, action, reward, nextState, done) {
    this.replayBuffer.add({ state, action, reward, nextState, done });
    this.stepCount++;

    if (this.stepCount % this.config.learning.targetUpdateFreq === 0) {
      this.updateTargetNetwork();
    }
  }
}
|
|
|
|
/**
 * Deterministic synthetic OHLCV candle series with three drift/volatility
 * regimes that cycle every 100 candles.
 * @param {number} n - number of candles to generate
 * @param {number} [seed=42] - RNG seed; identical seeds reproduce the series
 * @returns {Array<{timestamp:number, open:number, high:number, low:number, close:number, volume:number}>}
 */
function generatePriceData(n, seed = 42) {
  // Small linear-congruential generator so runs are reproducible per seed.
  let rngState = seed;
  const nextRandom = () => {
    rngState = (rngState * 9301 + 49297) % 233280;
    return rngState / 233280;
  };

  // Regime table, cycled every 100 candles:
  // positive drift / low vol, negative drift / high vol, flat / calm.
  const regimes = [
    { drift: 0.001, volatility: 0.012 },
    { drift: -0.0005, volatility: 0.02 },
    { drift: 0, volatility: 0.01 }
  ];

  const data = [];
  let price = 100;

  for (let i = 0; i < n; i++) {
    const { drift, volatility } = regimes[Math.floor(i / 100) % 3];

    // Sum of two uniforms minus one gives triangular noise in [-1, 1].
    const stepReturn = drift + volatility * (nextRandom() + nextRandom() - 1);
    price = price * (1 + stepReturn);

    data.push({
      timestamp: i,
      open: price * (1 - nextRandom() * 0.002),
      high: price * (1 + nextRandom() * 0.005),
      low: price * (1 - nextRandom() * 0.005),
      close: price,
      volume: 1000000 * (0.5 + nextRandom())
    });
  }

  return data;
}
|
|
|
|
/**
 * Demo entry point: builds the environment, trains the DQN agent for a fixed
 * number of episodes, then runs a greedy evaluation episode and prints a
 * series of diagnostic reports to the console.
 */
async function main() {
  console.log('═'.repeat(70));
  console.log('REINFORCEMENT LEARNING TRADING AGENT');
  console.log('═'.repeat(70));
  console.log();

  // 1. Generate data and construct the environment/encoder pair.
  console.log('1. Environment Setup:');
  console.log('─'.repeat(70));

  const priceData = generatePriceData(1000);
  const env = new TradingEnvironment(rlConfig, priceData);
  const stateEncoder = new StateEncoder(rlConfig);

  console.log(` Price data: ${priceData.length} candles`);
  console.log(` Initial capital: $${rlConfig.trading.initialCapital.toLocaleString()}`);
  console.log(` Action space: ${rlConfig.network.actionSpace} actions`);
  console.log(` State dimension: ${rlConfig.network.stateDim}`);
  console.log();

  // 2. Initialize agent
  console.log('2. Agent Configuration:');
  console.log('─'.repeat(70));

  const agent = new DQNAgent(rlConfig);

  console.log(` Network: ${rlConfig.network.hiddenLayers.join(' → ')} → ${rlConfig.network.actionSpace}`);
  console.log(` Learning rate: ${rlConfig.learning.learningRate}`);
  console.log(` Discount factor: ${rlConfig.learning.gamma}`);
  console.log(` Replay buffer: ${rlConfig.learning.replayBufferSize}`);
  console.log(` Batch size: ${rlConfig.learning.batchSize}`);
  console.log();

  // 3. Training loop: one pass over the price series per episode.
  console.log('3. Training Loop:');
  console.log('─'.repeat(70));

  const numEpisodes = 20;
  const episodeRewards = [];
  const episodeValues = [];

  for (let episode = 0; episode < numEpisodes; episode++) {
    let state = env.reset();
    let totalReward = 0;
    let done = false;

    // Update price history for state encoding.
    // NOTE(review): this warmup re-feeds the first 50 prices on EVERY
    // episode, so the (100-capped) encoder history accumulates duplicates
    // across episodes — confirm this repetition is intended.
    for (let i = 0; i < 50; i++) {
      stateEncoder.update(priceData[i].close);
    }

    while (!done) {
      const encodedState = stateEncoder.encode(state);
      const action = agent.selectAction(encodedState);

      const { state: nextState, reward, done: episodeDone, info } = env.step(action);

      stateEncoder.update(priceData[env.currentStep].close);
      const nextEncodedState = stateEncoder.encode(nextState);

      agent.addExperience(encodedState, action, reward, nextEncodedState, episodeDone);

      // Train on a minibatch every 4th environment step.
      if (agent.stepCount % 4 === 0) {
        agent.train();
      }

      totalReward += reward;
      state = nextState;
      done = episodeDone;
    }

    agent.decayEpsilon();
    agent.episodeCount++;

    const finalValue = env.getPortfolioValue();
    episodeRewards.push(totalReward);
    episodeValues.push(finalValue);

    // Progress line every 5 episodes (reward averaged over the last 5).
    if ((episode + 1) % 5 === 0) {
      const avgReward = episodeRewards.slice(-5).reduce((a, b) => a + b, 0) / 5;
      console.log(` Episode ${(episode + 1).toString().padStart(3)}: Reward=${avgReward.toFixed(1).padStart(7)}, Value=$${finalValue.toFixed(0).padStart(7)}, ε=${agent.epsilon.toFixed(3)}`);
    }
  }
  console.log();

  // 4. Final evaluation with a purely greedy (epsilon = 0) policy.
  console.log('4. Final Evaluation:');
  console.log('─'.repeat(70));

  // Run one episode with no exploration
  agent.epsilon = 0;
  let evalState = env.reset();
  let evalDone = false;
  const evalActions = [];

  for (let i = 0; i < 50; i++) {
    stateEncoder.update(priceData[i].close);
  }

  while (!evalDone) {
    const encodedState = stateEncoder.encode(evalState);
    const action = agent.selectAction(encodedState);
    evalActions.push(ActionNames[action]);

    const { state: nextState, done } = env.step(action);
    stateEncoder.update(priceData[env.currentStep].close);
    evalState = nextState;
    evalDone = done;
  }

  const finalValue = env.getPortfolioValue();
  const totalReturn = (finalValue - rlConfig.trading.initialCapital) / rlConfig.trading.initialCapital;

  console.log(` Final Portfolio: $${finalValue.toFixed(2)}`);
  console.log(` Total Return: ${(totalReturn * 100).toFixed(2)}%`);
  console.log(` Realized P&L: $${env.realizedPnL.toFixed(2)}`);
  console.log(` Total Trades: ${env.trades.length}`);
  console.log(` Win Rate: ${(env.getWinRate() * 100).toFixed(1)}%`);
  console.log(` Sharpe Ratio: ${env.getSharpe().toFixed(3)}`);
  console.log(` Max Drawdown: ${(env.getDrawdown() * 100).toFixed(1)}%`);
  console.log();

  // 5. Histogram of actions taken during the greedy evaluation episode.
  console.log('5. Action Distribution:');
  console.log('─'.repeat(70));

  const actionCounts = {};
  for (const action of evalActions) {
    actionCounts[action] = (actionCounts[action] || 0) + 1;
  }

  for (const [action, count] of Object.entries(actionCounts).sort((a, b) => b[1] - a[1])) {
    const pct = (count / evalActions.length * 100).toFixed(1);
    const bar = '█'.repeat(Math.floor(count / evalActions.length * 40));
    console.log(` ${action.padEnd(12)} ${bar.padEnd(40)} ${pct}%`);
  }
  console.log();

  // 6. Sparkline of per-episode rewards, normalized to the observed range.
  console.log('6. Learning Curve:');
  console.log('─'.repeat(70));

  console.log(' Episode Returns:');
  let curve = ' ';
  const minReward = Math.min(...episodeRewards);
  const maxReward = Math.max(...episodeRewards);
  const range = maxReward - minReward || 1; // guard against zero range

  for (const reward of episodeRewards) {
    const normalized = (reward - minReward) / range;
    if (normalized < 0.25) curve += '▁';
    else if (normalized < 0.5) curve += '▃';
    else if (normalized < 0.75) curve += '▅';
    else curve += '█';
  }
  console.log(curve);
  console.log(` Min: ${minReward.toFixed(1)} Max: ${maxReward.toFixed(1)}`);
  console.log();

  // 7. Q-values for the final evaluation state, rendered as +/- bars.
  console.log('7. Q-Value Analysis (Sample State):');
  console.log('─'.repeat(70));

  const sampleState = stateEncoder.encode(evalState);
  const qValues = agent.qNetwork.forward(sampleState);

  console.log(' Action Q-Values:');
  for (let i = 0; i < ActionNames.length; i++) {
    const bar = qValues[i] > 0 ? '+'.repeat(Math.min(20, Math.floor(qValues[i] * 2))) : '';
    const negBar = qValues[i] < 0 ? '-'.repeat(Math.min(20, Math.floor(Math.abs(qValues[i]) * 2))) : '';
    console.log(` ${ActionNames[i].padEnd(12)} ${qValues[i] >= 0 ? '+' : ''}${qValues[i].toFixed(3)} ${bar}${negBar}`);
  }
  console.log();

  // 8. Experience replay stats
  console.log('8. Experience Replay Statistics:');
  console.log('─'.repeat(70));

  console.log(` Buffer size: ${agent.replayBuffer.size()}`);
  console.log(` Total steps: ${agent.stepCount}`);
  console.log(` Training updates: ${agent.losses.length}`);
  if (agent.losses.length > 0) {
    const avgLoss = agent.losses.reduce((a, b) => a + b, 0) / agent.losses.length;
    console.log(` Average loss: ${avgLoss.toFixed(4)}`);
  }
  console.log();

  // 9. Coarse narrative summary based on buy/sell/hold counts.
  console.log('9. Emergent Strategy Analysis:');
  console.log('─'.repeat(70));

  // Analyze when agent buys vs sells
  const buyActions = evalActions.filter(a => a.includes('BUY')).length;
  const sellActions = evalActions.filter(a => a.includes('SELL')).length;
  const holdActions = evalActions.filter(a => a === 'HOLD').length;

  console.log(' The agent learned to:');
  if (holdActions > evalActions.length * 0.5) {
    console.log(' - Be patient (primarily holding positions)');
  }
  if (buyActions > sellActions) {
    console.log(' - Favor long positions (more buys than sells)');
  } else if (sellActions > buyActions) {
    console.log(' - Manage risk actively (frequent profit taking)');
  }
  console.log();

  // 10. Informational section about storing state vectors in RuVector.
  console.log('10. RuVector Vector Storage:');
  console.log('─'.repeat(70));
  console.log(' State vectors can be stored for similarity search:');
  console.log();
  console.log(` State vector sample (first 5 dims):`);
  console.log(` [${sampleState.slice(0, 5).map(v => v.toFixed(4)).join(', ')}]`);
  console.log();
  console.log(' Use cases:');
  console.log(' - Find similar market states from history');
  console.log(' - Experience replay with prioritized sampling');
  console.log(' - State clustering for interpretability');
  console.log();

  console.log('═'.repeat(70));
  console.log('Reinforcement learning agent training completed');
  console.log('═'.repeat(70));
}

// Top-level kickoff; rejections are logged rather than crashing the process.
main().catch(console.error);
|