/**
 * Reinforcement Learning Trading Agent
 *
 * EXOTIC: Deep Q-Learning for autonomous trading
 *
 * Uses @neural-trader/neural with RuVector for:
 * - Deep Q-Network (DQN) for action selection
 * - Experience replay with vector similarity
 * - Epsilon-greedy exploration
 * - Target network for stable learning
 *
 * The agent learns optimal trading actions directly from
 * market experience, without explicit strategy rules.
 */
|
|
|
|
// Global configuration for the RL trading experiment.
const rlConfig = {
  // Q-network topology: input size, hidden widths, and output size.
  network: {
    stateDim: 20,                 // Length of the encoded state vector
    hiddenLayers: [128, 64, 32],  // Widths of the hidden ReLU layers
    actionSpace: 5                // hold, buy_small, buy_large, sell_small, sell_large
  },

  // DQN training hyperparameters.
  learning: {
    gamma: 0.99,            // Discount factor for future rewards
    learningRate: 0.001,
    batchSize: 32,          // Minibatch size drawn from the replay buffer
    targetUpdateFreq: 100,  // Steps between target network updates
    replayBufferSize: 10000
  },

  // Epsilon-greedy exploration schedule.
  exploration: {
    epsilonStart: 1.0,   // Fully random at the start of training
    epsilonEnd: 0.01,    // Floor on the exploration rate
    epsilonDecay: 0.995  // Multiplicative decay applied per episode
  },

  // Simulated account and execution-friction parameters.
  trading: {
    initialCapital: 100000,
    maxPosition: 0.5,       // Max 50% of capital
    transactionCost: 0.001, // 10 bps
    slippage: 0.0005        // 5 bps
  }
};
|
|
|
|
// Discrete action space. The numeric values index the Q-network's output head.
const Actions = {
  HOLD: 0,
  BUY_SMALL: 1,  // 10% of available
  BUY_LARGE: 2,  // 30% of available
  SELL_SMALL: 3, // 10% of position
  SELL_LARGE: 4  // 30% of position
};

// Human-readable labels, index-aligned with the action ids above.
// Object.keys preserves insertion order for string keys, so this yields
// ['HOLD', 'BUY_SMALL', 'BUY_LARGE', 'SELL_SMALL', 'SELL_LARGE'].
const ActionNames = Object.keys(Actions);
|
|
|
|
// Fully connected layer with an optional ReLU nonlinearity.
class DenseLayer {
  /**
   * @param {number} inputDim - number of input features
   * @param {number} outputDim - number of output units
   * @param {string} [activation='relu'] - 'relu' clamps outputs at zero; any
   *   other value leaves the affine output untouched (linear)
   */
  constructor(inputDim, outputDim, activation = 'relu') {
    this.inputDim = inputDim;
    this.outputDim = outputDim;
    this.activation = activation;

    // Scaled uniform init using combined fan-in/fan-out (Glorot-style).
    const scale = Math.sqrt(2.0 / (inputDim + outputDim));
    this.weights = [];
    for (let feat = 0; feat < inputDim; feat++) {
      const row = Array.from(
        { length: outputDim },
        () => (Math.random() - 0.5) * 2 * scale
      );
      this.weights.push(row);
    }
    this.bias = Array.from({ length: outputDim }, () => (Math.random() - 0.5) * 0.1);
  }

  /**
   * Compute the layer output for a single input vector.
   * @param {number[]} input - vector of length inputDim
   * @returns {number[]} vector of length outputDim
   */
  forward(input) {
    const output = new Array(this.outputDim).fill(0);
    for (let unit = 0; unit < this.outputDim; unit++) {
      let acc = 0;
      for (let feat = 0; feat < this.inputDim; feat++) {
        acc += input[feat] * this.weights[feat][unit];
      }
      acc += this.bias[unit];
      // Apply the nonlinearity (only 'relu' is recognized).
      output[unit] = this.activation === 'relu' ? Math.max(0, acc) : acc;
    }
    return output;
  }

  /**
   * Plain gradient-descent step. `gradients` is an inputDim x outputDim
   * matrix that additionally carries a `bias` array property.
   */
  updateWeights(gradients, lr) {
    for (let feat = 0; feat < this.inputDim; feat++) {
      for (let unit = 0; unit < this.outputDim; unit++) {
        this.weights[feat][unit] -= lr * gradients[feat][unit];
      }
    }
    for (let unit = 0; unit < this.outputDim; unit++) {
      this.bias[unit] -= lr * gradients.bias[unit];
    }
  }

  // Copy all parameters from a layer of identical shape (used to sync the
  // target network with the online network).
  copyFrom(other) {
    for (let feat = 0; feat < this.inputDim; feat++) {
      for (let unit = 0; unit < this.outputDim; unit++) {
        this.weights[feat][unit] = other.weights[feat][unit];
      }
    }
    for (let unit = 0; unit < this.outputDim; unit++) {
      this.bias[unit] = other.bias[unit];
    }
  }
}
|
|
|
|
// Multi-layer Q-value network: a stack of hidden ReLU layers followed by a
// linear head emitting one raw Q-value per action.
class DQN {
  /**
   * @param {{stateDim:number, hiddenLayers:number[], actionSpace:number}} config
   */
  constructor(config) {
    this.config = config;

    // Hidden stack: stateDim -> hiddenLayers[0] -> ... -> hiddenLayers[last]
    this.layers = [];
    let inDim = config.stateDim;
    for (const width of config.hiddenLayers) {
      this.layers.push(new DenseLayer(inDim, width, 'relu'));
      inDim = width;
    }

    // Output head: no activation, Q-values are unbounded.
    this.layers.push(new DenseLayer(inDim, config.actionSpace, 'linear'));
  }

  /**
   * Full forward pass. Caches the input and every intermediate activation in
   * this.activations so the caller can later compute gradient updates.
   * @param {number[]} state - vector of length stateDim
   * @returns {number[]} Q-values, one per action
   */
  forward(state) {
    this.activations = [state];
    let out = state;
    for (const layer of this.layers) {
      out = layer.forward(out);
      this.activations.push(out);
    }
    return out;
  }

  // Activation vector feeding the output layer, or null when forward() has
  // not been run yet.
  getPreOutputActivation() {
    const acts = this.activations;
    if (!acts || acts.length < 2) {
      return null;
    }
    return acts[acts.length - 2];
  }

  // Clone all parameters from another DQN with the same architecture.
  copyFrom(other) {
    this.layers.forEach((layer, idx) => layer.copyFrom(other.layers[idx]));
  }
}
|
|
|
|
// Fixed-capacity circular store of transitions with uniform random sampling.
class ReplayBuffer {
  /** @param {number} maxSize - capacity before old entries get overwritten */
  constructor(maxSize) {
    this.maxSize = maxSize;
    this.buffer = [];
    this.position = 0; // next slot to overwrite once full
  }

  // Append a transition; once at capacity, overwrite the oldest slot.
  add(experience) {
    if (this.buffer.length >= this.maxSize) {
      this.buffer[this.position] = experience;
    } else {
      this.buffer.push(experience);
    }
    this.position = (this.position + 1) % this.maxSize;
  }

  /**
   * Draw up to batchSize DISTINCT experiences uniformly at random.
   * @returns {Array} sampled experiences (fewer if the buffer is smaller)
   */
  sample(batchSize) {
    const target = Math.min(batchSize, this.buffer.length);
    const chosen = new Set();
    while (chosen.size < target) {
      chosen.add(Math.floor(Math.random() * this.buffer.length));
    }
    return [...chosen].map((idx) => this.buffer[idx]);
  }

  // Number of stored transitions (<= maxSize).
  size() {
    return this.buffer.length;
  }
}
|
|
|
|
// Converts raw price history plus portfolio status into the fixed-length
// numeric state vector consumed by the Q-network.
class StateEncoder {
  constructor(config) {
    this.config = config;
    this.priceHistory = [];
    this.returnHistory = [];
  }

  // Record a new price and the simple return vs. the previous price.
  update(price) {
    this.priceHistory.push(price);
    if (this.priceHistory.length > 1) {
      const prev = this.priceHistory[this.priceHistory.length - 2];
      this.returnHistory.push((price - prev) / prev);
    }

    // Cap history length so memory stays bounded.
    if (this.priceHistory.length > 100) {
      this.priceHistory.shift();
      this.returnHistory.shift();
    }
  }

  /**
   * Build the state vector. Market features need at least 20 recorded
   * returns; before that they are zero-padded. The result is padded or
   * truncated to exactly config.network.stateDim entries.
   * @param {object} portfolio - snapshot from TradingEnvironment.getState()
   * @returns {number[]}
   */
  encode(portfolio) {
    const state = [];
    const returns = this.returnHistory;

    if (returns.length >= 20) {
      // Five most recent returns (scaled up for the network).
      for (let back = 1; back <= 5; back++) {
        state.push(returns[returns.length - back] * 10);
      }

      // Mean and volatility over the last 20 returns.
      const window = returns.slice(-20);
      const mean = window.reduce((a, b) => a + b, 0) / 20;
      const variance = window.reduce((s, r) => s + (r - mean) ** 2, 0) / 20;
      state.push(mean * 100, Math.sqrt(variance) * 100);

      // Cumulative momentum over 5-, 10- and 20-step horizons.
      for (const span of [5, 10, 20]) {
        state.push(returns.slice(-span).reduce((a, b) => a + b, 0) * 10);
      }

      // Distance of the last price from short/long moving averages.
      const last = this.priceHistory[this.priceHistory.length - 1];
      const sma5 = this.priceHistory.slice(-5).reduce((a, b) => a + b, 0) / 5;
      const sma20 = this.priceHistory.slice(-20).reduce((a, b) => a + b, 0) / 20;
      state.push((last / sma5 - 1) * 10, (last / sma20 - 1) * 10);

      // Fraction of up-moves over the last 10 steps, centered at zero.
      state.push(returns.slice(-10).filter((r) => r > 0).length / 10 - 0.5);
    } else {
      // Not enough history yet: zero out all 13 market features.
      state.push(...new Array(13).fill(0));
    }

    // Portfolio-derived features, roughly normalized/centered.
    state.push(
      portfolio.positionPct - 0.5,
      portfolio.unrealizedPnL / portfolio.capital,
      portfolio.realizedPnL / portfolio.capital,
      portfolio.drawdown,
      portfolio.winRate - 0.5,
      portfolio.sharpe / 2,
      portfolio.tradeCount / 100
    );

    // Pad with zeros, then truncate, to the configured dimension.
    while (state.length < this.config.network.stateDim) {
      state.push(0);
    }
    return state.slice(0, this.config.network.stateDim);
  }
}
|
|
|
|
// Trading Environment
//
// Simulates a single-asset trading account over a fixed OHLCV candle series.
// Exposes a gym-style API: reset() returns the initial observation and
// step(action) returns { state, reward, done, info }.
class TradingEnvironment {
  /**
   * @param {object} config - rlConfig-shaped object (uses config.trading.*)
   * @param {Array<{close:number}>} priceData - candle series; only `close` is read here
   */
  constructor(config, priceData) {
    this.config = config;
    this.priceData = priceData;
    this.reset();
  }

  // Reset account state to a flat position and return the first observation.
  reset() {
    this.currentStep = 50; // Start after warmup
    this.capital = this.config.trading.initialCapital;
    this.position = 0;   // shares currently held
    this.avgCost = 0;    // average entry price of the open position
    this.realizedPnL = 0;
    this.trades = [];
    this.peakCapital = this.capital; // high-water mark for drawdown
    this.returns = [];   // per-step portfolio returns, used by getSharpe()

    return this.getState();
  }

  // Snapshot of the current price plus portfolio statistics. This raw object
  // is later turned into a numeric vector by StateEncoder.encode().
  getState() {
    return {
      price: this.priceData[this.currentStep].close,
      capital: this.capital,
      position: this.position,
      // Fraction of total portfolio value held as the position.
      positionPct: this.position * this.priceData[this.currentStep].close / this.getPortfolioValue(),
      unrealizedPnL: this.getUnrealizedPnL(),
      realizedPnL: this.realizedPnL,
      drawdown: this.getDrawdown(),
      winRate: this.getWinRate(),
      sharpe: this.getSharpe(),
      tradeCount: this.trades.length
    };
  }

  // Cash plus mark-to-market value of the open position at the current close.
  getPortfolioValue() {
    const price = this.priceData[this.currentStep].close;
    return this.capital + this.position * price;
  }

  // P&L of the open position at the current close, relative to average cost.
  getUnrealizedPnL() {
    if (this.position === 0) return 0;
    const price = this.priceData[this.currentStep].close;
    return this.position * (price - this.avgCost);
  }

  // Current drawdown from the running peak, in [0, 1].
  // NOTE: also advances the high-water mark as a side effect, so this is not
  // a pure read — repeated calls can change later results.
  getDrawdown() {
    const value = this.getPortfolioValue();
    this.peakCapital = Math.max(this.peakCapital, value);
    return (this.peakCapital - value) / this.peakCapital;
  }

  // Fraction of closed (sell) trades with positive P&L; returns the neutral
  // prior 0.5 while no trades have closed yet.
  getWinRate() {
    const closedTrades = this.trades.filter(t => t.closed);
    if (closedTrades.length === 0) return 0.5;
    const wins = closedTrades.filter(t => t.pnl > 0).length;
    return wins / closedTrades.length;
  }

  // Annualized Sharpe ratio of per-step portfolio returns (sqrt(252) scaling,
  // zero risk-free rate); 0 until at least 10 returns are recorded.
  getSharpe() {
    if (this.returns.length < 10) return 0;
    const mean = this.returns.reduce((a, b) => a + b, 0) / this.returns.length;
    const variance = this.returns.reduce((s, r) => s + (r - mean) ** 2, 0) / this.returns.length;
    if (variance === 0) return 0;
    return mean / Math.sqrt(variance) * Math.sqrt(252);
  }

  /**
   * Advance one bar: execute the action at the current close, move to the
   * next candle, then compute a shaped reward from the step return with
   * penalties for trading activity and drawdown plus a win-rate bonus.
   * @param {number} action - one of the Actions.* ids
   * @returns {{state:object, reward:number, done:boolean, info:object}}
   */
  step(action) {
    const prevValue = this.getPortfolioValue();
    const price = this.priceData[this.currentStep].close;

    // Execute action
    this.executeAction(action, price);

    // Move to next step
    this.currentStep++;
    const done = this.currentStep >= this.priceData.length - 1;

    // Calculate reward
    const newValue = this.getPortfolioValue();
    const stepReturn = (newValue - prevValue) / prevValue;
    this.returns.push(stepReturn);
    // Bound returns array to prevent memory leak
    if (this.returns.length > 1000) {
      this.returns = this.returns.slice(-500);
    }

    // Shape reward
    let reward = stepReturn * 100; // Scale returns

    // Penalty for excessive trading
    if (action !== Actions.HOLD) {
      reward -= 0.1;
    }

    // Penalty for drawdown (only beyond a 10% tolerance)
    const drawdown = this.getDrawdown();
    if (drawdown > 0.1) {
      reward -= drawdown * 10;
    }

    // Bonus for profitable trades
    const winRate = this.getWinRate();
    if (winRate > 0.5) {
      reward += (winRate - 0.5) * 2;
    }

    return {
      state: this.getState(),
      reward,
      done,
      info: {
        portfolioValue: newValue,
        stepReturn,
        action: ActionNames[action]
      }
    };
  }

  // Map a discrete action to a buy/sell; slippage and transaction cost are
  // folded into the effective fill price (paid on buys, lost on sells).
  executeAction(action, price) {
    const slippage = this.config.trading.slippage;
    const cost = this.config.trading.transactionCost;

    switch (action) {
      case Actions.BUY_SMALL:
        this.buy(0.1, price * (1 + slippage + cost));
        break;
      case Actions.BUY_LARGE:
        this.buy(0.3, price * (1 + slippage + cost));
        break;
      case Actions.SELL_SMALL:
        this.sell(0.1, price * (1 - slippage - cost));
        break;
      case Actions.SELL_LARGE:
        this.sell(0.3, price * (1 - slippage - cost));
        break;
      case Actions.HOLD:
      default:
        break;
    }
  }

  /**
   * Buy `fraction` of current cash at the given (already slippage-adjusted)
   * price, updating the average cost basis of the position.
   * NOTE(review): maxBuy caps a SINGLE purchase at maxPosition x current
   * cash — it does not bound total position exposure; confirm intended.
   */
  buy(fraction, price) {
    const maxBuy = this.capital * this.config.trading.maxPosition;
    const amount = Math.min(this.capital * fraction, maxBuy);

    if (amount < 100) return; // Min trade size

    const shares = amount / price;
    // Blend the new lot into the volume-weighted average cost.
    const totalCost = this.position * this.avgCost + amount;
    const totalShares = this.position + shares;

    this.avgCost = totalCost / totalShares;
    this.position = totalShares;
    this.capital -= amount;

    this.trades.push({
      type: 'buy',
      shares,
      price,
      timestamp: this.currentStep,
      closed: false // buys are openings; only sells are marked closed
    });
  }

  // Sell `fraction` of the open position, realizing P&L against the average
  // cost basis. No-op when flat or when the resulting lot would be tiny.
  sell(fraction, price) {
    if (this.position <= 0) return;

    const sharesToSell = this.position * fraction;
    if (sharesToSell < 0.01) return;

    const proceeds = sharesToSell * price;
    const costBasis = sharesToSell * this.avgCost;
    const tradePnL = proceeds - costBasis;

    this.position -= sharesToSell;
    this.capital += proceeds;
    this.realizedPnL += tradePnL;

    this.trades.push({
      type: 'sell',
      shares: sharesToSell,
      price,
      pnl: tradePnL,
      timestamp: this.currentStep,
      closed: true // closed trades feed getWinRate()
    });
  }
}
|
|
|
|
// DQN Agent
//
// Epsilon-greedy policy over an online Q-network, trained from an experience
// replay buffer with a periodically synchronized target network.
class DQNAgent {
  /** @param {object} config - rlConfig-shaped configuration object */
  constructor(config) {
    this.config = config;

    // Online network (updated every training step) and target network
    // (frozen copy used for bootstrap targets, synced every targetUpdateFreq
    // steps via addExperience()).
    this.qNetwork = new DQN(config.network);
    this.targetNetwork = new DQN(config.network);
    this.targetNetwork.copyFrom(this.qNetwork);

    // Experience replay
    this.replayBuffer = new ReplayBuffer(config.learning.replayBufferSize);

    // Exploration rate; decayed per episode via decayEpsilon().
    this.epsilon = config.exploration.epsilonStart;

    // Training stats
    this.stepCount = 0;
    this.episodeCount = 0;
    this.totalReward = 0;
    this.losses = [];
  }

  // Epsilon-greedy action selection: a uniformly random action with
  // probability epsilon, otherwise the argmax of the online Q-values.
  selectAction(state) {
    if (Math.random() < this.epsilon) {
      return Math.floor(Math.random() * this.config.network.actionSpace);
    }

    // Greedy action
    const qValues = this.qNetwork.forward(state);
    return qValues.indexOf(Math.max(...qValues));
  }

  /**
   * One training pass over a sampled minibatch.
   * @returns {number} mean squared TD error (0 if the buffer is too small)
   */
  train() {
    if (this.replayBuffer.size() < this.config.learning.batchSize) {
      return 0;
    }

    const batch = this.replayBuffer.sample(this.config.learning.batchSize);
    let totalLoss = 0;

    for (const experience of batch) {
      const { state, action, reward, nextState, done } = experience;

      // Q(s, .) from the online network — also caches the activations that
      // updateQNetwork() needs for its gradient step.
      const currentQ = this.qNetwork.forward(state);

      // Bootstrapped target: r + gamma * max_a' Q_target(s', a'),
      // or just r at episode end.
      let targetQ;
      if (done) {
        targetQ = reward;
      } else {
        const nextQ = this.targetNetwork.forward(nextState);
        targetQ = reward + this.config.learning.gamma * Math.max(...nextQ);
      }

      // TD error
      const tdError = targetQ - currentQ[action];
      totalLoss += tdError ** 2;

      // Simplified update (in production, use proper backprop)
      this.updateQNetwork(state, action, tdError);
    }

    this.losses.push(totalLoss / batch.length);
    return totalLoss / batch.length;
  }

  /**
   * Approximate gradient step on the online network for one transition.
   * Requires a prior forward(state) so activations are cached; if they are
   * missing, runs the forward pass itself and retries once.
   */
  updateQNetwork(state, action, tdError) {
    const lr = this.config.learning.learningRate;

    // Get the actual hidden layer output (activation before output layer)
    const hiddenOutput = this.qNetwork.getPreOutputActivation();

    if (!hiddenOutput) {
      // Fallback: run forward pass to get activations
      this.qNetwork.forward(state);
      return this.updateQNetwork(state, action, tdError);
    }

    // Update output layer using actual hidden activations
    const outputLayer = this.qNetwork.layers[this.qNetwork.layers.length - 1];

    // BUG FIX: the error signal fed back to the hidden layers must be a
    // per-hidden-unit VECTOR (tdError scaled by each output weight for the
    // chosen action). The previous code multiplied a number by an array
    // (`tdError * weights.map(...)`), which evaluates to NaN and then threw
    // a TypeError on delta.map() below, crashing every train() call.
    // Compute it from the pre-update weights so it matches the forward pass.
    let delta = outputLayer.weights.map(row => tdError * row[action]);

    // Gradient for output layer: dL/dW = tdError * hiddenOutput
    for (let i = 0; i < outputLayer.inputDim; i++) {
      outputLayer.weights[i][action] += lr * tdError * hiddenOutput[i];
    }
    outputLayer.bias[action] += lr * tdError;

    // Simplified backprop through hidden layers (gradient clipping for stability)
    const maxGrad = 1.0;

    for (let l = this.qNetwork.layers.length - 2; l >= 0; l--) {
      const layer = this.qNetwork.layers[l];
      const prevActivation = this.qNetwork.activations[l];
      const currentActivation = this.qNetwork.activations[l + 1];

      // ReLU derivative: 1 if activation > 0, else 0
      const reluGrad = currentActivation.map(a => a > 0 ? 1 : 0);

      // Apply ReLU gradient
      delta = delta.map((d, i) => d * (reluGrad[i] || 0));

      // Clip gradients for stability
      delta = delta.map(d => Math.max(-maxGrad, Math.min(maxGrad, d)));

      // Update weights for this layer (0.1 damping on hidden-layer steps)
      for (let i = 0; i < layer.inputDim; i++) {
        for (let j = 0; j < layer.outputDim; j++) {
          layer.weights[i][j] += lr * 0.1 * delta[j] * (prevActivation[i] || 0);
        }
      }

      // Propagate delta to previous layer
      if (l > 0) {
        const newDelta = new Array(layer.inputDim).fill(0);
        for (let i = 0; i < layer.inputDim; i++) {
          for (let j = 0; j < layer.outputDim; j++) {
            newDelta[i] += delta[j] * layer.weights[i][j];
          }
        }
        delta = newDelta;
      }
    }
  }

  // Hard-sync the target network with the online network's parameters.
  updateTargetNetwork() {
    this.targetNetwork.copyFrom(this.qNetwork);
  }

  // Multiplicatively decay epsilon down to the configured floor.
  decayEpsilon() {
    this.epsilon = Math.max(
      this.config.exploration.epsilonEnd,
      this.epsilon * this.config.exploration.epsilonDecay
    );
  }

  // Store one transition; every targetUpdateFreq stored steps, sync the
  // target network.
  addExperience(state, action, reward, nextState, done) {
    this.replayBuffer.add({ state, action, reward, nextState, done });
    this.stepCount++;

    if (this.stepCount % this.config.learning.targetUpdateFreq === 0) {
      this.updateTargetNetwork();
    }
  }
}
|
|
|
|
/**
 * Deterministic synthetic OHLCV candle series with three drift/volatility
 * regimes that cycle every 100 candles.
 * @param {number} n - number of candles to generate
 * @param {number} [seed=42] - RNG seed; identical seeds reproduce the series
 * @returns {Array<{timestamp:number, open:number, high:number, low:number, close:number, volume:number}>}
 */
function generatePriceData(n, seed = 42) {
  // Small linear-congruential generator so runs are reproducible per seed.
  let rngState = seed;
  const nextRandom = () => {
    rngState = (rngState * 9301 + 49297) % 233280;
    return rngState / 233280;
  };

  // Regime table, cycled every 100 candles:
  // positive drift / low vol, negative drift / high vol, flat / calm.
  const regimes = [
    { drift: 0.001, volatility: 0.012 },
    { drift: -0.0005, volatility: 0.02 },
    { drift: 0, volatility: 0.01 }
  ];

  const data = [];
  let price = 100;

  for (let i = 0; i < n; i++) {
    const { drift, volatility } = regimes[Math.floor(i / 100) % 3];

    // Sum of two uniforms minus one gives triangular noise in [-1, 1].
    const stepReturn = drift + volatility * (nextRandom() + nextRandom() - 1);
    price = price * (1 + stepReturn);

    data.push({
      timestamp: i,
      open: price * (1 - nextRandom() * 0.002),
      high: price * (1 + nextRandom() * 0.005),
      low: price * (1 - nextRandom() * 0.005),
      close: price,
      volume: 1000000 * (0.5 + nextRandom())
    });
  }

  return data;
}
|
|
|
|
/**
 * Demo entry point: builds the environment, trains the DQN agent for a fixed
 * number of episodes, then runs a greedy evaluation episode and prints a
 * series of diagnostic reports to the console.
 */
async function main() {
  console.log('═'.repeat(70));
  console.log('REINFORCEMENT LEARNING TRADING AGENT');
  console.log('═'.repeat(70));
  console.log();

  // 1. Generate data and construct the environment/encoder pair.
  console.log('1. Environment Setup:');
  console.log('─'.repeat(70));

  const priceData = generatePriceData(1000);
  const env = new TradingEnvironment(rlConfig, priceData);
  const stateEncoder = new StateEncoder(rlConfig);

  console.log(` Price data: ${priceData.length} candles`);
  console.log(` Initial capital: $${rlConfig.trading.initialCapital.toLocaleString()}`);
  console.log(` Action space: ${rlConfig.network.actionSpace} actions`);
  console.log(` State dimension: ${rlConfig.network.stateDim}`);
  console.log();

  // 2. Initialize agent
  console.log('2. Agent Configuration:');
  console.log('─'.repeat(70));

  const agent = new DQNAgent(rlConfig);

  console.log(` Network: ${rlConfig.network.hiddenLayers.join(' → ')} → ${rlConfig.network.actionSpace}`);
  console.log(` Learning rate: ${rlConfig.learning.learningRate}`);
  console.log(` Discount factor: ${rlConfig.learning.gamma}`);
  console.log(` Replay buffer: ${rlConfig.learning.replayBufferSize}`);
  console.log(` Batch size: ${rlConfig.learning.batchSize}`);
  console.log();

  // 3. Training loop: one pass over the price series per episode.
  console.log('3. Training Loop:');
  console.log('─'.repeat(70));

  const numEpisodes = 20;
  const episodeRewards = [];
  const episodeValues = [];

  for (let episode = 0; episode < numEpisodes; episode++) {
    let state = env.reset();
    let totalReward = 0;
    let done = false;

    // Update price history for state encoding.
    // NOTE(review): this warmup re-feeds the first 50 prices on EVERY
    // episode, so the (100-capped) encoder history accumulates duplicates
    // across episodes — confirm this repetition is intended.
    for (let i = 0; i < 50; i++) {
      stateEncoder.update(priceData[i].close);
    }

    while (!done) {
      const encodedState = stateEncoder.encode(state);
      const action = agent.selectAction(encodedState);

      const { state: nextState, reward, done: episodeDone, info } = env.step(action);

      stateEncoder.update(priceData[env.currentStep].close);
      const nextEncodedState = stateEncoder.encode(nextState);

      agent.addExperience(encodedState, action, reward, nextEncodedState, episodeDone);

      // Train on a minibatch every 4th environment step.
      if (agent.stepCount % 4 === 0) {
        agent.train();
      }

      totalReward += reward;
      state = nextState;
      done = episodeDone;
    }

    agent.decayEpsilon();
    agent.episodeCount++;

    const finalValue = env.getPortfolioValue();
    episodeRewards.push(totalReward);
    episodeValues.push(finalValue);

    // Progress line every 5 episodes (reward averaged over the last 5).
    if ((episode + 1) % 5 === 0) {
      const avgReward = episodeRewards.slice(-5).reduce((a, b) => a + b, 0) / 5;
      console.log(` Episode ${(episode + 1).toString().padStart(3)}: Reward=${avgReward.toFixed(1).padStart(7)}, Value=$${finalValue.toFixed(0).padStart(7)}, ε=${agent.epsilon.toFixed(3)}`);
    }
  }
  console.log();

  // 4. Final evaluation with a purely greedy (epsilon = 0) policy.
  console.log('4. Final Evaluation:');
  console.log('─'.repeat(70));

  // Run one episode with no exploration
  agent.epsilon = 0;
  let evalState = env.reset();
  let evalDone = false;
  const evalActions = [];

  for (let i = 0; i < 50; i++) {
    stateEncoder.update(priceData[i].close);
  }

  while (!evalDone) {
    const encodedState = stateEncoder.encode(evalState);
    const action = agent.selectAction(encodedState);
    evalActions.push(ActionNames[action]);

    const { state: nextState, done } = env.step(action);
    stateEncoder.update(priceData[env.currentStep].close);
    evalState = nextState;
    evalDone = done;
  }

  const finalValue = env.getPortfolioValue();
  const totalReturn = (finalValue - rlConfig.trading.initialCapital) / rlConfig.trading.initialCapital;

  console.log(` Final Portfolio: $${finalValue.toFixed(2)}`);
  console.log(` Total Return: ${(totalReturn * 100).toFixed(2)}%`);
  console.log(` Realized P&L: $${env.realizedPnL.toFixed(2)}`);
  console.log(` Total Trades: ${env.trades.length}`);
  console.log(` Win Rate: ${(env.getWinRate() * 100).toFixed(1)}%`);
  console.log(` Sharpe Ratio: ${env.getSharpe().toFixed(3)}`);
  console.log(` Max Drawdown: ${(env.getDrawdown() * 100).toFixed(1)}%`);
  console.log();

  // 5. Histogram of actions taken during the greedy evaluation episode.
  console.log('5. Action Distribution:');
  console.log('─'.repeat(70));

  const actionCounts = {};
  for (const action of evalActions) {
    actionCounts[action] = (actionCounts[action] || 0) + 1;
  }

  for (const [action, count] of Object.entries(actionCounts).sort((a, b) => b[1] - a[1])) {
    const pct = (count / evalActions.length * 100).toFixed(1);
    const bar = '█'.repeat(Math.floor(count / evalActions.length * 40));
    console.log(` ${action.padEnd(12)} ${bar.padEnd(40)} ${pct}%`);
  }
  console.log();

  // 6. Sparkline of per-episode rewards, normalized to the observed range.
  console.log('6. Learning Curve:');
  console.log('─'.repeat(70));

  console.log(' Episode Returns:');
  let curve = ' ';
  const minReward = Math.min(...episodeRewards);
  const maxReward = Math.max(...episodeRewards);
  const range = maxReward - minReward || 1; // guard against zero range

  for (const reward of episodeRewards) {
    const normalized = (reward - minReward) / range;
    if (normalized < 0.25) curve += '▁';
    else if (normalized < 0.5) curve += '▃';
    else if (normalized < 0.75) curve += '▅';
    else curve += '█';
  }
  console.log(curve);
  console.log(` Min: ${minReward.toFixed(1)} Max: ${maxReward.toFixed(1)}`);
  console.log();

  // 7. Q-values for the final evaluation state, rendered as +/- bars.
  console.log('7. Q-Value Analysis (Sample State):');
  console.log('─'.repeat(70));

  const sampleState = stateEncoder.encode(evalState);
  const qValues = agent.qNetwork.forward(sampleState);

  console.log(' Action Q-Values:');
  for (let i = 0; i < ActionNames.length; i++) {
    const bar = qValues[i] > 0 ? '+'.repeat(Math.min(20, Math.floor(qValues[i] * 2))) : '';
    const negBar = qValues[i] < 0 ? '-'.repeat(Math.min(20, Math.floor(Math.abs(qValues[i]) * 2))) : '';
    console.log(` ${ActionNames[i].padEnd(12)} ${qValues[i] >= 0 ? '+' : ''}${qValues[i].toFixed(3)} ${bar}${negBar}`);
  }
  console.log();

  // 8. Experience replay stats
  console.log('8. Experience Replay Statistics:');
  console.log('─'.repeat(70));

  console.log(` Buffer size: ${agent.replayBuffer.size()}`);
  console.log(` Total steps: ${agent.stepCount}`);
  console.log(` Training updates: ${agent.losses.length}`);
  if (agent.losses.length > 0) {
    const avgLoss = agent.losses.reduce((a, b) => a + b, 0) / agent.losses.length;
    console.log(` Average loss: ${avgLoss.toFixed(4)}`);
  }
  console.log();

  // 9. Coarse narrative summary based on buy/sell/hold counts.
  console.log('9. Emergent Strategy Analysis:');
  console.log('─'.repeat(70));

  // Analyze when agent buys vs sells
  const buyActions = evalActions.filter(a => a.includes('BUY')).length;
  const sellActions = evalActions.filter(a => a.includes('SELL')).length;
  const holdActions = evalActions.filter(a => a === 'HOLD').length;

  console.log(' The agent learned to:');
  if (holdActions > evalActions.length * 0.5) {
    console.log(' - Be patient (primarily holding positions)');
  }
  if (buyActions > sellActions) {
    console.log(' - Favor long positions (more buys than sells)');
  } else if (sellActions > buyActions) {
    console.log(' - Manage risk actively (frequent profit taking)');
  }
  console.log();

  // 10. Informational section about storing state vectors in RuVector.
  console.log('10. RuVector Vector Storage:');
  console.log('─'.repeat(70));
  console.log(' State vectors can be stored for similarity search:');
  console.log();
  console.log(` State vector sample (first 5 dims):`);
  console.log(` [${sampleState.slice(0, 5).map(v => v.toFixed(4)).join(', ')}]`);
  console.log();
  console.log(' Use cases:');
  console.log(' - Find similar market states from history');
  console.log(' - Experience replay with prioritized sampling');
  console.log(' - State clustering for interpretability');
  console.log();

  console.log('═'.repeat(70));
  console.log('Reinforcement learning agent training completed');
  console.log('═'.repeat(70));
}

// Top-level kickoff; rejections are logged rather than crashing the process.
main().catch(console.error);
|