Squashed 'vendor/ruvector/' content from commit b64c2172
git-subtree-dir: vendor/ruvector git-subtree-split: b64c21726f2bb37286d9ee36a7869fef60cc6900
This commit is contained in:
902
examples/neural-trader/exotic/reinforcement-learning-agent.js
Normal file
902
examples/neural-trader/exotic/reinforcement-learning-agent.js
Normal file
@@ -0,0 +1,902 @@
/**
 * Reinforcement Learning Trading Agent
 *
 * EXOTIC: Deep Q-Learning for autonomous trading
 *
 * Uses @neural-trader/neural with RuVector for:
 * - Deep Q-Network (DQN) for action selection
 * - Experience replay with vector similarity
 * - Epsilon-greedy exploration
 * - Target network for stable learning
 *
 * The agent learns optimal trading actions directly from
 * market experience, without explicit strategy rules.
 */

// RL Configuration
// Central set of hyper-parameters read by the network, the agent, the
// state encoder and the simulated trading environment in this file.
const rlConfig = {
  // Network architecture
  network: {
    stateDim: 20, // State vector dimension
    hiddenLayers: [128, 64, 32], // Hidden layer widths, input side first
    actionSpace: 5, // hold, buy_small, buy_large, sell_small, sell_large
  },

  // Learning parameters
  learning: {
    gamma: 0.99, // Discount factor
    learningRate: 0.001,
    batchSize: 32, // Experiences drawn per training call
    targetUpdateFreq: 100, // Steps between target network updates
    replayBufferSize: 10000, // Capacity of the experience replay buffer
  },

  // Exploration (epsilon-greedy schedule)
  exploration: {
    epsilonStart: 1.0,
    epsilonEnd: 0.01, // Floor applied when decaying epsilon
    epsilonDecay: 0.995, // Multiplicative decay factor
  },

  // Trading
  trading: {
    initialCapital: 100000,
    maxPosition: 0.5, // Max 50% of capital
    transactionCost: 0.001, // 10 bps
    slippage: 0.0005, // 5 bps
  },
};

// Action definitions
// Numeric ids double as indices into the network's output vector.
const Actions = {
  HOLD: 0,
  BUY_SMALL: 1, // 10% of available
  BUY_LARGE: 2, // 30% of available
  SELL_SMALL: 3, // 10% of position
  SELL_LARGE: 4, // 30% of position
};

// Display names indexed by action id, derived from Actions so the two
// cannot drift apart.
const ActionNames = Object.entries(Actions)
  .sort(([, idA], [, idB]) => idA - idB)
  .map(([label]) => label);

// Neural Network Layer
/**
 * Fully connected layer storing weights as weights[inputIndex][outputIndex]
 * plus one bias per output. The activation is applied only when it is
 * 'relu'; any other value leaves the weighted sum untouched.
 */
class DenseLayer {
  /**
   * @param {number} inputDim - number of inputs
   * @param {number} outputDim - number of outputs
   * @param {string} [activation='relu'] - 'relu' or anything else for identity
   */
  constructor(inputDim, outputDim, activation = 'relu') {
    this.inputDim = inputDim;
    this.outputDim = outputDim;
    this.activation = activation;

    // Random weights drawn uniformly from [-scale, scale)
    const scale = Math.sqrt(2.0 / (inputDim + outputDim));
    this.weights = [];
    for (let row = 0; row < inputDim; row++) {
      const fanOut = [];
      while (fanOut.length < outputDim) {
        fanOut.push((Math.random() - 0.5) * 2 * scale);
      }
      this.weights.push(fanOut);
    }

    // Small random biases in [-0.05, 0.05)
    this.bias = new Array(outputDim).fill(0).map(() => (Math.random() - 0.5) * 0.1);
  }

  /**
   * Computes activation(input · weights + bias).
   * @param {number[]} input - vector with at least inputDim entries
   * @returns {number[]} vector of outputDim values
   */
  forward(input) {
    const useRelu = this.activation === 'relu';
    const output = new Array(this.outputDim).fill(0);

    for (let col = 0; col < this.outputDim; col++) {
      let sum = 0;
      for (let row = 0; row < this.inputDim; row++) {
        sum += input[row] * this.weights[row][col];
      }
      sum += this.bias[col];
      output[col] = useRelu ? Math.max(0, sum) : sum;
    }

    return output;
  }

  /**
   * Plain gradient-descent step.
   * @param {Array} gradients - gradients[i][j] per weight, gradients.bias[j] per bias
   * @param {number} lr - learning rate
   */
  updateWeights(gradients, lr) {
    const { inputDim, outputDim } = this;
    for (let row = 0; row < inputDim; row++) {
      for (let col = 0; col < outputDim; col++) {
        this.weights[row][col] -= lr * gradients[row][col];
      }
    }
    for (let col = 0; col < outputDim; col++) {
      this.bias[col] -= lr * gradients.bias[col];
    }
  }

  /**
   * Copies every weight and bias value from another layer into this one.
   * @param {DenseLayer} other - layer with at least the same dimensions
   */
  copyFrom(other) {
    const { inputDim, outputDim } = this;
    for (let row = 0; row < inputDim; row++) {
      for (let col = 0; col < outputDim; col++) {
        this.weights[row][col] = other.weights[row][col];
      }
    }
    for (let col = 0; col < outputDim; col++) {
      this.bias[col] = other.bias[col];
    }
  }
}

// Deep Q-Network
/**
 * Stack of DenseLayer instances: one ReLU layer per entry of
 * config.hiddenLayers followed by a linear layer with one output per action.
 */
class DQN {
  /**
   * @param {{stateDim: number, hiddenLayers: number[], actionSpace: number}} config
   */
  constructor(config) {
    this.config = config;

    // Consecutive pairs of this list are the (input, output) sizes of each layer.
    const dims = [config.stateDim, ...config.hiddenLayers, config.actionSpace];
    const lastIndex = dims.length - 1;

    this.layers = [];
    for (let k = 1; k <= lastIndex; k++) {
      // Only the final layer is linear, so Q-values are unbounded.
      const activation = k === lastIndex ? 'linear' : 'relu';
      this.layers.push(new DenseLayer(dims[k - 1], dims[k], activation));
    }
  }

  /**
   * Runs the state through every layer and records each intermediate
   * output in this.activations (index 0 is the input itself).
   * @param {number[]} state
   * @returns {number[]} output of the last layer
   */
  forward(state) {
    this.activations = [state];
    return this.layers.reduce((signal, layer) => {
      const next = layer.forward(signal);
      this.activations.push(next);
      return next;
    }, state);
  }

  /**
   * @returns {number[]|null} the recorded input of the output layer, or null
   *   when no forward pass has been recorded yet
   */
  getPreOutputActivation() {
    const trace = this.activations;
    const hasTrace = Boolean(trace) && trace.length >= 2;
    return hasTrace ? trace[trace.length - 2] : null;
  }

  /**
   * Copies parameters layer by layer from another network of the same shape.
   * @param {DQN} other
   */
  copyFrom(other) {
    this.layers.forEach((layer, idx) => {
      layer.copyFrom(other.layers[idx]);
    });
  }
}

// Experience Replay Buffer
/**
 * Fixed-capacity ring buffer: once full, each new experience overwrites
 * the slot at the current write position.
 */
class ReplayBuffer {
  /**
   * @param {number} maxSize - maximum number of stored experiences
   */
  constructor(maxSize) {
    this.maxSize = maxSize;
    this.buffer = [];
    this.position = 0;
  }

  /**
   * Stores one experience, overwriting an existing slot when full.
   * @param {*} experience
   */
  add(experience) {
    const hasRoom = this.buffer.length < this.maxSize;
    if (hasRoom) {
      this.buffer.push(experience);
    } else {
      this.buffer[this.position] = experience;
    }
    this.position = (this.position + 1) % this.maxSize;
  }

  /**
   * Draws distinct experiences uniformly at random.
   * @param {number} batchSize - requested number of experiences
   * @returns {Array} min(batchSize, size()) experiences, in draw order
   */
  sample(batchSize) {
    const wanted = Math.min(batchSize, this.buffer.length);
    const picked = new Set();

    // Redraw until enough distinct indices have been collected.
    while (picked.size < wanted) {
      picked.add(Math.floor(Math.random() * this.buffer.length));
    }

    return Array.from(picked, (idx) => this.buffer[idx]);
  }

  /**
   * @returns {number} number of experiences currently stored
   */
  size() {
    return this.buffer.length;
  }
}

// State Encoder
/**
 * Turns a rolling price history plus a portfolio snapshot into the
 * fixed-length numeric vector consumed by the Q-network.
 *
 * Layout of the encoded vector (before padding/truncation to stateDim):
 *   0-4   last five returns, most recent first (x10)
 *   5-6   mean and standard deviation of the last 20 returns (x100)
 *   7-9   summed returns over 5 / 10 / 20 steps (x10)
 *   10-11 price relative to the 5- and 20-step simple moving averages (x10)
 *   12    share of positive returns in the last 10 steps, centred on 0
 *   13-19 portfolio features
 */
class StateEncoder {
  /**
   * @param {{network: {stateDim: number}}} config
   */
  constructor(config) {
    this.config = config;
    this.priceHistory = [];
    this.returnHistory = [];
  }

  /**
   * Clears the rolling history so the encoder can be reused for a new
   * price series without mixing it with the previous one.
   */
  reset() {
    this.priceHistory = [];
    this.returnHistory = [];
  }

  /**
   * Appends a price and, when a previous price exists, the simple return
   * relative to it.
   * @param {number} price
   */
  update(price) {
    this.priceHistory.push(price);
    if (this.priceHistory.length > 1) {
      const previous = this.priceHistory[this.priceHistory.length - 2];
      this.returnHistory.push((price - previous) / previous);
    }

    // Keep bounded: at most 100 prices (and therefore 99 returns)
    if (this.priceHistory.length > 100) {
      this.priceHistory.shift();
      this.returnHistory.shift();
    }
  }

  /**
   * Builds the state vector for the current history and portfolio.
   * @param {object} portfolio - snapshot with positionPct, unrealizedPnL,
   *   realizedPnL, capital, drawdown, winRate, sharpe and tradeCount
   * @returns {number[]} exactly config.network.stateDim finite numbers
   */
  encode(portfolio) {
    const state = [];
    const sum = (values) => values.reduce((a, b) => a + b, 0);

    // Price-based features
    if (this.returnHistory.length >= 20) {
      // Recent returns
      for (let i = 1; i <= 5; i++) {
        state.push(this.returnHistory[this.returnHistory.length - i] * 10); // Scaled
      }

      // Return statistics
      const recent20 = this.returnHistory.slice(-20);
      const mean = sum(recent20) / 20;
      const variance = recent20.reduce((s, r) => s + (r - mean) ** 2, 0) / 20;
      const volatility = Math.sqrt(variance);

      state.push(mean * 100);
      state.push(volatility * 100);

      // Momentum
      state.push(sum(this.returnHistory.slice(-5)) * 10);
      state.push(sum(this.returnHistory.slice(-10)) * 10);
      state.push(sum(recent20) * 10);

      // Price relative to moving averages
      const currentPrice = this.priceHistory[this.priceHistory.length - 1];
      const sma5 = sum(this.priceHistory.slice(-5)) / 5;
      const sma20 = sum(this.priceHistory.slice(-20)) / 20;

      state.push((currentPrice / sma5 - 1) * 10);
      state.push((currentPrice / sma20 - 1) * 10);

      // Trend direction
      const trend = this.returnHistory.slice(-10).filter((r) => r > 0).length / 10;
      state.push(trend - 0.5);
    } else {
      // Not enough history yet: pad the 13 price features with zeros
      for (let i = 0; i < 13; i++) {
        state.push(0);
      }
    }

    // Portfolio features
    state.push(portfolio.positionPct - 0.5); // Position as fraction of capital
    state.push(portfolio.unrealizedPnL / portfolio.capital);
    state.push(portfolio.realizedPnL / portfolio.capital);
    state.push(portfolio.drawdown);
    state.push(portfolio.winRate - 0.5);
    state.push(portfolio.sharpe / 2);
    state.push(portfolio.tradeCount / 100);

    // Ensure state dimension
    while (state.length < this.config.network.stateDim) {
      state.push(0);
    }

    // A zero capital or a zero price produces NaN/Infinity above; one such
    // value would propagate through every network output, so neutralise it.
    return state
      .slice(0, this.config.network.stateDim)
      .map((value) => (Number.isFinite(value) ? value : 0));
  }
}

// Trading Environment
/**
 * Single-asset, long-only trading simulator driven by a list of candles.
 * Each step executes one action at the current close, advances one candle
 * and returns the new portfolio snapshot together with a shaped reward.
 */
class TradingEnvironment {
  /**
   * @param {{trading: {initialCapital: number, maxPosition: number,
   *   transactionCost: number, slippage: number}}} config
   * @param {Array<{close: number}>} priceData - candles; indices 50 and up are traded
   */
  constructor(config, priceData) {
    this.config = config;
    this.priceData = priceData;
    this.reset();
  }

  /**
   * Restores the initial portfolio and rewinds to the first tradable candle.
   * @returns {object} the initial portfolio snapshot (see getState)
   */
  reset() {
    this.currentStep = 50; // Start after warmup
    this.capital = this.config.trading.initialCapital;
    this.position = 0;
    this.avgCost = 0;
    this.realizedPnL = 0;
    this.trades = [];
    this.peakCapital = this.capital;
    this.returns = [];

    return this.getState();
  }

  /**
   * @returns {object} snapshot of price, cash, holdings and running statistics
   */
  getState() {
    const price = this.priceData[this.currentStep].close;
    const portfolioValue = this.getPortfolioValue();

    return {
      price,
      capital: this.capital,
      position: this.position,
      // A wiped-out portfolio has no exposure; avoid 0/0.
      positionPct: portfolioValue !== 0 ? (this.position * price) / portfolioValue : 0,
      unrealizedPnL: this.getUnrealizedPnL(),
      realizedPnL: this.realizedPnL,
      drawdown: this.getDrawdown(),
      winRate: this.getWinRate(),
      sharpe: this.getSharpe(),
      tradeCount: this.trades.length
    };
  }

  /**
   * @returns {number} cash plus holdings marked at the current close
   */
  getPortfolioValue() {
    const price = this.priceData[this.currentStep].close;
    return this.capital + this.position * price;
  }

  /**
   * @returns {number} holdings marked at the current close minus their cost basis
   */
  getUnrealizedPnL() {
    if (this.position === 0) return 0;
    const price = this.priceData[this.currentStep].close;
    return this.position * (price - this.avgCost);
  }

  /**
   * Updates the running peak as a side effect.
   * @returns {number} fractional drop of the portfolio value from its peak
   */
  getDrawdown() {
    const value = this.getPortfolioValue();
    this.peakCapital = Math.max(this.peakCapital, value);
    if (!(this.peakCapital > 0)) return 0;
    return (this.peakCapital - value) / this.peakCapital;
  }

  /**
   * @returns {number} share of sells with positive P&L; 0.5 before any sell
   */
  getWinRate() {
    const closedTrades = this.trades.filter(t => t.closed);
    if (closedTrades.length === 0) return 0.5;
    const wins = closedTrades.filter(t => t.pnl > 0).length;
    return wins / closedTrades.length;
  }

  /**
   * @returns {number} mean/std of recorded step returns scaled by sqrt(252);
   *   0 with fewer than 10 returns or zero variance
   */
  getSharpe() {
    if (this.returns.length < 10) return 0;
    const mean = this.returns.reduce((a, b) => a + b, 0) / this.returns.length;
    const variance = this.returns.reduce((s, r) => s + (r - mean) ** 2, 0) / this.returns.length;
    if (variance === 0) return 0;
    return mean / Math.sqrt(variance) * Math.sqrt(252);
  }

  /**
   * Executes an action at the current close and advances one candle.
   * @param {number} action - one of the Actions ids
   * @returns {{state: object, reward: number, done: boolean, info: object}}
   */
  step(action) {
    const prevValue = this.getPortfolioValue();
    const price = this.priceData[this.currentStep].close;

    // Execute action
    this.executeAction(action, price);

    // Move to next step
    this.currentStep++;
    const done = this.currentStep >= this.priceData.length - 1;

    // Calculate reward
    const newValue = this.getPortfolioValue();
    const stepReturn = prevValue !== 0 ? (newValue - prevValue) / prevValue : 0;
    this.returns.push(stepReturn);
    // Bound returns array to prevent memory leak
    if (this.returns.length > 1000) {
      this.returns = this.returns.slice(-500);
    }

    // Shape reward
    let reward = stepReturn * 100; // Scale returns

    // Penalty for excessive trading
    if (action !== Actions.HOLD) {
      reward -= 0.1;
    }

    // Penalty for drawdown
    const drawdown = this.getDrawdown();
    if (drawdown > 0.1) {
      reward -= drawdown * 10;
    }

    // Bonus for profitable trades
    const winRate = this.getWinRate();
    if (winRate > 0.5) {
      reward += (winRate - 0.5) * 2;
    }

    return {
      state: this.getState(),
      reward,
      done,
      info: {
        portfolioValue: newValue,
        stepReturn,
        action: ActionNames[action]
      }
    };
  }

  /**
   * Translates an action id into a buy or sell at a price adjusted for
   * slippage and transaction cost. Unknown ids behave like HOLD.
   * @param {number} action
   * @param {number} price - current close
   */
  executeAction(action, price) {
    const slippage = this.config.trading.slippage;
    const cost = this.config.trading.transactionCost;

    switch (action) {
      case Actions.BUY_SMALL:
        this.buy(0.1, price * (1 + slippage + cost));
        break;
      case Actions.BUY_LARGE:
        this.buy(0.3, price * (1 + slippage + cost));
        break;
      case Actions.SELL_SMALL:
        this.sell(0.1, price * (1 - slippage - cost));
        break;
      case Actions.SELL_LARGE:
        this.sell(0.3, price * (1 - slippage - cost));
        break;
      case Actions.HOLD:
      default:
        break;
    }
  }

  /**
   * Spends a fraction of the available cash, limited so that holdings never
   * exceed config.trading.maxPosition of the portfolio value.
   * @param {number} fraction - share of cash to spend
   * @param {number} price - execution price including costs
   */
  buy(fraction, price) {
    // Capping a single order at cash * maxPosition can never bind for the
    // fractions used here, so the limit is applied to total exposure instead.
    const markPrice = this.priceData[this.currentStep].close;
    const heldValue = this.position * markPrice;
    const portfolioValue = this.capital + heldValue;
    const headroom = Math.max(0, this.config.trading.maxPosition * portfolioValue - heldValue);
    const amount = Math.min(this.capital * fraction, headroom);

    if (amount < 100) return; // Min trade size

    const shares = amount / price;
    const totalCost = this.position * this.avgCost + amount;
    const totalShares = this.position + shares;

    this.avgCost = totalCost / totalShares;
    this.position = totalShares;
    this.capital -= amount;

    this.trades.push({
      type: 'buy',
      shares,
      price,
      timestamp: this.currentStep,
      closed: false
    });
  }

  /**
   * Sells a fraction of the current holdings and books the realized P&L
   * against the average cost.
   * @param {number} fraction - share of the position to sell
   * @param {number} price - execution price including costs
   */
  sell(fraction, price) {
    if (this.position <= 0) return;

    const sharesToSell = this.position * fraction;
    if (sharesToSell < 0.01) return;

    const proceeds = sharesToSell * price;
    const costBasis = sharesToSell * this.avgCost;
    const tradePnL = proceeds - costBasis;

    this.position -= sharesToSell;
    this.capital += proceeds;
    this.realizedPnL += tradePnL;

    this.trades.push({
      type: 'sell',
      shares: sharesToSell,
      price,
      pnl: tradePnL,
      timestamp: this.currentStep,
      closed: true
    });
  }
}

// DQN Agent
/**
 * Epsilon-greedy Deep Q-Learning agent with an online network, a
 * periodically synchronised target network and an experience replay buffer.
 */
class DQNAgent {
  /**
   * @param {object} config - needs network, learning and exploration sections
   */
  constructor(config) {
    this.config = config;

    // Networks
    this.qNetwork = new DQN(config.network);
    this.targetNetwork = new DQN(config.network);
    this.targetNetwork.copyFrom(this.qNetwork);

    // Experience replay
    this.replayBuffer = new ReplayBuffer(config.learning.replayBufferSize);

    // Exploration
    this.epsilon = config.exploration.epsilonStart;

    // Training stats
    this.stepCount = 0;
    this.episodeCount = 0;
    this.totalReward = 0;
    this.losses = [];
  }

  /**
   * Picks a random action with probability epsilon, otherwise the action
   * with the highest Q-value.
   * @param {number[]} state - encoded state vector
   * @returns {number} action id
   */
  selectAction(state) {
    // Epsilon-greedy
    if (Math.random() < this.epsilon) {
      return Math.floor(Math.random() * this.config.network.actionSpace);
    }

    // Greedy action. An explicit scan (first maximum wins) always yields a
    // valid index, whereas indexOf(Math.max(...)) returns -1 once a Q-value
    // is NaN.
    const qValues = this.qNetwork.forward(state);
    let best = 0;
    for (let i = 1; i < qValues.length; i++) {
      if (qValues[i] > qValues[best]) {
        best = i;
      }
    }
    return best;
  }

  /**
   * Samples one batch from the replay buffer and applies a TD update per
   * experience.
   * @returns {number} mean squared TD error of the batch, or 0 when the
   *   buffer holds fewer than batchSize experiences
   */
  train() {
    if (this.replayBuffer.size() < this.config.learning.batchSize) {
      return 0;
    }

    const batch = this.replayBuffer.sample(this.config.learning.batchSize);
    let totalLoss = 0;

    for (const experience of batch) {
      const { state, action, reward, nextState, done } = experience;

      // Current Q-value (this forward pass also records the activations
      // that updateQNetwork consumes below)
      const currentQ = this.qNetwork.forward(state);

      // Target Q-value
      let targetQ;
      if (done) {
        targetQ = reward;
      } else {
        const nextQ = this.targetNetwork.forward(nextState);
        targetQ = reward + this.config.learning.gamma * Math.max(...nextQ);
      }

      // TD error
      const tdError = targetQ - currentQ[action];
      totalLoss += tdError ** 2;

      // Simplified update (in production, use proper backprop)
      this.updateQNetwork(state, action, tdError);
    }

    const meanLoss = totalLoss / batch.length;
    this.losses.push(meanLoss);
    return meanLoss;
  }

  /**
   * Moves Q(state, action) towards its target by one gradient step.
   * Relies on the activations recorded by the latest qNetwork.forward call;
   * callers must have run forward(state) last (train does). Hidden-layer
   * biases are left untouched by this simplified update.
   * @param {number[]} state
   * @param {number} action - index of the Q-value being corrected
   * @param {number} tdError - target minus current Q-value
   */
  updateQNetwork(state, action, tdError) {
    const lr = this.config.learning.learningRate;
    const network = this.qNetwork;

    // Fallback: run forward pass to get activations
    if (!network.getPreOutputActivation()) {
      network.forward(state);
    }
    const hiddenOutput = network.getPreOutputActivation();
    if (!hiddenOutput) {
      return;
    }

    const outputLayer = network.layers[network.layers.length - 1];

    // Error signal reaching the last hidden layer. It is scaled element by
    // element (a number multiplied by an array is NaN) and taken from the
    // weights as they were during the forward pass, before they are changed.
    let delta = outputLayer.weights.map((row) => tdError * row[action]);

    // Gradient for output layer: dL/dW = tdError * hiddenOutput
    for (let i = 0; i < outputLayer.inputDim; i++) {
      outputLayer.weights[i][action] += lr * tdError * hiddenOutput[i];
    }
    outputLayer.bias[action] += lr * tdError;

    // Simplified backprop through hidden layers (gradient clipping for stability)
    const maxGrad = 1.0;

    for (let l = network.layers.length - 2; l >= 0; l--) {
      const layer = network.layers[l];
      const prevActivation = network.activations[l];
      const currentActivation = network.activations[l + 1];

      // ReLU derivative (1 if activation > 0, else 0), then clip
      delta = delta.map((d, j) => {
        const gated = currentActivation[j] > 0 ? d : 0;
        return Math.max(-maxGrad, Math.min(maxGrad, gated));
      });

      // Propagate delta to previous layer using the pre-update weights
      let upstream = null;
      if (l > 0) {
        upstream = new Array(layer.inputDim).fill(0);
        for (let i = 0; i < layer.inputDim; i++) {
          for (let j = 0; j < layer.outputDim; j++) {
            upstream[i] += delta[j] * layer.weights[i][j];
          }
        }
      }

      // Update weights for this layer (damped relative to the output layer)
      for (let i = 0; i < layer.inputDim; i++) {
        for (let j = 0; j < layer.outputDim; j++) {
          layer.weights[i][j] += lr * 0.1 * delta[j] * (prevActivation[i] || 0);
        }
      }

      if (upstream) {
        delta = upstream;
      }
    }
  }

  /** Copies the online network's parameters into the target network. */
  updateTargetNetwork() {
    this.targetNetwork.copyFrom(this.qNetwork);
  }

  /** Multiplies epsilon by the decay factor, never going below epsilonEnd. */
  decayEpsilon() {
    this.epsilon = Math.max(
      this.config.exploration.epsilonEnd,
      this.epsilon * this.config.exploration.epsilonDecay
    );
  }

  /**
   * Stores a transition and synchronises the target network every
   * targetUpdateFreq stored transitions.
   * @param {number[]} state
   * @param {number} action
   * @param {number} reward
   * @param {number[]} nextState
   * @param {boolean} done
   */
  addExperience(state, action, reward, nextState, done) {
    this.replayBuffer.add({ state, action, reward, nextState, done });
    this.stepCount++;

    if (this.stepCount % this.config.learning.targetUpdateFreq === 0) {
      this.updateTargetNetwork();
    }
  }
}

// Generate synthetic price data
/**
 * Produces a reproducible series of candles whose drift and volatility
 * switch between three regimes every 100 steps.
 * @param {number} n - number of candles to generate
 * @param {number} [seed=42] - initial state of the pseudo-random generator
 * @returns {Array<{timestamp: number, open: number, high: number,
 *   low: number, close: number, volume: number}>}
 */
function generatePriceData(n, seed = 42) {
  // Regime-switching dynamics, indexed by floor(step / 100) % 3
  const regimes = [
    { drift: 0.001, volatility: 0.012 },
    { drift: -0.0005, volatility: 0.02 },
    { drift: 0, volatility: 0.01 },
  ];

  // Linear congruential generator returning values in [0, 1)
  let lcgState = seed;
  const random = () => {
    lcgState = (lcgState * 9301 + 49297) % 233280;
    return lcgState / 233280;
  };

  const candles = [];
  let close = 100;

  for (let step = 0; step < n; step++) {
    const { drift, volatility } = regimes[Math.floor(step / 100) % 3];

    // Sum of two uniforms minus one: symmetric noise in (-1, 1)
    const return_ = drift + volatility * (random() + random() - 1);
    close = close * (1 + return_);

    // The draws below must stay in this order to keep the series reproducible.
    const open = close * (1 - random() * 0.002);
    const high = close * (1 + random() * 0.005);
    const low = close * (1 - random() * 0.005);
    const volume = 1000000 * (0.5 + random());

    candles.push({ timestamp: step, open, high, low, close, volume });
  }

  return candles;
}

/**
 * End-to-end demo: builds synthetic prices, trains the DQN agent for a
 * fixed number of episodes, evaluates it greedily once and prints a report.
 * @returns {Promise<void>}
 */
async function main() {
  console.log('═'.repeat(70));
  console.log('REINFORCEMENT LEARNING TRADING AGENT');
  console.log('═'.repeat(70));
  console.log();

  // 1. Generate data
  console.log('1. Environment Setup:');
  console.log('─'.repeat(70));

  const priceData = generatePriceData(1000);
  const env = new TradingEnvironment(rlConfig, priceData);

  // Builds an encoder whose history ends at the environment's current
  // candle. A fresh encoder per episode keeps the tail of the previous
  // episode out of the history, and including env.currentStep means no
  // candle is skipped between warm-up and the first step.
  const createWarmEncoder = () => {
    const encoder = new StateEncoder(rlConfig);
    for (let i = 0; i <= env.currentStep; i++) {
      encoder.update(priceData[i].close);
    }
    return encoder;
  };

  console.log(` Price data: ${priceData.length} candles`);
  console.log(` Initial capital: $${rlConfig.trading.initialCapital.toLocaleString()}`);
  console.log(` Action space: ${rlConfig.network.actionSpace} actions`);
  console.log(` State dimension: ${rlConfig.network.stateDim}`);
  console.log();

  // 2. Initialize agent
  console.log('2. Agent Configuration:');
  console.log('─'.repeat(70));

  const agent = new DQNAgent(rlConfig);

  console.log(` Network: ${rlConfig.network.hiddenLayers.join(' → ')} → ${rlConfig.network.actionSpace}`);
  console.log(` Learning rate: ${rlConfig.learning.learningRate}`);
  console.log(` Discount factor: ${rlConfig.learning.gamma}`);
  console.log(` Replay buffer: ${rlConfig.learning.replayBufferSize}`);
  console.log(` Batch size: ${rlConfig.learning.batchSize}`);
  console.log();

  // 3. Training
  console.log('3. Training Loop:');
  console.log('─'.repeat(70));

  const numEpisodes = 20;
  const episodeRewards = [];
  const episodeValues = [];

  for (let episode = 0; episode < numEpisodes; episode++) {
    let state = env.reset();
    let totalReward = 0;
    let done = false;

    // Update price history for state encoding
    const stateEncoder = createWarmEncoder();

    while (!done) {
      const encodedState = stateEncoder.encode(state);
      const action = agent.selectAction(encodedState);

      const { state: nextState, reward, done: episodeDone } = env.step(action);

      stateEncoder.update(priceData[env.currentStep].close);
      const nextEncodedState = stateEncoder.encode(nextState);

      agent.addExperience(encodedState, action, reward, nextEncodedState, episodeDone);

      // Train
      if (agent.stepCount % 4 === 0) {
        agent.train();
      }

      totalReward += reward;
      state = nextState;
      done = episodeDone;
    }

    agent.decayEpsilon();
    agent.episodeCount++;

    const finalValue = env.getPortfolioValue();
    episodeRewards.push(totalReward);
    episodeValues.push(finalValue);

    if ((episode + 1) % 5 === 0) {
      const avgReward = episodeRewards.slice(-5).reduce((a, b) => a + b, 0) / 5;
      console.log(` Episode ${(episode + 1).toString().padStart(3)}: Reward=${avgReward.toFixed(1).padStart(7)}, Value=$${finalValue.toFixed(0).padStart(7)}, ε=${agent.epsilon.toFixed(3)}`);
    }
  }
  console.log();

  // 4. Final evaluation
  console.log('4. Final Evaluation:');
  console.log('─'.repeat(70));

  // Run one episode with no exploration
  agent.epsilon = 0;
  let evalState = env.reset();
  let evalDone = false;
  const evalActions = [];
  // Largest drawdown seen at any step; the drawdown at the final candle
  // alone would understate the figure reported as "Max Drawdown".
  let maxDrawdown = 0;

  const evalEncoder = createWarmEncoder();

  while (!evalDone) {
    const encodedState = evalEncoder.encode(evalState);
    const action = agent.selectAction(encodedState);
    evalActions.push(ActionNames[action]);

    const { state: nextState, done } = env.step(action);
    evalEncoder.update(priceData[env.currentStep].close);
    maxDrawdown = Math.max(maxDrawdown, nextState.drawdown);
    evalState = nextState;
    evalDone = done;
  }

  const finalValue = env.getPortfolioValue();
  const totalReturn = (finalValue - rlConfig.trading.initialCapital) / rlConfig.trading.initialCapital;

  console.log(` Final Portfolio: $${finalValue.toFixed(2)}`);
  console.log(` Total Return: ${(totalReturn * 100).toFixed(2)}%`);
  console.log(` Realized P&L: $${env.realizedPnL.toFixed(2)}`);
  console.log(` Total Trades: ${env.trades.length}`);
  console.log(` Win Rate: ${(env.getWinRate() * 100).toFixed(1)}%`);
  console.log(` Sharpe Ratio: ${env.getSharpe().toFixed(3)}`);
  console.log(` Max Drawdown: ${(maxDrawdown * 100).toFixed(1)}%`);
  console.log();

  // 5. Action distribution
  console.log('5. Action Distribution:');
  console.log('─'.repeat(70));

  const actionCounts = {};
  for (const action of evalActions) {
    actionCounts[action] = (actionCounts[action] || 0) + 1;
  }

  for (const [action, count] of Object.entries(actionCounts).sort((a, b) => b[1] - a[1])) {
    const pct = (count / evalActions.length * 100).toFixed(1);
    const bar = '█'.repeat(Math.floor(count / evalActions.length * 40));
    console.log(` ${action.padEnd(12)} ${bar.padEnd(40)} ${pct}%`);
  }
  console.log();

  // 6. Learning curve
  console.log('6. Learning Curve:');
  console.log('─'.repeat(70));

  console.log(' Episode Returns:');
  let curve = ' ';
  const minReward = Math.min(...episodeRewards);
  const maxReward = Math.max(...episodeRewards);
  const range = maxReward - minReward || 1; // avoid 0/0 when all rewards are equal

  for (const reward of episodeRewards) {
    const normalized = (reward - minReward) / range;
    if (normalized < 0.25) curve += '▁';
    else if (normalized < 0.5) curve += '▃';
    else if (normalized < 0.75) curve += '▅';
    else curve += '█';
  }
  console.log(curve);
  console.log(` Min: ${minReward.toFixed(1)} Max: ${maxReward.toFixed(1)}`);
  console.log();

  // 7. Q-value analysis
  console.log('7. Q-Value Analysis (Sample State):');
  console.log('─'.repeat(70));

  const sampleState = evalEncoder.encode(evalState);
  const qValues = agent.qNetwork.forward(sampleState);

  console.log(' Action Q-Values:');
  for (let i = 0; i < ActionNames.length; i++) {
    const bar = qValues[i] > 0 ? '+'.repeat(Math.min(20, Math.floor(qValues[i] * 2))) : '';
    const negBar = qValues[i] < 0 ? '-'.repeat(Math.min(20, Math.floor(Math.abs(qValues[i]) * 2))) : '';
    console.log(` ${ActionNames[i].padEnd(12)} ${qValues[i] >= 0 ? '+' : ''}${qValues[i].toFixed(3)} ${bar}${negBar}`);
  }
  console.log();

  // 8. Experience replay stats
  console.log('8. Experience Replay Statistics:');
  console.log('─'.repeat(70));

  console.log(` Buffer size: ${agent.replayBuffer.size()}`);
  console.log(` Total steps: ${agent.stepCount}`);
  console.log(` Training updates: ${agent.losses.length}`);
  if (agent.losses.length > 0) {
    const avgLoss = agent.losses.reduce((a, b) => a + b, 0) / agent.losses.length;
    console.log(` Average loss: ${avgLoss.toFixed(4)}`);
  }
  console.log();

  // 9. Trading strategy emerged
  console.log('9. Emergent Strategy Analysis:');
  console.log('─'.repeat(70));

  // Analyze when agent buys vs sells
  const buyActions = evalActions.filter(a => a.includes('BUY')).length;
  const sellActions = evalActions.filter(a => a.includes('SELL')).length;
  const holdActions = evalActions.filter(a => a === 'HOLD').length;

  console.log(' The agent learned to:');
  if (holdActions > evalActions.length * 0.5) {
    console.log(' - Be patient (primarily holding positions)');
  }
  if (buyActions > sellActions) {
    console.log(' - Favor long positions (more buys than sells)');
  } else if (sellActions > buyActions) {
    console.log(' - Manage risk actively (frequent profit taking)');
  }
  console.log();

  // 10. RuVector integration
  console.log('10. RuVector Vector Storage:');
  console.log('─'.repeat(70));
  console.log(' State vectors can be stored for similarity search:');
  console.log();
  console.log(` State vector sample (first 5 dims):`);
  console.log(` [${sampleState.slice(0, 5).map(v => v.toFixed(4)).join(', ')}]`);
  console.log();
  console.log(' Use cases:');
  console.log(' - Find similar market states from history');
  console.log(' - Experience replay with prioritized sampling');
  console.log(' - State clustering for interpretability');
  console.log();

  console.log('═'.repeat(70));
  console.log('Reinforcement learning agent training completed');
  console.log('═'.repeat(70));
}

// Run the demo. A failure is logged and also reflected in the exit code so
// that scripts invoking this example can detect it.
main().catch((error) => {
  console.error(error);
  process.exitCode = 1;
});
Reference in New Issue
Block a user