-
Notifications
You must be signed in to change notification settings - Fork 15
/
Copy pathdemo_classification_trading.py
143 lines (125 loc) · 5.5 KB
/
demo_classification_trading.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
import matplotlib.pyplot as plt
import numpy as np
from scipy.linalg import expm, inv, eig
from sklearn.metrics import accuracy_score, plot_confusion_matrix
from sklearn.neural_network import MLPClassifier
from bayesian_decision_tree.classification import PerpendicularClassificationTree
def get_covariance(sigma: float, delta: float, theta: np.ndarray) -> np.ndarray:
    """Return the covariance of the noise accumulated over one time step.

    Evaluates the closed form
    ``sigma**2 * inv(theta + theta.T) @ (I - expm(-(theta + theta.T) * delta))``
    with true matrix products.

    Args:
        sigma: Scalar noise scale; enters the result as ``sigma ** 2``.
        delta: Length of the time step.
        theta: Square mean-reversion matrix.

    Returns:
        Square array with the same shape as ``theta``.

    Note:
        The closed form equals the integral of
        ``expm(-theta * s) @ expm(-theta.T * s)`` over ``[0, delta]`` only when
        ``theta`` commutes with its transpose; for other matrices it is an
        approximation.
    """
    theta_p = theta + theta.T
    identity = np.eye(theta.shape[0])
    # ``*`` between ndarrays is element-wise, which would silently drop the
    # cross terms of the product; use an explicit matrix product instead.
    return (sigma ** 2.0) * np.dot(inv(theta_p), identity - expm(-theta_p * delta))
def sample_gaussian(n: int, covariance: np.ndarray) -> np.ndarray:
    """Draw ``n`` zero-mean Gaussian vectors whose covariance is ``covariance``.

    Standard-normal draws from the global NumPy random state are mixed by the
    eigenvectors of ``covariance`` scaled with the square roots of the real
    parts of its eigenvalues.

    Args:
        n: Number of samples (columns) to draw.
        covariance: Square covariance matrix.

    Returns:
        Array of shape ``(covariance.shape[0], n)``, one sample per column.
    """
    eigenvalues, eigenvectors = eig(covariance)
    # eig() reports eigenvalues as complex numbers; only the real part is used
    scales = np.sqrt(np.real(eigenvalues))
    mixing = eigenvectors @ np.diag(scales)
    dim = mixing.shape[0]
    white_noise = np.random.normal(loc=0.0, scale=1.0, size=(dim, n))
    return mixing @ white_noise
def sample_mean_reversion(n: int, x0: np.ndarray, mu: np.ndarray, sigma: float, delta: float,
                          theta: np.ndarray) -> np.ndarray:
    """Simulate a discretely sampled multivariate mean-reverting path.

    Each column is obtained from the previous one as
    ``mu + expm(-theta * delta) @ (previous - mu) + noise``, where the noise
    columns come from ``sample_gaussian`` with the covariance returned by
    ``get_covariance``.

    Args:
        n: Number of time points (columns) in the returned path.
        x0: Initial state, written into the first column.
        mu: Level the process reverts to.
        sigma: Scalar noise scale passed to ``get_covariance``.
        delta: Time step between consecutive columns.
        theta: Square mean-reversion matrix.

    Returns:
        Array with one row per component and ``n`` columns.

    Raises:
        AssertionError: If ``theta`` or the derived covariance has an
            eigenvalue whose real part is not strictly positive.
    """
    if not positive_eigenvalues(theta):
        raise AssertionError("Input theta does not have all positive eigenvalues")

    noise_covariance = get_covariance(sigma, delta, theta)
    if not positive_eigenvalues(noise_covariance):
        raise AssertionError("Covariance does not have all positive eigenvalues")

    shocks = sample_gaussian(n, noise_covariance)
    decay = expm(-theta * delta)

    # every column is written below, so the buffer can start uninitialised
    paths = np.empty(shocks.shape)
    paths[:, [0]] = x0

    n_steps = paths.shape[1]
    for step in range(1, n_steps):
        previous = paths[:, [step - 1]]
        pulled_back = np.dot(decay, previous - mu)
        paths[:, [step]] = mu + pulled_back + shocks[:, [step - 1]]

    return paths
def positive_eigenvalues(theta: np.ndarray) -> bool:
    """Report whether every eigenvalue of ``theta`` has a strictly positive real part.

    Args:
        theta: Square matrix to inspect.

    Returns:
        Truthy when the real parts of all eigenvalues are greater than zero.
    """
    eigenvalues, _ = eig(theta)
    real_parts = np.real(eigenvalues)
    return np.all(real_parts > 0.0)
# demo script for classification (binary or multiclass) using classic, axis-normal splits:
# simulates mean-reverting prices for three stocks, trains a classifier on the current
# prices to predict the direction of each stock's next move, and plots the resulting PnL
if __name__ == '__main__':
    np.random.seed(0)

    default_font_size = 16
    model_type = 'tree'  # it can be 'tree' or 'nn'

    plt.rc('axes', titlesize=default_font_size)  # fontsize of the axes title
    plt.rc('axes', labelsize=default_font_size)  # fontsize of the x and y labels
    plt.rc('xtick', labelsize=default_font_size)  # fontsize of the tick labels
    plt.rc('ytick', labelsize=default_font_size)  # fontsize of the tick labels
    plt.rc('legend', fontsize=default_font_size)  # legend fontsize
    plt.rc('figure', titlesize=default_font_size)  # fontsize of the figure title

    n = 10_000
    n += 1  # used for the deltas

    # parameters of the simulated mean-reverting prices (one row per stock)
    mu = np.array([[100.0], [110.0], [105.0]])
    theta = np.array([[2.0, -0.5, 0.0], [0.2, 1.0, 0.0], [0.0, 0.0, 0.1]])
    dt = 0.1
    sigma = 1.0
    d = mu.shape[0]

    # start every path at its mean-reversion level
    paths = sample_mean_reversion(n, mu, mu, sigma, dt, theta)
    x = paths.T  # one row per time step, one column per stock

    plt.plot(x)
    plt.hlines(mu, 0, n, linestyles=d * ['--'], zorder=100)
    plt.title('Stock prices')
    plt.legend(['Stock A', 'Stock B', 'Stock C'])
    ax = plt.gca()
    ax.set_xlim([0, n])
    ax.set_ylim([90, 120])
    plt.savefig('trading_example_prices.png')
    plt.show()

    # build up to 2**d class labels: bit k of the label is 1 when stock k rises over the
    # next step and 0 when it falls, so the label encodes all next-step directions at once
    y_diff = np.diff(x, axis=0)
    x = x[:-1, :]  # drop the last price row, which has no following move
    y = np.dot((np.sign(y_diff) + 1) / 2, np.reshape(2.0 ** np.arange(d), (d, 1))).astype(int)

    # chronological 80/20 train/test split
    n_train = int(x.shape[0] * 0.8)
    X_train = x[:n_train, :]
    y_train = y[:n_train, :]
    X_test = x[n_train:, :]
    y_test = y[n_train:, :]
    y_diff_test = y_diff[n_train:, :]

    n_classes = len(np.unique(y))

    # prior
    prior_strength = 1
    prior = prior_strength * np.array(n_classes * [1.0]) / n_classes

    # model
    # compare by value: ``is`` tests object identity, which is not guaranteed for strings
    if model_type == 'tree':
        model = PerpendicularClassificationTree(
            partition_prior=0.9,
            prior=prior,
            delta=0,
            prune=False)
    elif model_type == 'nn':
        model = MLPClassifier(
            hidden_layer_sizes=(10, 10),
            random_state=0)
    else:
        raise AssertionError('Model not included ' + model_type)

    # train
    model.fit(X_train, y_train)
    print(model)
    print()

    # compute accuracy
    y_pred_train = model.predict(X_train)
    y_pred_test = model.predict(X_test)

    # decode the predicted label back into one bit per stock and map 0/1 to a -1/+1 position
    positions = (2 * (y_pred_test.reshape((y_pred_test.shape[0], 1)) // 2.0 ** np.arange(d).astype(int) % 2) - 1)

    accuracy_train = accuracy_score(y_train, y_pred_train)
    accuracy_test = accuracy_score(y_test, y_pred_test)
    info_train = 'Train accuracy: {:.4f} %'.format(100 * accuracy_train)
    info_test = 'Test accuracy: {:.4f} %'.format(100 * accuracy_test)
    print(info_train)
    print(info_test)

    # cumulative profit per stock: position times the realised price change
    pnl = np.cumsum(positions * y_diff_test, axis=0)
    plt.plot(pnl)
    plt.hlines(0, 0, pnl.shape[0])
    ax = plt.gca()
    ax.set_xlim([0, pnl.shape[0]])
    ax.set_ylim(np.array([-30, 200]))
    plt.grid(True)
    plt.title('Test period PnL')
    plt.legend(['Stock A', 'Stock B', 'Stock C'])
    plt.savefig('trading_example_pnl_' + model_type + '.png')
    plt.show()

    # one label per class, e.g. '+A-B+C', built from the class index's bits; plain string
    # concatenation avoids reaching into the private ``np.core`` namespace
    stock_names = ['A', 'B', 'C']
    class_bits = np.reshape(np.arange(2 ** d), (2 ** d, 1)) // 2.0 ** np.arange(d).astype(int) % 2
    display_labels = [
        ''.join(('-' if direction < 0 else '+') + name
                for direction, name in zip(2 * row - 1, stock_names))
        for row in class_bits
    ]

    # NOTE(review): plot_confusion_matrix was removed in scikit-learn 1.2; on newer versions
    # this call (and its import) must move to ConfusionMatrixDisplay.from_estimator
    disp = plot_confusion_matrix(model, X_test, y_test,
                                 display_labels=display_labels,
                                 cmap=plt.cm.Blues,
                                 normalize='true')
    disp.ax_.set_title('Test period confusion matrix')
    plt.xticks(rotation=90)
    plt.savefig('trading_example_confusion_matrix_' + model_type + '.png', bbox_inches='tight')
    plt.show()