#!/usr/bin/env python
# coding: utf-8
"""
Recognizing hand-written digits
-------------------------------
This notebook adapts the existing support vector classification example
from scikit-learn to PyRCN to demonstrate how PyRCN can be used to
classify hand-written digits.
The tutorial is based on numpy, scikit-learn and PyRCN.
"""
import time

import numpy as np
from scipy.stats import uniform, loguniform
from sklearn.base import clone
from sklearn.metrics import make_scorer
from sklearn.model_selection import (
    ParameterGrid, RandomizedSearchCV, cross_validate, train_test_split)

from pyrcn.model_selection import SequentialSearchCV
from pyrcn.echo_state_network import ESNClassifier
from pyrcn.metrics import accuracy_score
from pyrcn.datasets import load_digits
# Load the dataset, which is part of scikit-learn and consists of 1797 8x8
# images. We use our data loader, which is derived from scikit-learn's data
# loader and returns arrays of 8x8 sequences and the corresponding labels.
X, y = load_digits(return_X_y=True, as_sequence=True)
print("Number of digits: {0}".format(len(X)))
print("Shape of digits {0}".format(X[0].shape))
# Split the dataset into training and test sets. We train the ESN using 80%
# of the digits and test it on the remaining images.
# Each entry of y holds the single label of one sequence; extract it so that
# the split can be stratified by digit class.
stratify = np.asarray([np.unique(yt) for yt in y]).flatten()
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=stratify, random_state=42)
X_tr = np.copy(X_train)
y_tr = np.copy(y_train)
X_te = np.copy(X_test)
y_te = np.copy(y_test)
# Repeat each label once per time step: every digit is a sequence of eight
# steps, and the ESN needs one target per step during training.
for k, _ in enumerate(y_tr):
    y_tr[k] = np.repeat(y_tr[k], 8, 0)
for k, _ in enumerate(y_te):
    y_te[k] = np.repeat(y_te[k], 8, 0)
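# Illustrative check of the repetition above (np.shape also handles scalar
# labels, in case the loader returns them as plain numbers):
print("Label shape before/after repetition: {0} -> {1}".format(
    np.shape(y_train[0]), np.shape(y_tr[0])))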
print("Number of digits in training set: {0}".format(len(X_train)))
print("Shape of digits in training set: {0}".format(X_train[0].shape))
print("Number of digits in test set: {0}".format(len(X_test)))
print("Shape of digits in test set: {0}".format(X_test[0].shape))
# Set up an ESN
#
# To develop an ESN model for digit recognition, we need to tune several
# hyper-parameters, e.g., input_scaling, spectral_radius, bias_scaling and
# leaky integration.
#
# We follow the sequential approach proposed in the introductory paper of
# PyRCN to optimize the hyper-parameters step by step.
#
# We define the search space for each step together with the type of search
# (a randomized search in this context).
#
# Finally, we initialize an ESNClassifier with the desired output strategy
# and with the initially fixed parameters.
initially_fixed_params = {
    'hidden_layer_size': 50, 'input_activation': 'identity', 'k_in': 5,
    'bias_scaling': 0.0, 'reservoir_activation': 'tanh', 'leakage': 1.0,
    'bidirectional': False, 'k_rec': 10, 'continuation': False,
    'alpha': 1e-5, 'random_state': 42,
    'decision_strategy': "winner_takes_all"}
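# Brief orientation on the fixed parameters (standard ESN terminology; see
# the PyRCN documentation for exact definitions): 'hidden_layer_size' is the
# reservoir size, 'leakage' the leaky-integration rate, 'alpha' the ridge
# regularization strength of the linear read-out, and 'k_in'/'k_rec' the
# number of non-zero input/recurrent connections per neuron.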
step1_esn_params = {'input_scaling': uniform(loc=1e-2, scale=1),
                    'spectral_radius': uniform(loc=0, scale=2)}
step2_esn_params = {'leakage': loguniform(1e-5, 1e0)}
step3_esn_params = {'bias_scaling': uniform(loc=0, scale=2)}
step4_esn_params = {'alpha': loguniform(1e-5, 1e0)}
kwargs_step1 = {'n_iter': 200, 'random_state': 42, 'verbose': 1,
                'n_jobs': 1, 'scoring': make_scorer(accuracy_score)}
kwargs_step2 = {'n_iter': 50, 'random_state': 42, 'verbose': 1,
                'n_jobs': -1, 'scoring': make_scorer(accuracy_score)}
kwargs_step3 = {'verbose': 1, 'n_jobs': -1,
                'scoring': make_scorer(accuracy_score)}
kwargs_step4 = {'n_iter': 50, 'random_state': 42, 'verbose': 1,
                'n_jobs': -1, 'scoring': make_scorer(accuracy_score)}
# The searches are defined similarly to the steps of a
# sklearn.pipeline.Pipeline:
searches = [('step1', RandomizedSearchCV, step1_esn_params, kwargs_step1),
            ('step2', RandomizedSearchCV, step2_esn_params, kwargs_step2),
            ('step3', RandomizedSearchCV, step3_esn_params, kwargs_step3),
            ('step4', RandomizedSearchCV, step4_esn_params, kwargs_step4)]
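# For illustration only: the first step on its own would be equivalent to the
# sketch below. SequentialSearchCV runs the same search internally and carries
# the best parameters over to the next step.
# step1_search = RandomizedSearchCV(ESNClassifier(**initially_fixed_params),
#                                   step1_esn_params, **kwargs_step1)
# step1_search.fit(X_tr, y_tr)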
base_esn = ESNClassifier(**initially_fixed_params)
# Optimization
#
# We provide a SequentialSearchCV that iterates through the list of searches
# defined above. It can be combined with any model selection tool from
# scikit-learn.
sequential_search = SequentialSearchCV(base_esn, searches=searches).fit(
    X_tr, y_tr)
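# The tuned configuration can be inspected through the standard scikit-learn
# estimator API, e.g.:
print("Tuned parameters: {0}".format(
    sequential_search.best_estimator_.get_params()))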
# Use the ESN with final hyper-parameters
#
# After the optimization, we extract the ESN with the final hyper-parameters
# as the result of the optimization.
base_esn = sequential_search.best_estimator_
# Test the ESN
#
# Finally, we increase the reservoir size and compare the impact of uni- and
# bidirectional ESNs. Note that the ESN benefits strongly from both
# increasing the reservoir size and the bidirectional working mode.
param_grid = {'hidden_layer_size': [50, 100, 200, 400, 500],
              'bidirectional': [False, True]}
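# ParameterGrid expands this dict into the cross product of all values,
# i.e. 5 reservoir sizes x 2 directions = 10 configurations:
print("Number of configurations: {0}".format(len(ParameterGrid(param_grid))))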
print("CV results\tFit time\tInference time\tAccuracy score\tSize[Bytes]")
for params in ParameterGrid(param_grid):
    # Cross-validate the configuration on the training set.
    esn_cv = cross_validate(clone(base_esn).set_params(**params),
                            X=X_train, y=y_train,
                            scoring=make_scorer(accuracy_score), n_jobs=-1)
    # Fit once sequentially and once with n_jobs=-1 to compare fit times.
    t1 = time.time()
    esn = clone(base_esn).set_params(**params).fit(X_train, y_train)
    t_fit = time.time() - t1
    t1 = time.time()
    esn_par = clone(base_esn).set_params(**params).fit(X_train, y_train,
                                                       n_jobs=-1)
    t_fit_par = time.time() - t1
    mem_size = esn.__sizeof__()
    # Measure inference time and accuracy on the held-out test set.
    t1 = time.time()
    acc_score = accuracy_score(y_test, esn.predict(X_test))
    t_inference = time.time() - t1
    print(f"{np.mean(esn_cv['test_score'])}\t{t_fit}\t{t_fit_par}\t"
          f"{t_inference}\t{acc_score}\t{mem_size}")