This repository was archived by the owner on May 11, 2023. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 7
/
Copy pathPDeltaRuleTrainer.cs
288 lines (276 loc) · 10.8 KB
/
PDeltaRuleTrainer.cs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
using RCNet.Extensions;
using System;
using System.Collections.Generic;
using System.Threading.Tasks;
namespace RCNet.Neural.Network.NonRecurrent.PP
{
/// <summary>
/// Implements the P-delta rule trainer of the parallel perceptron network.
/// </summary>
[Serializable]
public class PDeltaRuleTrainer : INonRecurrentNetworkTrainer
{
//Attribute properties
/// <inheritdoc/>
public double MSE { get; private set; }
/// <inheritdoc/>
public int MaxAttempt { get; private set; }
/// <inheritdoc/>
public int Attempt { get; private set; }
/// <inheritdoc/>
public int MaxAttemptEpoch { get; private set; }
/// <inheritdoc/>
public int AttemptEpoch { get; private set; }
/// <inheritdoc/>
public string InfoMessage { get; private set; }
//Attributes
//Deep-cloned trainer configuration (see constructor)
private readonly PDeltaRuleTrainerSettings _cfg;
//The parallel perceptron network being trained
private readonly ParallelPerceptron _net;
//Training samples: inputs and corresponding ideal outputs (parallel lists)
private readonly List<double[]> _inputVectorCollection;
private readonly List<double[]> _outputVectorCollection;
//Randomness source used to (re)initialize network weights on each attempt
private readonly Random _rand;
//Half of the output resolution step (1 / (2 * squash coefficient)); outputs within this distance from the ideal are accepted
private readonly double _acceptableError;
//Output resolution squash coefficient taken from the network
private readonly double _resSquashCoeff;
//Weight of the margin-stabilization updates (x = +/- this value in the margin branches)
private readonly double _marginSignificance;
//Current half-width of the clear margin zone around zero gate sums; adapted during training
private double _clearMargin;
//Target and cap for the average margin-hit measure M (see AdjustLearning)
private readonly double _minM;
private readonly double _maxM;
//Current learning rate; adapted between epochs within [_cfg.MinLR, _cfg.MaxLR]
private double _learningRate;
//Network weights snapshot taken before the epoch's update (for rollback)
private double[] _prevWeights;
//MSE of the last accepted epoch (for comparison/rollback)
private double _prevMSE;
//Per-worker row ranges and accumulators for the parallelized epoch
private readonly List<WorkerRange> _workerRangeCollection;
//Constructor
/// <summary>
/// Creates an initialized instance.
/// </summary>
/// <param name="net">The PP to be trained.</param>
/// <param name="inputVectorCollection">The input vectors (input).</param>
/// <param name="outputVectorCollection">The output vectors (ideal).</param>
/// <param name="cfg">The configuration of the trainer.</param>
/// <param name="rand">The random object to be used.</param>
public PDeltaRuleTrainer(ParallelPerceptron net,
                         List<double[]> inputVectorCollection,
                         List<double[]> outputVectorCollection,
                         PDeltaRuleTrainerSettings cfg,
                         Random rand
                         )
{
    //Store the configuration and the training data
    _cfg = (PDeltaRuleTrainerSettings)cfg.DeepClone();
    MaxAttempt = _cfg.NumOfAttempts;
    MaxAttemptEpoch = _cfg.NumOfAttemptEpochs;
    _net = net;
    _rand = rand;
    _inputVectorCollection = inputVectorCollection;
    _outputVectorCollection = outputVectorCollection;
    //Constants derived from the network's output resolution
    _resSquashCoeff = _net.ResSquashCoeff;
    _acceptableError = 1d / (2d * _resSquashCoeff);
    _marginSignificance = 1;
    _clearMargin = 0.05;
    _minM = _acceptableError * _resSquashCoeff;
    _maxM = 4d * _minM;
    //Split the sample rows into contiguous ranges, one per parallel worker
    _workerRangeCollection = new List<WorkerRange>();
    int numOfWorkers = Math.Max(1, Math.Min(Environment.ProcessorCount, _inputVectorCollection.Count));
    int workerBatchSize = _inputVectorCollection.Count / numOfWorkers;
    int startRow = 0;
    for (int w = 0; w < numOfWorkers; w++)
    {
        //The last worker also takes any remainder rows left by the integer division
        int endRow = (w == numOfWorkers - 1) ? _inputVectorCollection.Count - 1 : startRow + workerBatchSize - 1;
        _workerRangeCollection.Add(new WorkerRange(startRow, endRow, _net.NumOfWeights));
        startRow += workerBatchSize;
    }
    InfoMessage = string.Empty;
    //Open the first training attempt
    Attempt = 0;
    NextAttempt();
    return;
}
//Properties
/// <inheritdoc/>
public INonRecurrentNetwork Net => _net;
//Methods
/// <summary>
/// Adapts the learning rate and the clear margin according to the epoch results,
/// rolling the weights back when the error worsened and the rate can still be decreased.
/// </summary>
/// <param name="M">The average per-sample count of gate sums that fell into the clear margin zone.</param>
private void AdjustLearning(double M)
{
    bool keepIterationResults = true;
    //Capture the learning rate before it is adapted - the margin update uses the pre-update value
    double marginLR = _learningRate;
    if (AttemptEpoch >= 2)
    {
        if (_prevMSE > MSE)
        {
            //Error improved -> speed up learning (capped at MaxLR)
            _learningRate = Math.Min(_cfg.MaxLR, _learningRate * _cfg.IncLR);
        }
        else if (_prevMSE < MSE)
        {
            //Error worsened -> discard this epoch unless the rate is already at its minimum
            if (_learningRate > _cfg.MinLR)
            {
                keepIterationResults = false;
            }
            //Slow down learning (floored at MinLR)
            _learningRate = Math.Max(_cfg.MinLR, _learningRate * _cfg.DecLR);
        }
    }
    if (keepIterationResults)
    {
        //Accept this epoch's MSE as the new baseline
        _prevMSE = MSE;
        //Move the clear margin towards the target M, once per gate
        //(kept as repeated additions to preserve the exact floating-point result)
        for (int gate = 0; gate < _net.Gates; gate++)
        {
            _clearMargin += marginLR * (_minM - Math.Min(_maxM, M));
        }
    }
    else
    {
        //Roll back: restore the previous MSE and weights
        MSE = _prevMSE;
        _net.SetWeights(_prevWeights);
    }
    return;
}
/// <inheritdoc/>
public bool NextAttempt()
{
    if (Attempt >= MaxAttempt)
    {
        //All attempts exhausted -> do nothing and report failure
        return false;
    }
    //Open the next attempt and reset the whole training state
    ++Attempt;
    _net.RandomizeWeights(_rand);
    _clearMargin = 0.05;
    _learningRate = _cfg.IniLR;
    _prevWeights = _net.GetWeights();
    _prevMSE = 0;
    MSE = 0;
    AttemptEpoch = 0;
    return true;
}
/// <inheritdoc/>
public bool Iteration()
{
//One P-delta training epoch: accumulate per-worker weight changes in parallel,
//merge them into the network and adapt the learning parameters.
if (AttemptEpoch == MaxAttemptEpoch)
{
//Max epoch of the current attempt reached -> try to open a new attempt
if (!NextAttempt())
{
//Next attempt is not available -> training finished
return false;
}
}
//Epoch increment
++AttemptEpoch;
//Snapshot the network weights so AdjustLearning can roll back a worsening epoch
_prevWeights = _net.GetWeights();
//Working copy of the weights into which all workers' accumulated changes are merged
double[] adjustedNetworkWeights = _net.GetWeights();
//Each worker processes its own disjoint row range and writes only its own accumulators,
//so no synchronization is needed inside the parallel body
Parallel.ForEach(_workerRangeCollection, worker =>
{
double[] gateSums = new double[_net.Gates];
for (int row = worker.FromRow; row <= worker.ToRow; row++)
{
//Compute network output; gateSums receives the per-gate weighted sums as a side output
double computedOutput = _net.Compute(_inputVectorCollection[row], gateSums)[0];
double idealOutput = _outputVectorCollection[row][0];
for (int gateIdx = 0; gateIdx < _net.Gates; gateIdx++)
{
//x is the update direction/magnitude for this gate on this sample
double x = 0;
if (computedOutput > (idealOutput + _acceptableError) && gateSums[gateIdx] >= 0)
{
//Output too high and the gate voted positive -> push the gate down
x = -1;
}
else if (computedOutput < (idealOutput - _acceptableError) && gateSums[gateIdx] < 0)
{
//Output too low and the gate voted negative -> push the gate up
x = 1;
}
else if (computedOutput <= (idealOutput + _acceptableError) && gateSums[gateIdx] >= 0 && gateSums[gateIdx] < _clearMargin)
{
//Output acceptable but the positive gate sum is inside the clear margin -> stabilize away from zero
++worker.M;
x = _marginSignificance;
}
else if (computedOutput >= (idealOutput - _acceptableError) && gateSums[gateIdx] >= -_clearMargin && gateSums[gateIdx] < 0)
{
//Output acceptable but the negative gate sum is inside the clear margin -> stabilize away from zero
++worker.M;
x = -_marginSignificance;
}
else
{
//No change
x = 0;
}
//Accumulate the weight changes for this gate (flat layout: per gate, inputs followed by the bias)
if (x != 0)
{
int weightFlatIdx = gateIdx * (_net.NumOfInputValues + 1);
for (int i = 0; i < _net.NumOfInputValues + 1; i++, weightFlatIdx++)
{
double inputValue = i < _net.NumOfInputValues ? _inputVectorCollection[row][i] : ParallelPerceptron.BiasValue;
worker.WeightChangeAcc[weightFlatIdx] += _learningRate * inputValue * x;
}
}
}
}
});
//Merge the workers' accumulated changes into the working weights and sum the margin-hit counters
double glM = 0;
foreach (WorkerRange worker in _workerRangeCollection)
{
glM += worker.M;
worker.UpdateWeigths(adjustedNetworkWeights);
}
//Apply the updated weights and evaluate the epoch result
_net.SetWeights(adjustedNetworkWeights);
_net.NormalizeWeights();
MSE = _net.ComputeBatchErrorStat(_inputVectorCollection, _outputVectorCollection).MeanSquare;
//Adapt learning rate / clear margin (may roll weights back to _prevWeights); M is averaged per sample
AdjustLearning(glM / (double)_inputVectorCollection.Count);
return true;
}
//Inner classes
/// <summary>
/// Holds one worker's contiguous range of sample rows together with its private
/// accumulators (weight changes and the clear-margin hit counter M).
/// </summary>
[Serializable]
internal class WorkerRange
{
    //Index of the first sample row processed by this worker (inclusive)
    public int FromRow { get; set; }
    //Index of the last sample row processed by this worker (inclusive)
    public int ToRow { get; set; }
    //Accumulated weight deltas (flat layout matching the network weights)
    public double[] WeightChangeAcc { get; set; }
    //Number of gate sums that fell into the clear margin zone
    public double M { get; set; }
    //Constructor
    /// <summary>
    /// Creates an initialized instance.
    /// </summary>
    /// <param name="fromRow">Index of the first sample row (inclusive).</param>
    /// <param name="toRow">Index of the last sample row (inclusive).</param>
    /// <param name="numOfWeights">The total number of network weights.</param>
    internal WorkerRange(int fromRow, int toRow, int numOfWeights)
    {
        FromRow = fromRow;
        ToRow = toRow;
        //A new double[] is already zero-initialized in C#; no explicit fill is needed
        WeightChangeAcc = new double[numOfWeights];
        M = 0;
        return;
    }
    //Methods
    /// <summary>
    /// Adds the accumulated weight changes into the given weights (in place) and resets the accumulators.
    /// </summary>
    /// <param name="weights">The flat weights to be updated.</param>
    internal void UpdateWeigths(double[] weights)
    {
        //A plain loop avoids Parallel.For scheduling overhead on this small per-worker array
        //and produces the identical result
        for (int i = 0; i < WeightChangeAcc.Length; i++)
        {
            weights[i] += WeightChangeAcc[i];
            //Reset back to zero for the next epoch
            WeightChangeAcc[i] = 0;
        }
        M = 0;
        return;
    }
}//WorkerRange
}//PDeltaRuleTrainer
}//Namespace