
Commit b277d06

#1. Provide sparse features to each layer; the sparse features come from the input layer.
#2. Refactor the dropout layer and the output layer.
#3. Refactor layer initialization.
1 parent f13243e commit b277d06

13 files changed: +254 -207 lines

RNNSharp/BPTTLayer.cs

+16 -43
@@ -25,9 +25,10 @@ public class BPTTLayer : SimpleLayer
         protected Matrix<double> BpttWeightsDelta { get; set; }
         protected Matrix<double> BpttWeightsLearningRate { get; set; }
 
-        public BPTTLayer(int hiddenLayerSize) : base(hiddenLayerSize)
+        public BPTTLayer(int hiddenLayerSize, ModelSetting modelsetting) : base(hiddenLayerSize)
         {
-
+            bptt = modelsetting.Bptt + 1;
+            bptt_block = 10;
         }
 
         public BPTTLayer()
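
The constructor now takes the ModelSetting, so the BPTT truncation depth is configured rather than hard-coded: bptt stores the configured depth plus one (for the current time step), while bptt_block stays fixed at 10. A minimal construction sketch under the new signature; the layer size and the way ModelSetting acquires its Bptt value are illustrative assumptions:

// Sketch only: assumes ModelSetting carries a Bptt value configured
// elsewhere (e.g. from command-line options).
ModelSetting setting = new ModelSetting();
BPTTLayer hidden = new BPTTLayer(200, setting);  // 200 hidden units, arbitrary
// Inside the constructor: bptt = setting.Bptt + 1; bptt_block = 10;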
@@ -55,7 +56,8 @@ public override void InitializeWeights(int sparseFeatureSize, int denseFeatureSi
             BpttWeights = new Matrix<double>(LayerSize, LayerSize);
             BpttWeightsDelta = new Matrix<double>(LayerSize, LayerSize);
 
-            Logger.WriteLine("Initializing weights, random value is {0}", RNNHelper.rand.NextDouble());
+            Logger.WriteLine("Initializing weights, sparse feature size: {0}, dense feature size: {1}, random value is {2}",
+                SparseFeatureSize, DenseFeatureSize, RNNHelper.rand.NextDouble());
             initWeights();
 
             //Initialize BPTT
@@ -206,36 +208,24 @@ public override void computeLayer(SparseVector sparseFeature, double[] denseFeat
             }
 
             //activate layer
-            activityLayer(isTrain);
+            activityLayer();
         }
 
-        private void activityLayer(bool isTrain)
+        private void activityLayer()
         {
             Parallel.For(0, LayerSize, parallelOption, a =>
             {
                 double score = cellOutput[a];
-                if (mask[a] == true)
+                if (score > 50)
                 {
-                    score = 0;
+                    score = 50; //for numerical stability
                 }
-                else
+                else if (score < -50)
                 {
-                    if (isTrain == false)
-                    {
-                        score = score * (1.0 - Dropout);
-                    }
-
-                    if (score > 50)
-                    {
-                        score = 50; //for numerical stability
-                    }
-                    else if (score < -50)
-                    {
-                        score = -50; //for numerical stability
-                    }
-
-                    score = 1.0 / (1.0 + Math.Exp(-score));
+                    score = -50; //for numerical stability
                 }
+
+                score = 1.0 / (1.0 + Math.Exp(-score));
                 cellOutput[a] = score;
             });
         }
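
With the dropout masking moved out (see the new DropoutLayer below), activityLayer reduces to a clipped logistic sigmoid. The clamp at +/-50 keeps the intermediate Math.Exp value well inside double range. A standalone sketch of the same computation:

using System;

static class Activation
{
    // Mirrors the new activityLayer body: clamp the pre-activation so the
    // intermediate Math.Exp value stays finite, then apply the logistic
    // function.
    public static double ClippedSigmoid(double score)
    {
        if (score > 50)
        {
            score = 50;   // for numerical stability, as in the diff
        }
        else if (score < -50)
        {
            score = -50;
        }
        return 1.0 / (1.0 + Math.Exp(-score));
    }
}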
@@ -268,7 +258,6 @@ public override void LearnFeatureWeights(int numStates, int curState)
             {
                 last_bptt_hidden.cellOutput[i] = cellOutput[i];
                 last_bptt_hidden.er[i] = er[i];
-                last_bptt_hidden.mask[i] = mask[i];
             }
 
             for (int i = 0; i < DenseFeatureSize; i++)
@@ -505,32 +494,16 @@ public override void netReset(bool updateNet = false) //cleans hidden layer ac
             for (int a = 0; a < LayerSize; a++)
             {
                 cellOutput[a] = 0.1;
-                mask[a] = false;
             }
 
             if (updateNet == true)
             {
                 //Train mode
                 SimpleLayer last_bptt_hidden = bptt_hidden[0];
-                if (Dropout > 0)
-                {
-                    for (int a = 0; a < LayerSize; a++)
-                    {
-                        if (RNNHelper.rand.NextDouble() < Dropout)
-                        {
-                            mask[a] = true;
-                        }
-                        last_bptt_hidden.cellOutput[a] = cellOutput[a];
-                        last_bptt_hidden.er[a] = 0;
-                    }
-                }
-                else
+                for (int a = 0; a < LayerSize; a++)
                 {
-                    for (int a = 0; a < LayerSize; a++)
-                    {
-                        last_bptt_hidden.cellOutput[a] = cellOutput[a];
-                        last_bptt_hidden.er[a] = 0;
-                    }
+                    last_bptt_hidden.cellOutput[a] = cellOutput[a];
+                    last_bptt_hidden.er[a] = 0;
                 }
 
                 Array.Clear(bptt_inputs, 0, MAX_RNN_HIST);

RNNSharp/BiRNN.cs

+12 -25
@@ -198,15 +198,21 @@ private SimpleLayer[] ComputeTopLayer(Sequence pSequence, SimpleLayer[] lastLaye
             Parallel.For(0, numStates, parallelOption, curState =>
             {
                 State state = pSequence.States[curState];
+
                 seqFinalOutput[curState] = new SimpleLayer(OutputLayer.LayerSize);
                 SimpleLayer outputCells = seqFinalOutput[curState];
 
                 outputCells.DenseWeights = OutputLayer.DenseWeights;
                 outputCells.DenseWeightsLearningRate = OutputLayer.DenseWeightsLearningRate;
                 outputCells.DenseFeatureSize = OutputLayer.DenseFeatureSize;
+
+                outputCells.SparseWeights = OutputLayer.SparseWeights;
+                outputCells.SparseWeightsLearningRate = OutputLayer.SparseWeightsLearningRate;
+                outputCells.SparseFeatureSize = OutputLayer.SparseFeatureSize;
+
                 outputCells.computeLayer(state.SparseData, lastLayer[curState].cellOutput, isTrain);
                 outputCells.cellOutput.CopyTo(tmp_rawOutputLayer[curState], 0);
-                outputCells.Softmax();
+                outputCells.Softmax(isTrain);
 
             });
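
The per-state output cells now also share the output layer's sparse weights, so each state can consume the sparse features from the input layer, and Softmax now receives the isTrain flag. The body of Softmax(isTrain) is not part of this diff; for reference, a standard numerically stable softmax over cellOutput (an illustrative sketch, not RNNSharp's actual implementation) looks like:

using System;

static class OutputMath
{
    // Subtracting the max score before exponentiating keeps Math.Exp in
    // range without changing the normalized probabilities.
    public static void SoftmaxInPlace(double[] cellOutput)
    {
        double max = double.NegativeInfinity;
        foreach (double v in cellOutput)
        {
            if (v > max) max = v;
        }

        double sum = 0.0;
        for (int i = 0; i < cellOutput.Length; i++)
        {
            cellOutput[i] = Math.Exp(cellOutput[i] - max);
            sum += cellOutput[i];
        }

        for (int i = 0; i < cellOutput.Length; i++)
        {
            cellOutput[i] /= sum;
        }
    }
}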

@@ -252,9 +258,8 @@ private SimpleLayer[] ComputeLayers(Sequence pSequence, bool isTrain, out List<S
         /// </summary>
         /// <param name="pSequence"></param>
         /// <param name="seqFinalOutput"></param>
-        /// <param name="isCRF"></param>
         /// <returns></returns>
-        private void ComputeDeepErr(Sequence pSequence, SimpleLayer[] seqFinalOutput, out List<double[][]> fErrLayers, out List<double[][]> bErrLayers, bool isCRF = false)
+        private void ComputeDeepErr(Sequence pSequence, SimpleLayer[] seqFinalOutput, out List<double[][]> fErrLayers, out List<double[][]> bErrLayers)
        {
             int numStates = pSequence.States.Length;
             int numLayers = forwardHiddenLayers.Count;
@@ -264,24 +269,7 @@ private void ComputeDeepErr(Sequence pSequence, SimpleLayer[] seqFinalOutput, ou
             {
                 int label = pSequence.States[curState].Label;
                 SimpleLayer layer = seqFinalOutput[curState];
-
-                if (isCRF == false)
-                {
-                    for (int c = 0; c < layer.LayerSize; c++)
-                    {
-                        layer.er[c] = -layer.cellOutput[c];
-                    }
-                    layer.er[label] = 1.0 - layer.cellOutput[label];
-                }
-                else
-                {
-                    double[] CRFOutputLayer = CRFSeqOutput[curState];
-                    for (int c = 0; c < layer.LayerSize; c++)
-                    {
-                        layer.er[c] = -CRFOutputLayer[c];
-                    }
-                    layer.er[label] = 1 - CRFOutputLayer[label];
-                }
+                layer.ComputeLayerErr(CRFSeqOutput, pSequence.States[curState], curState);
             }
 
             //Now we already have err in output layer, let's pass them back to other layers
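
The inlined branches above are replaced by a single call to the output layer's ComputeLayerErr overload. Judging from the removed code, the contract is the usual cross-entropy gradient, er[c] = target[c] - output[c] with a one-hot target at the gold label; output comes from CRFSeqOutput when CRF decoding is active and from cellOutput otherwise. A sketch of that computation (the new overload's actual body lives in SimpleLayer and is not shown in this diff):

static class OutputError
{
    // 'output' is CRFSeqOutput[curState] in CRF mode and layer.cellOutput
    // otherwise; 'er' receives the cross-entropy gradient.
    public static void SetOutputError(double[] er, double[] output, int label)
    {
        for (int c = 0; c < er.Length; c++)
        {
            er[c] = -output[c];           // gradient for non-gold labels
        }
        er[label] = 1.0 - output[label];  // one-hot target at the gold label
    }
}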
@@ -391,10 +379,9 @@ private void DeepLearningNet(Sequence pSequence, SimpleLayer[] seqOutput, List<d
         /// <param name="pSequence"></param>
         /// <param name="runningMode"></param>
         /// <returns></returns>
-        public override Matrix<double> ProcessSequence(Sequence pSequence, RunningMode runningMode)
+        public override int[] ProcessSequence(Sequence pSequence, RunningMode runningMode, bool outputRawScore, out Matrix<double> rawOutputLayer)
         {
             List<SimpleLayer[]> layerList;
-            Matrix<double> rawOutputLayer;
 
             //Forward process from bottom layer to top layer
             SimpleLayer[] seqOutput = ComputeLayers(pSequence, runningMode == RunningMode.Train, out layerList, out rawOutputLayer);
@@ -416,7 +403,7 @@ public override Matrix<double> ProcessSequence(Sequence pSequence, RunningMode r
                 DeepLearningNet(pSequence, seqOutput, fErrLayers, bErrLayers, layerList);
             }
 
-            return rawOutputLayer;
+            return GetBestResult(rawOutputLayer);
         }
 
         /// <summary>
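
ProcessSequence now returns the predicted label sequence and exposes the raw scores through an out parameter instead. GetBestResult is not shown in this diff; presumably it takes the argmax of each state's row of scores, along these lines (a hypothetical sketch, with a jagged array standing in for RNNSharp's Matrix<double>):

static class Decoder
{
    // Hypothetical: per-state argmax over the raw output scores.
    public static int[] GetBestResultSketch(double[][] rawOutputLayer)
    {
        int[] best = new int[rawOutputLayer.Length];
        for (int state = 0; state < rawOutputLayer.Length; state++)
        {
            double[] scores = rawOutputLayer[state];
            int bestLabel = 0;
            for (int c = 1; c < scores.Length; c++)
            {
                if (scores[c] > scores[bestLabel]) bestLabel = c;
            }
            best[state] = bestLabel;
        }
        return best;
    }
}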
@@ -453,7 +440,7 @@ public override int[] ProcessSequenceCRF(Sequence pSequence, RunningMode running
 
             List<double[][]> fErrLayers;
             List<double[][]> bErrLayers;
-            ComputeDeepErr(pSequence, seqOutput, out fErrLayers, out bErrLayers, true);
+            ComputeDeepErr(pSequence, seqOutput, out fErrLayers, out bErrLayers);
             DeepLearningNet(pSequence, seqOutput, fErrLayers, bErrLayers, layerList);
         }

RNNSharp/DropoutLayer.cs

+94
@@ -0,0 +1,94 @@
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using System.Threading.Tasks;
+
+namespace RNNSharp
+{
+    class DropoutLayer : SimpleLayer
+    {
+        bool[] mask;
+        ModelSetting m_modelSetting;
+        Random rnd;
+
+        public DropoutLayer(int hiddenLayerSize, ModelSetting modelSetting) : base(hiddenLayerSize)
+        {
+            rnd = new Random();
+            m_modelSetting = modelSetting;
+        }
+
+        public DropoutLayer()
+        {
+            rnd = new Random();
+        }
+
+        public override void computeLayer(SparseVector sparseFeature, double[] denseFeature, bool isTrain = true)
+        {
+            if (LayerSize != denseFeature.Length)
+            {
+                throw new Exception("The layer size of dropout layer must be equal to its denseFeature size.");
+            }
+
+            if (isTrain == true)
+            {
+                mask = new bool[LayerSize];
+                for (int i = 0; i < LayerSize; i++)
+                {
+                    double val = rnd.NextDouble();
+                    if (val < m_modelSetting.Dropout)
+                    {
+                        mask[i] = true;
+                        cellOutput[i] = 0;
+                    }
+                    else
+                    {
+                        mask[i] = false;
+                        cellOutput[i] = denseFeature[i];
+                    }
+                }
+            }
+            else
+            {
+                for (int i = 0; i < LayerSize; i++)
+                {
+                    cellOutput[i] = (1.0 - m_modelSetting.Dropout) * denseFeature[i];
+                }
+            }
+        }
+
+        public override void LearnFeatureWeights(int numStates, int curState)
+        {
+
+        }
+
+        public override void ComputeLayerErr(SimpleLayer nextLayer, double[] destErrLayer, double[] srcErrLayer)
+        {
+            //error output->hidden for words from specific class
+            RNNHelper.matrixXvectorADDErr(destErrLayer, srcErrLayer, nextLayer.DenseWeights, LayerSize, nextLayer.LayerSize);
+
+            for (int i = 0; i < LayerSize; i++)
+            {
+                if (mask[i] == true)
+                {
+                    destErrLayer[i] = 0;
+                }
+            }
+        }
+
+        public override void ComputeLayerErr(SimpleLayer nextLayer)
+        {
+            //error output->hidden for words from specific class
+            RNNHelper.matrixXvectorADDErr(er, nextLayer.er, nextLayer.DenseWeights, LayerSize, nextLayer.LayerSize);
+
+            //Apply drop out on error in hidden layer
+            for (int i = 0; i < LayerSize; i++)
+            {
+                if (mask[i] == true)
+                {
+                    er[i] = 0;
+                }
+            }
+        }
+    }
+}
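
Note the scheme is classic (non-inverted) dropout: during training each unit is zeroed with probability Dropout and passed through unscaled otherwise, so at inference every activation is scaled by (1 - Dropout) to match the training-time expectation, and the backward pass zeroes the error at masked units. A tiny driver showing that contract (the default-constructed ModelSetting and its Dropout configuration are assumptions; sparseFeature can be null because this layer ignores it):

// Sketch only: assumes ModelSetting's Dropout value is configured
// elsewhere (e.g. via training options).
ModelSetting setting = new ModelSetting();
DropoutLayer dropout = new DropoutLayer(4, setting);

double[] input = { 1.0, 2.0, 3.0, 4.0 };
dropout.computeLayer(null, input, isTrain: true);   // each unit zeroed with probability Dropout
dropout.computeLayer(null, input, isTrain: false);  // every unit scaled by (1 - Dropout)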
