Skip to content

Commit df4406b

Browse files
committed
#1. Fix the bug that caused incremental training to crash.
#2. Remove unused (commented-out) code
1 parent ebc087f commit df4406b

File tree

4 files changed

+4
-274
lines changed

4 files changed

+4
-274
lines changed

README.md

+2-2
Original file line numberDiff line numberDiff line change
@@ -367,5 +367,5 @@ RNNSharp is a pure C# project, so it can be compiled by .NET Core and Mono, and
367367
The RNNSharp also provides some APIs for developers to leverage it into their projects. By download source code package and open RNNSharpConsole project, you will see how to use APIs in your project to encode and decode RNN models. Note that, before use RNNSharp APIs, you should add RNNSharp.dll as reference into your project.
368368

369369
## RNNSharp referenced by the following published papers
370-
1. [Project-Team IntuiDoc: Intuitive user interaction for document](https://www.irisa.fr/intuidoc/data/ra/intuidoc2015.pdf)
371-
2. [A New Pre-training Method for Training Deep Learning Models with Application to Spoken Language Understanding](https://www.microsoft.com/en-us/research/wp-content/uploads/2016/07/IS2016.CameraReady-1.pdf)
370+
1. [Project-Team IntuiDoc: Intuitive user interaction for document](https://www.irisa.fr/intuidoc/data/ra/intuidoc2015.pdf)
371+
2. [A New Pre-training Method for Training Deep Learning Models with Application to Spoken Language Understanding](https://www.microsoft.com/en-us/research/wp-content/uploads/2016/07/IS2016.CameraReady-1.pdf)

RNNSharp/Layers/LSTMLayer.cs

-17
Original file line numberDiff line numberDiff line change
@@ -114,9 +114,7 @@ public class LSTMLayer : SimpleLayer
114114
//Z - wInputCell
115115
//W - wInputOutputGate
116116
protected Vector4[][] sparseFeatureWeights;
117-
// protected Vector3[][] sparseFeatureToHiddenDeri;
118117
protected Vector4[][] sparseFeatureToHiddenLearningRate;
119-
120118
protected Dictionary<int, Vector3>[] sparseFeatureToHiddenDeri;
121119

122120

@@ -129,14 +127,12 @@ public class LSTMLayer : SimpleLayer
129127
private Vector4 vecNormalLearningRate;
130128
private Vector3 vecNormalLearningRate3;
131129
private Vector<float> vecNormalLearningRateFloat;
132-
// protected float[] previousCellOutputs;
133130

134131
LSTMLayerConfig config;
135132

136133
public LSTMLayer(LSTMLayerConfig config) : base(config)
137134
{
138135
this.config = config;
139-
// previousCellOutputs = new float[LayerSize];
140136
LSTMCells = new LSTMCell[LayerSize];
141137
for (var i = 0; i < LayerSize; i++)
142138
{
@@ -155,23 +151,18 @@ public override SimpleLayer CreateLayerSharedWegiths()
155151
public override Neuron CopyNeuronTo(Neuron neuron)
156152
{
157153
LSTMNeuron lstmNeuron = neuron as LSTMNeuron;
158-
159154
Cells.CopyTo(lstmNeuron.Cells, 0);
160-
// previousCellOutputs.CopyTo(lstmNeuron.PrevCellOutputs, 0);
161155
for (int i = 0; i < LayerSize; i++)
162156
{
163157
lstmNeuron.LSTMCells[i].Set(LSTMCells[i]);
164158
}
165159

166-
167160
return lstmNeuron;
168161
}
169162

170163
public override void PreUpdateWeights(Neuron neuron, float[] errs)
171164
{
172165
LSTMNeuron lstmNeuron = neuron as LSTMNeuron;
173-
// lstmNeuron.Cells.CopyTo(Cells, 0);
174-
// lstmNeuron.PrevCellOutputs.CopyTo(previousCellOutputs, 0);
175166
for (int i = 0; i < LayerSize; i++)
176167
{
177168
LSTMCells[i].Set(lstmNeuron.LSTMCells[i]);
@@ -206,12 +197,6 @@ public override void InitializeInternalTrainingParameters()
206197
{
207198
if (SparseFeatureSize > 0)
208199
{
209-
//sparseFeatureToHiddenDeri = new Vector3[LayerSize][];
210-
//for (var i = 0; i < LayerSize; i++)
211-
//{
212-
// sparseFeatureToHiddenDeri[i] = new Vector3[SparseFeatureSize];
213-
//}
214-
215200
sparseFeatureToHiddenDeri = new Dictionary<int, Vector3>[LayerSize];
216201
for (var i = 0; i < LayerSize; i++)
217202
{
@@ -729,7 +714,6 @@ public override void ForwardPass(SparseVector sparseFeature, float[] denseFeatur
729714
//hidden(t-1) -> hidden(t)
730715
cell_j.previousCellState = cell_j.cellState;
731716
cell_j.previousCellOutput = Cells[j];
732-
// previousCellOutputs[j] = Cells[j];
733717

734718
var vecCell_j = Vector4.Zero;
735719

@@ -1068,7 +1052,6 @@ public override void Reset()
10681052

10691053
private void InitializeLSTMCell(LSTMCell c, LSTMCellWeight cw, LSTMCellWeightDeri deri)
10701054
{
1071-
// c.previousCellState = 0;
10721055
c.cellState = 0;
10731056

10741057
//partial derivatives

RNNSharp/Networks/BiRNN.cs

+1-10
Original file line numberDiff line numberDiff line change
@@ -636,6 +636,7 @@ public override void LoadModel(string filename, bool bTrain = false)
636636
if (bTrain)
637637
{
638638
OutputLayer.SetRunningMode(RunningMode.Training);
639+
OutputLayer.InitializeInternalTrainingParameters();
639640
}
640641
else
641642
{
@@ -649,15 +650,5 @@ public override void LoadModel(string filename, bool bTrain = false)
649650
}
650651
}
651652
}
652-
653-
//public override int[] ProcessSeq2Seq(SequencePair pSequence, RunningMode runningMode)
654-
//{
655-
// throw new NotImplementedException();
656-
//}
657-
658-
//public override int[] TestSeq2Seq(Sentence srcSentence, Config featurizer)
659-
//{
660-
// throw new NotImplementedException();
661-
//}
662653
}
663654
}

RNNSharp/Networks/FowardRNN.cs

+1-245
Original file line numberDiff line numberDiff line change
@@ -112,251 +112,6 @@ public override float[][] ComputeTopHiddenLayerOutput(Sequence pSequence)
112112
return outputs;
113113
}
114114

115-
///// <summary>
116-
///// Extract features from source sequence
117-
///// </summary>
118-
///// <param name="decoder"></param>
119-
///// <param name="srcSequence"></param>
120-
///// <param name="targetSparseFeatureSize"></param>
121-
///// <param name="srcHiddenAvgOutput"></param>
122-
///// <param name="srcSparseFeatures"></param>
123-
//private void ExtractSourceSentenceFeature(RNNDecoder decoder, Sequence srcSequence, int targetSparseFeatureSize)
124-
//{
125-
// //Extract dense features from source sequence
126-
// var srcOutputs = decoder.ComputeTopHiddenLayerOutput(srcSequence);
127-
// int srcSequenceDenseFeatureSize = srcOutputs[0].Length;
128-
// int srcSequenceLength = srcOutputs.Length - 1;
129-
130-
// if (srcHiddenAvgOutput == null)
131-
// {
132-
// srcHiddenAvgOutput = new float[srcSequenceDenseFeatureSize * 2];
133-
// }
134-
135-
// var j = 0;
136-
// float[] srcOutputForward = srcOutputs[0];
137-
// float[] srcOutputBackward = srcOutputs[srcSequenceLength];
138-
// while (j < srcSequenceDenseFeatureSize - Vector<float>.Count)
139-
// {
140-
// var vForward = new Vector<float>(srcOutputForward, j);
141-
// var vBackward = new Vector<float>(srcOutputBackward, j);
142-
143-
// vForward.CopyTo(srcHiddenAvgOutput, j);
144-
// vBackward.CopyTo(srcHiddenAvgOutput, srcSequenceDenseFeatureSize + j);
145-
146-
// j += Vector<float>.Count;
147-
// }
148-
149-
// while (j < srcSequenceDenseFeatureSize)
150-
// {
151-
// srcHiddenAvgOutput[j] = srcOutputForward[j];
152-
// srcHiddenAvgOutput[srcSequenceDenseFeatureSize + j] = srcOutputBackward[j];
153-
// j++;
154-
// }
155-
156-
// //Extract sparse features from source sequence
157-
// if (srcSparseFeatures == null)
158-
// {
159-
// srcSparseFeatures = new Dictionary<int, float>();
160-
// }
161-
// else
162-
// {
163-
// srcSparseFeatures.Clear();
164-
// }
165-
166-
// for (var i = 0; i < srcSequence.States.Length; i++)
167-
// {
168-
// foreach (var kv in srcSequence.States[i].SparseFeature)
169-
// {
170-
// var srcSparseFeatureIndex = kv.Key + targetSparseFeatureSize;
171-
172-
// if (srcSparseFeatures.ContainsKey(srcSparseFeatureIndex) == false)
173-
// {
174-
// srcSparseFeatures.Add(srcSparseFeatureIndex, kv.Value);
175-
// }
176-
// else
177-
// {
178-
// srcSparseFeatures[srcSparseFeatureIndex] += kv.Value;
179-
// }
180-
// }
181-
// }
182-
//}
183-
184-
//public override int[] TestSeq2Seq(Sentence srcSentence, Config featurizer)
185-
//{
186-
// var curState = featurizer.BuildState(new[] { "<s>" });
187-
// curState.Label = featurizer.TagSet.GetIndex("<s>");
188-
189-
// //Reset all layers
190-
// foreach (var layer in HiddenLayerList)
191-
// {
192-
// layer.Reset();
193-
// }
194-
195-
// //Extract features from source sentence
196-
// var srcSequence = featurizer.Seq2SeqAutoEncoder.Config.BuildSequence(srcSentence);
197-
198-
// ExtractSourceSentenceFeature(featurizer.Seq2SeqAutoEncoder, srcSequence, curState.SparseFeature.Length);
199-
200-
// var numLayers = HiddenLayerList.Count;
201-
// var predicted = new List<int> { curState.Label };
202-
203-
// CreateDenseFeatureList();
204-
// for (int i = 0; i < numLayers; i++)
205-
// {
206-
// srcHiddenAvgOutput.CopyTo(denseFeaturesList[i], 0);
207-
// }
208-
// srcHiddenAvgOutput.CopyTo(denseFeaturesList[numLayers], 0);
209-
210-
// var sparseVector = new SparseVector();
211-
// while (true)
212-
// {
213-
// //Build sparse features
214-
// sparseVector.Clean();
215-
// sparseVector.SetLength(curState.SparseFeature.Length + srcSequence.SparseFeatureSize);
216-
// sparseVector.AddKeyValuePairData(curState.SparseFeature);
217-
// sparseVector.AddKeyValuePairData(srcSparseFeatures);
218-
219-
// //Compute first layer
220-
// curState.DenseFeature.CopyTo().CopyTo(denseFeaturesList[0], srcHiddenAvgOutput.Length);
221-
// HiddenLayerList[0].ForwardPass(sparseVector, denseFeaturesList[0]);
222-
223-
// //Compute middle layers
224-
// for (var i = 1; i < numLayers; i++)
225-
// {
226-
// //We use previous layer's output as dense feature for current layer
227-
// HiddenLayerList[i - 1].Cells.CopyTo(denseFeaturesList[i], srcHiddenAvgOutput.Length);
228-
// HiddenLayerList[i].ForwardPass(sparseVector, denseFeaturesList[i]);
229-
// }
230-
231-
// //Compute output layer
232-
// HiddenLayerList[numLayers - 1].Cells.CopyTo(denseFeaturesList[numLayers], srcHiddenAvgOutput.Length);
233-
// OutputLayer.ForwardPass(sparseVector, denseFeaturesList[numLayers]);
234-
235-
// var nextTagId = OutputLayer.GetBestOutputIndex();
236-
// var nextWord = featurizer.TagSet.GetTagName(nextTagId);
237-
238-
// curState = featurizer.BuildState(new[] { nextWord });
239-
// curState.Label = nextTagId;
240-
241-
// predicted.Add(nextTagId);
242-
243-
// if (nextWord == "</s>" || predicted.Count >= 100)
244-
// {
245-
// break;
246-
// }
247-
// }
248-
249-
// return predicted.ToArray();
250-
//}
251-
252-
//List<float[]> denseFeaturesList = null;
253-
//float[] srcHiddenAvgOutput;
254-
//Dictionary<int, float> srcSparseFeatures;
255-
//private void CreateDenseFeatureList()
256-
//{
257-
// if (denseFeaturesList == null)
258-
// {
259-
// denseFeaturesList = new List<float[]>();
260-
// for (int i = 0; i < HiddenLayerList.Count; i++)
261-
// {
262-
// denseFeaturesList.Add(new float[2048]);
263-
// }
264-
265-
// denseFeaturesList.Add(new float[2048]);
266-
// }
267-
//}
268-
269-
//public override int[] ProcessSeq2Seq(SequencePair pSequence, RunningMode runningMode)
270-
//{
271-
// var tgtSequence = pSequence.tgtSequence;
272-
273-
// //Reset all layers
274-
// foreach (var layer in HiddenLayerList)
275-
// {
276-
// layer.Reset();
277-
// }
278-
279-
// Sequence srcSequence;
280-
281-
// //Extract features from source sentences
282-
// srcSequence = pSequence.autoEncoder.Config.BuildSequence(pSequence.srcSentence);
283-
// ExtractSourceSentenceFeature(pSequence.autoEncoder, srcSequence, tgtSequence.SparseFeatureSize);
284-
285-
// var numStates = pSequence.tgtSequence.States.Length;
286-
// var numLayers = HiddenLayerList.Count;
287-
// var predicted = new int[numStates];
288-
289-
// //Set target sentence labels into short list in output layer
290-
// OutputLayer.LabelShortList.Clear();
291-
// foreach (var state in tgtSequence.States)
292-
// {
293-
// OutputLayer.LabelShortList.Add(state.Label);
294-
// }
295-
296-
// CreateDenseFeatureList();
297-
// for (int i = 0; i < numLayers; i++)
298-
// {
299-
// srcHiddenAvgOutput.CopyTo(denseFeaturesList[i], 0);
300-
// }
301-
// srcHiddenAvgOutput.CopyTo(denseFeaturesList[numLayers], 0);
302-
303-
// var sparseVector = new SparseVector();
304-
// for (var curState = 0; curState < numStates; curState++)
305-
// {
306-
// //Build runtime features
307-
// var state = tgtSequence.States[curState];
308-
// SetRuntimeFeatures(state, curState, numStates, predicted);
309-
310-
// //Build sparse features for all layers
311-
// sparseVector.Clean();
312-
// sparseVector.SetLength(tgtSequence.SparseFeatureSize + srcSequence.SparseFeatureSize);
313-
// sparseVector.AddKeyValuePairData(state.SparseFeature);
314-
// sparseVector.AddKeyValuePairData(srcSparseFeatures);
315-
316-
// //Compute first layer
317-
// state.DenseFeature.CopyTo().CopyTo(denseFeaturesList[0], srcHiddenAvgOutput.Length);
318-
// HiddenLayerList[0].ForwardPass(sparseVector, denseFeaturesList[0]);
319-
320-
// //Compute middle layers
321-
// for (var i = 1; i < numLayers; i++)
322-
// {
323-
// //We use previous layer's output as dense feature for current layer
324-
// HiddenLayerList[i - 1].Cells.CopyTo(denseFeaturesList[i], srcHiddenAvgOutput.Length);
325-
// HiddenLayerList[i].ForwardPass(sparseVector, denseFeaturesList[i]);
326-
// }
327-
328-
// //Compute output layer
329-
// HiddenLayerList[numLayers - 1].Cells.CopyTo(denseFeaturesList[numLayers], srcHiddenAvgOutput.Length);
330-
// OutputLayer.ForwardPass(sparseVector, denseFeaturesList[numLayers]);
331-
332-
// predicted[curState] = OutputLayer.GetBestOutputIndex();
333-
334-
// if (runningMode == RunningMode.Training)
335-
// {
336-
// // error propogation
337-
// OutputLayer.ComputeLayerErr(CRFSeqOutput, state, curState);
338-
339-
// //propogate errors to each layer from output layer to input layer
340-
// HiddenLayerList[numLayers - 1].ComputeLayerErr(OutputLayer);
341-
// for (var i = numLayers - 2; i >= 0; i--)
342-
// {
343-
// HiddenLayerList[i].ComputeLayerErr(HiddenLayerList[i + 1]);
344-
// }
345-
346-
// //Update net weights
347-
// OutputLayer.BackwardPass();
348-
349-
// for (var i = 0; i < numLayers; i++)
350-
// {
351-
// HiddenLayerList[i].BackwardPass();
352-
// }
353-
354-
// }
355-
// }
356-
357-
// return predicted;
358-
//}
359-
360115
public override int[] ProcessSequence(ISentence sentence, Config featurizer, RunningMode runningMode, bool outputRawScore, out Matrix<float> m)
361116
{
362117
var seq = featurizer.BuildSequence(sentence as Sentence);
@@ -560,6 +315,7 @@ public override void LoadModel(string filename, bool bTrain = false)
560315
if (bTrain)
561316
{
562317
OutputLayer.SetRunningMode(RunningMode.Training);
318+
OutputLayer.InitializeInternalTrainingParameters();
563319
}
564320
else
565321
{

0 commit comments

Comments
 (0)