
Commit b277d06

#1. Provide sparse features to each layer; the sparse features come from the input layer.
#2. Refactor the dropout layer and the output layer.
#3. Refactor layer initialization.
1 parent f13243e commit b277d06

13 files changed: +254 -207 lines

RNNSharp/BPTTLayer.cs

+16 -43
@@ -25,9 +25,10 @@ public class BPTTLayer : SimpleLayer
         protected Matrix<double> BpttWeightsDelta { get; set; }
         protected Matrix<double> BpttWeightsLearningRate { get; set; }
 
-        public BPTTLayer(int hiddenLayerSize) : base(hiddenLayerSize)
+        public BPTTLayer(int hiddenLayerSize, ModelSetting modelsetting) : base(hiddenLayerSize)
         {
-
+            bptt = modelsetting.Bptt + 1;
+            bptt_block = 10;
         }
 
         public BPTTLayer()
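
The constructor now takes the ModelSetting, so the BPTT truncation depth is configured rather than hard-coded: bptt stores the configured depth plus one (for the current time step), while bptt_block stays fixed at 10. A minimal construction sketch under the new signature; the layer size and the way ModelSetting acquires its Bptt value are illustrative assumptions:

// Sketch only: assumes ModelSetting carries a Bptt value configured
// elsewhere (e.g. from command-line options).
ModelSetting setting = new ModelSetting();
BPTTLayer hidden = new BPTTLayer(200, setting);  // 200 hidden units, arbitrary
// Inside the constructor: bptt = setting.Bptt + 1; bptt_block = 10;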
@@ -55,7 +56,8 @@ public override void InitializeWeights(int sparseFeatureSize, int denseFeatureSi
             BpttWeights = new Matrix<double>(LayerSize, LayerSize);
             BpttWeightsDelta = new Matrix<double>(LayerSize, LayerSize);
 
-            Logger.WriteLine("Initializing weights, random value is {0}", RNNHelper.rand.NextDouble());
+            Logger.WriteLine("Initializing weights, sparse feature size: {0}, dense feature size: {1}, random value is {2}",
+                SparseFeatureSize, DenseFeatureSize, RNNHelper.rand.NextDouble());
             initWeights();
 
             //Initialize BPTT
@@ -206,36 +208,24 @@ public override void computeLayer(SparseVector sparseFeature, double[] denseFeat
             }
 
             //activate layer
-            activityLayer(isTrain);
+            activityLayer();
         }
 
-        private void activityLayer(bool isTrain)
+        private void activityLayer()
         {
             Parallel.For(0, LayerSize, parallelOption, a =>
             {
                 double score = cellOutput[a];
-                if (mask[a] == true)
+                if (score > 50)
                 {
-                    score = 0;
+                    score = 50; //for numerical stability
                 }
-                else
+                else if (score < -50)
                 {
-                    if (isTrain == false)
-                    {
-                        score = score * (1.0 - Dropout);
-                    }
-
-                    if (score > 50)
-                    {
-                        score = 50; //for numerical stability
-                    }
-                    else if (score < -50)
-                    {
-                        score = -50; //for numerical stability
-                    }
-
-                    score = 1.0 / (1.0 + Math.Exp(-score));
+                    score = -50; //for numerical stability
                 }
+
+                score = 1.0 / (1.0 + Math.Exp(-score));
                 cellOutput[a] = score;
             });
         }
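
With the dropout masking moved out (see the new DropoutLayer below), activityLayer reduces to a clipped logistic sigmoid. The clamp at +/-50 keeps the intermediate Math.Exp value well inside double range. A standalone sketch of the same computation:

using System;

static class Activation
{
    // Mirrors the new activityLayer body: clamp the pre-activation so the
    // intermediate Math.Exp value stays finite, then apply the logistic
    // function.
    public static double ClippedSigmoid(double score)
    {
        if (score > 50)
        {
            score = 50;   // for numerical stability, as in the diff
        }
        else if (score < -50)
        {
            score = -50;
        }
        return 1.0 / (1.0 + Math.Exp(-score));
    }
}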
@@ -268,7 +258,6 @@ public override void LearnFeatureWeights(int numStates, int curState)
             {
                 last_bptt_hidden.cellOutput[i] = cellOutput[i];
                 last_bptt_hidden.er[i] = er[i];
-                last_bptt_hidden.mask[i] = mask[i];
             }
 
             for (int i = 0; i < DenseFeatureSize; i++)
@@ -505,32 +494,16 @@ public override void netReset(bool updateNet = false) //cleans hidden layer ac
             for (int a = 0; a < LayerSize; a++)
             {
                 cellOutput[a] = 0.1;
-                mask[a] = false;
             }
 
             if (updateNet == true)
             {
                 //Train mode
                 SimpleLayer last_bptt_hidden = bptt_hidden[0];
-                if (Dropout > 0)
-                {
-                    for (int a = 0; a < LayerSize; a++)
-                    {
-                        if (RNNHelper.rand.NextDouble() < Dropout)
-                        {
-                            mask[a] = true;
-                        }
-                        last_bptt_hidden.cellOutput[a] = cellOutput[a];
-                        last_bptt_hidden.er[a] = 0;
-                    }
-                }
-                else
+                for (int a = 0; a < LayerSize; a++)
                 {
-                    for (int a = 0; a < LayerSize; a++)
-                    {
-                        last_bptt_hidden.cellOutput[a] = cellOutput[a];
-                        last_bptt_hidden.er[a] = 0;
-                    }
+                    last_bptt_hidden.cellOutput[a] = cellOutput[a];
+                    last_bptt_hidden.er[a] = 0;
                 }
 
                 Array.Clear(bptt_inputs, 0, MAX_RNN_HIST);

RNNSharp/BiRNN.cs

+12 -25
@@ -198,15 +198,21 @@ private SimpleLayer[] ComputeTopLayer(Sequence pSequence, SimpleLayer[] lastLaye
             Parallel.For(0, numStates, parallelOption, curState =>
             {
                 State state = pSequence.States[curState];
+
                 seqFinalOutput[curState] = new SimpleLayer(OutputLayer.LayerSize);
                 SimpleLayer outputCells = seqFinalOutput[curState];
 
                 outputCells.DenseWeights = OutputLayer.DenseWeights;
                 outputCells.DenseWeightsLearningRate = OutputLayer.DenseWeightsLearningRate;
                 outputCells.DenseFeatureSize = OutputLayer.DenseFeatureSize;
+
+                outputCells.SparseWeights = OutputLayer.SparseWeights;
+                outputCells.SparseWeightsLearningRate = OutputLayer.SparseWeightsLearningRate;
+                outputCells.SparseFeatureSize = OutputLayer.SparseFeatureSize;
+
                 outputCells.computeLayer(state.SparseData, lastLayer[curState].cellOutput, isTrain);
                 outputCells.cellOutput.CopyTo(tmp_rawOutputLayer[curState], 0);
-                outputCells.Softmax();
+                outputCells.Softmax(isTrain);
 
             });
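
The per-state output cells now also share the output layer's sparse weights, so each state can consume the sparse features from the input layer, and Softmax now receives the isTrain flag. The body of Softmax(isTrain) is not part of this diff; for reference, a standard numerically stable softmax over cellOutput (an illustrative sketch, not RNNSharp's actual implementation) looks like:

using System;

static class OutputMath
{
    // Subtracting the max score before exponentiating keeps Math.Exp in
    // range without changing the normalized probabilities.
    public static void SoftmaxInPlace(double[] cellOutput)
    {
        double max = double.NegativeInfinity;
        foreach (double v in cellOutput)
        {
            if (v > max) max = v;
        }

        double sum = 0.0;
        for (int i = 0; i < cellOutput.Length; i++)
        {
            cellOutput[i] = Math.Exp(cellOutput[i] - max);
            sum += cellOutput[i];
        }

        for (int i = 0; i < cellOutput.Length; i++)
        {
            cellOutput[i] /= sum;
        }
    }
}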

@@ -252,9 +258,8 @@ private SimpleLayer[] ComputeLayers(Sequence pSequence, bool isTrain, out List<S
         /// </summary>
         /// <param name="pSequence"></param>
         /// <param name="seqFinalOutput"></param>
-        /// <param name="isCRF"></param>
         /// <returns></returns>
-        private void ComputeDeepErr(Sequence pSequence, SimpleLayer[] seqFinalOutput, out List<double[][]> fErrLayers, out List<double[][]> bErrLayers, bool isCRF = false)
+        private void ComputeDeepErr(Sequence pSequence, SimpleLayer[] seqFinalOutput, out List<double[][]> fErrLayers, out List<double[][]> bErrLayers)
        {
             int numStates = pSequence.States.Length;
             int numLayers = forwardHiddenLayers.Count;
@@ -264,24 +269,7 @@ private void ComputeDeepErr(Sequence pSequence, SimpleLayer[] seqFinalOutput, ou
             {
                 int label = pSequence.States[curState].Label;
                 SimpleLayer layer = seqFinalOutput[curState];
-
-                if (isCRF == false)
-                {
-                    for (int c = 0; c < layer.LayerSize; c++)
-                    {
-                        layer.er[c] = -layer.cellOutput[c];
-                    }
-                    layer.er[label] = 1.0 - layer.cellOutput[label];
-                }
-                else
-                {
-                    double[] CRFOutputLayer = CRFSeqOutput[curState];
-                    for (int c = 0; c < layer.LayerSize; c++)
-                    {
-                        layer.er[c] = -CRFOutputLayer[c];
-                    }
-                    layer.er[label] = 1 - CRFOutputLayer[label];
-                }
+                layer.ComputeLayerErr(CRFSeqOutput, pSequence.States[curState], curState);
             }
 
             //Now we already have err in output layer, let's pass them back to other layers
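
The inlined branches above are replaced by a single call to the output layer's ComputeLayerErr overload. Judging from the removed code, the contract is the usual cross-entropy gradient, er[c] = target[c] - output[c] with a one-hot target at the gold label; output comes from CRFSeqOutput when CRF decoding is active and from cellOutput otherwise. A sketch of that computation (the new overload's actual body lives in SimpleLayer and is not shown in this diff):

static class OutputError
{
    // 'output' is CRFSeqOutput[curState] in CRF mode and layer.cellOutput
    // otherwise; 'er' receives the cross-entropy gradient.
    public static void SetOutputError(double[] er, double[] output, int label)
    {
        for (int c = 0; c < er.Length; c++)
        {
            er[c] = -output[c];           // gradient for non-gold labels
        }
        er[label] = 1.0 - output[label];  // one-hot target at the gold label
    }
}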
@@ -391,10 +379,9 @@ private void DeepLearningNet(Sequence pSequence, SimpleLayer[] seqOutput, List<d
         /// <param name="pSequence"></param>
         /// <param name="runningMode"></param>
         /// <returns></returns>
-        public override Matrix<double> ProcessSequence(Sequence pSequence, RunningMode runningMode)
+        public override int[] ProcessSequence(Sequence pSequence, RunningMode runningMode, bool outputRawScore, out Matrix<double> rawOutputLayer)
         {
             List<SimpleLayer[]> layerList;
-            Matrix<double> rawOutputLayer;
 
             //Forward process from bottom layer to top layer
             SimpleLayer[] seqOutput = ComputeLayers(pSequence, runningMode == RunningMode.Train, out layerList, out rawOutputLayer);
@@ -416,7 +403,7 @@ public override Matrix<double> ProcessSequence(Sequence pSequence, RunningMode r
                 DeepLearningNet(pSequence, seqOutput, fErrLayers, bErrLayers, layerList);
             }
 
-            return rawOutputLayer;
+            return GetBestResult(rawOutputLayer);
         }
 
         /// <summary>
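
ProcessSequence now returns the predicted label sequence and exposes the raw scores through an out parameter instead. GetBestResult is not shown in this diff; presumably it takes the argmax of each state's row of scores, along these lines (a hypothetical sketch, with a jagged array standing in for RNNSharp's Matrix<double>):

static class Decoder
{
    // Hypothetical: per-state argmax over the raw output scores.
    public static int[] GetBestResultSketch(double[][] rawOutputLayer)
    {
        int[] best = new int[rawOutputLayer.Length];
        for (int state = 0; state < rawOutputLayer.Length; state++)
        {
            double[] scores = rawOutputLayer[state];
            int bestLabel = 0;
            for (int c = 1; c < scores.Length; c++)
            {
                if (scores[c] > scores[bestLabel]) bestLabel = c;
            }
            best[state] = bestLabel;
        }
        return best;
    }
}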
@@ -453,7 +440,7 @@ public override int[] ProcessSequenceCRF(Sequence pSequence, RunningMode running
 
             List<double[][]> fErrLayers;
             List<double[][]> bErrLayers;
-            ComputeDeepErr(pSequence, seqOutput, out fErrLayers, out bErrLayers, true);
+            ComputeDeepErr(pSequence, seqOutput, out fErrLayers, out bErrLayers);
             DeepLearningNet(pSequence, seqOutput, fErrLayers, bErrLayers, layerList);
         }

RNNSharp/DropoutLayer.cs

+94
@@ -0,0 +1,94 @@
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using System.Threading.Tasks;
+
+namespace RNNSharp
+{
+    class DropoutLayer : SimpleLayer
+    {
+        bool[] mask;
+        ModelSetting m_modelSetting;
+        Random rnd;
+
+        public DropoutLayer(int hiddenLayerSize, ModelSetting modelSetting) : base(hiddenLayerSize)
+        {
+            rnd = new Random();
+            m_modelSetting = modelSetting;
+        }
+
+        public DropoutLayer()
+        {
+            rnd = new Random();
+        }
+
+        public override void computeLayer(SparseVector sparseFeature, double[] denseFeature, bool isTrain = true)
+        {
+            if (LayerSize != denseFeature.Length)
+            {
+                throw new Exception("The layer size of dropout layer must be equal to its denseFeature size.");
+            }
+
+            if (isTrain == true)
+            {
+                mask = new bool[LayerSize];
+                for (int i = 0; i < LayerSize; i++)
+                {
+                    double val = rnd.NextDouble();
+                    if (val < m_modelSetting.Dropout)
+                    {
+                        mask[i] = true;
+                        cellOutput[i] = 0;
+                    }
+                    else
+                    {
+                        mask[i] = false;
+                        cellOutput[i] = denseFeature[i];
+                    }
+                }
+            }
+            else
+            {
+                for (int i = 0; i < LayerSize; i++)
+                {
+                    cellOutput[i] = (1.0 - m_modelSetting.Dropout) * denseFeature[i];
+                }
+            }
+        }
+
+        public override void LearnFeatureWeights(int numStates, int curState)
+        {
+
+        }
+
+        public override void ComputeLayerErr(SimpleLayer nextLayer, double[] destErrLayer, double[] srcErrLayer)
+        {
+            //error output->hidden for words from specific class
+            RNNHelper.matrixXvectorADDErr(destErrLayer, srcErrLayer, nextLayer.DenseWeights, LayerSize, nextLayer.LayerSize);
+
+            for (int i = 0; i < LayerSize; i++)
+            {
+                if (mask[i] == true)
+                {
+                    destErrLayer[i] = 0;
+                }
+            }
+        }
+
+        public override void ComputeLayerErr(SimpleLayer nextLayer)
+        {
+            //error output->hidden for words from specific class
+            RNNHelper.matrixXvectorADDErr(er, nextLayer.er, nextLayer.DenseWeights, LayerSize, nextLayer.LayerSize);
+
+            //Apply drop out on error in hidden layer
+            for (int i = 0; i < LayerSize; i++)
+            {
+                if (mask[i] == true)
+                {
+                    er[i] = 0;
+                }
+            }
+        }
+    }
+}
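
Note the scheme is classic (non-inverted) dropout: during training each unit is zeroed with probability Dropout and passed through unscaled otherwise, so at inference every activation is scaled by (1 - Dropout) to match the training-time expectation, and the backward pass zeroes the error at masked units. A tiny driver showing that contract (the default-constructed ModelSetting and its Dropout configuration are assumptions; sparseFeature can be null because this layer ignores it):

// Sketch only: assumes ModelSetting's Dropout value is configured
// elsewhere (e.g. via training options).
ModelSetting setting = new ModelSetting();
DropoutLayer dropout = new DropoutLayer(4, setting);

double[] input = { 1.0, 2.0, 3.0, 4.0 };
dropout.computeLayer(null, input, isTrain: true);   // each unit zeroed with probability Dropout
dropout.computeLayer(null, input, isTrain: false);  // every unit scaled by (1 - Dropout)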
