@@ -30,10 +30,6 @@ public class LSTMCell : SimpleCell
     public double wCellForget;
     public double wCellOut;

-    public float dCellInLearningRate;
-    public float dCellForgetLearningRate;
-    public float dCellOutLearningRate;
-
     //partial derivatives
     public double dSWCellIn;
     public double dSWCellForget;
@@ -52,22 +48,6 @@ public struct LSTMWeight
     public float wInputOutputGate;
 }

-//public struct LSTMWeightLearningRate
-//{
-//    public float dInputCellLearningRate;
-//    public float dInputInputGateLearningRate;
-//    public float dInputForgetGateLearningRate;
-//    public float dInputOutputGateLearningRate;
-//}
-
-//public struct LSTMWeightDerivative
-//{
-//    //partial derivatives. dont need partial derivative for output gate as it uses BP not RTRL
-//    public double dSInputCell;
-//    public double dSInputInputGate;
-//    public double dSInputForgetGate;
-//}
-
 public class LSTMRNN : RNN
 {
     public LSTMCell[] neuHidden; //neurons in hidden layer
@@ -76,10 +56,15 @@ public class LSTMRNN : RNN

     protected Vector4[][] Input2HiddenLearningRate;
     protected Vector4[][] Feature2HiddenLearningRate;
+    protected Vector3[] CellLearningRate;

     protected Vector3[][] input2hiddenDeri;
     protected Vector3[][] feature2hiddenDeri;

+    private Vector4 vecLearningRate;
+    private Vector3 vecLearningRate3;
+
+
     public LSTMRNN()
     {
         ModelType = MODELTYPE.LSTM;
@@ -368,7 +353,7 @@ public override void SaveModel(string filename)
         //weight input->hidden
         Logger.WriteLine("Saving input2hidden weights...");
         saveLSTMWeight(input2hidden, fo);
-
+
         if (DenseFeatureSize > 0)
         {
             //weight fea->hidden
@@ -453,7 +438,7 @@ public override void initWeights()
     }

     //Create and initialise the weights from hidden to output layer, these are just normal weights
-    Hidden2OutputWeight = new Matrix<double>(L2, L1);
+    Hidden2OutputWeight = new Matrix<float>(L2, L1);

     for (int i = 0; i < Hidden2OutputWeight.Height; i++)
     {
@@ -499,12 +484,9 @@ public override void CleanStatus()
         Feature2HiddenLearningRate = new Vector4[L1][];
     }

+    CellLearningRate = new Vector3[L1];
     Parallel.For(0, L1, parallelOption, i =>
     {
-        neuHidden[i].dCellForgetLearningRate = 0;
-        neuHidden[i].dCellInLearningRate = 0;
-        neuHidden[i].dCellOutLearningRate = 0;
-
         Input2HiddenLearningRate[i] = new Vector4[L0];

         if (DenseFeatureSize > 0)
@@ -515,6 +497,8 @@ public override void CleanStatus()
     });

     Hidden2OutputWeightLearningRate = new Matrix<float>(L2, L1);
+    vecLearningRate = new Vector4(LearningRate, LearningRate, LearningRate, LearningRate);
+    vecLearningRate3 = new Vector3(LearningRate, LearningRate, LearningRate);
 }

 public override void InitMem()
@@ -583,7 +567,7 @@ public override void ComputeHiddenLayerErr()
         //find the error by finding the product of the output errors and their weight connections.
         SimpleCell cell = neuHidden[i];

-        cell.er = 0.0;
+        cell.er = 0.0f;

         if (cell.mask == false)
         {
@@ -600,30 +584,22 @@ public override void LearnOutputWeight()
     //update weights for hidden to output layer
     Parallel.For(0, L1, parallelOption, i =>
     {
-        double cellOutput = neuHidden[i].cellOutput;
+        float cellOutput = neuHidden[i].cellOutput;
         for (int k = 0; k < L2; k++)
        {
-            double delta = NormalizeGradient(cellOutput * OutputLayer.er[k]);
-            double newLearningRate = UpdateLearningRate(Hidden2OutputWeightLearningRate, i, k, delta);
+            float delta = NormalizeGradient(cellOutput * OutputLayer.er[k]);
+            double newLearningRate = UpdateLearningRate(Hidden2OutputWeightLearningRate, k, i, delta);

-            Hidden2OutputWeight[k][i] += newLearningRate * delta;
+            Hidden2OutputWeight[k][i] += (float)(newLearningRate * delta);
         }
     });
 }

-public double UpdateLearningRate(ref float mg, double delta)
-{
-    double dg = mg + delta * delta;
-    mg = (float)dg;
-    return LearningRate / (1.0 + Math.Sqrt(dg));
-}
-
 public override void LearnNet(State state, int numStates, int curState)
 {
     //Get sparse feature and apply it into hidden layer
     var sparse = state.SparseData;
     int sparseFeatureSize = sparse.Count;
-    Vector4 vecLearningRate = new Vector4(LearningRate, LearningRate, LearningRate, LearningRate);

     //put variables for derivatives in weight class and cell class
     Parallel.For(0, L1, parallelOption, i =>
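The deleted UpdateLearningRate(ref float, double) overload implemented a per-weight AdaGrad-style step: add the squared gradient to a running accumulator, then divide the base learning rate by one plus the square root of the accumulator. The vectorized paths below apply that same rule to several weights at once via System.Numerics. A minimal standalone sketch of the equivalence (ScalarIncrement/VectorIncrement are illustrative names, not part of RNNSharp):

using System;
using System.Numerics;

static class AdaGradSketch
{
    // Scalar rule, as in the removed method: grow the accumulator by delta^2,
    // then scale the step by learningRate / (1 + sqrt(accumulator)).
    public static double ScalarIncrement(ref float sumSquares, double learningRate, double delta)
    {
        sumSquares += (float)(delta * delta);
        return learningRate / (1.0 + Math.Sqrt(sumSquares)) * delta;
    }

    // Vector rule: the same arithmetic for four weights in one SIMD expression,
    // mirroring "vecLearningRate / (Vector4.SquareRoot(vecAlpha) + Vector4.One)".
    public static Vector4 VectorIncrement(ref Vector4 sumSquares, Vector4 learningRate, Vector4 delta)
    {
        sumSquares += delta * delta;
        return learningRate / (Vector4.SquareRoot(sumSquares) + Vector4.One) * delta;
    }
}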
@@ -650,8 +626,6 @@ public override void LearnNet(State state, int numStates, int curState)
             (float)Sigmoid2_ci_netCellState_mul_SigmoidDerivative_ci_netIn,
             (float)ci_previousCellState_mul_SigmoidDerivative_ci_netForget);

-        double delta = 0;
-        double newLearningRate = 0;
         for (int k = 0; k < sparseFeatureSize; k++)
         {
             var entry = sparse.GetEntry(k);
@@ -673,9 +647,7 @@ public override void LearnNet(State state, int numStates, int curState)
             vecAlpha = wlr + vecAlpha;
             wlr_i[entry.Key] = vecAlpha;

-            vecAlpha = Vector4.SquareRoot(vecAlpha) + Vector4.One;
-            vecAlpha = vecLearningRate / vecAlpha;
-
+            vecAlpha = vecLearningRate / (Vector4.SquareRoot(vecAlpha) + Vector4.One);
             vecDelta = vecAlpha * vecDelta;

             w.wInputCell += vecDelta.X;
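Folding the two statements into one expression does not change the arithmetic: in both forms the per-weight step size is learningRate / (sqrt(accumulatedSquares) + 1), computed component-wise. A quick check with hypothetical values:

using System;
using System.Numerics;

class AlphaCheck
{
    static void Main()
    {
        Vector4 acc = new Vector4(4f);   // accumulated squared gradients (hypothetical)
        Vector4 lr = new Vector4(0.1f);  // base learning rate (hypothetical)

        // old form: two statements
        Vector4 twoStep = Vector4.SquareRoot(acc) + Vector4.One;
        twoStep = lr / twoStep;

        // new form: one fused expression
        Vector4 fused = lr / (Vector4.SquareRoot(acc) + Vector4.One);

        Console.WriteLine(twoStep == fused); // True: both give 0.1 / (2 + 1) per component
    }
}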
@@ -713,9 +685,7 @@ public override void LearnNet(State state, int numStates, int curState)
             vecAlpha = wlr + vecAlpha;
             wlr_i[j] = vecAlpha;

-            vecAlpha = Vector4.SquareRoot(vecAlpha) + Vector4.One;
-            vecAlpha = vecLearningRate / vecAlpha;
-
+            vecAlpha = vecLearningRate / (Vector4.SquareRoot(vecAlpha) + Vector4.One);
             vecDelta = vecAlpha * vecDelta;

             w.wInputCell += vecDelta.X;
@@ -736,17 +706,22 @@ public override void LearnNet(State state, int numStates, int curState)


         //update internal weights
-        delta = cellStateError * c.dSWCellIn;
-        newLearningRate = UpdateLearningRate(ref c.dCellInLearningRate, delta);
-        c.wCellIn += newLearningRate * delta;
+        Vector3 vecCellDelta = new Vector3((float)c.dSWCellIn, (float)c.dSWCellForget, (float)c.cellState);
+        Vector3 vecCellErr = new Vector3(cellStateError, cellStateError, gradientOutputGate);
+        Vector3 vecCellLearningRate = CellLearningRate[i];
+
+        vecCellDelta = vecCellErr * vecCellDelta;
+        vecCellLearningRate += (vecCellDelta * vecCellDelta);
+        CellLearningRate[i] = vecCellLearningRate;
+
+        //LearningRate / (1.0 + Math.Sqrt(dg));
+        vecCellLearningRate = vecLearningRate3 / (Vector3.One + Vector3.SquareRoot(vecCellLearningRate));
+        vecCellDelta = vecCellLearningRate * vecCellDelta;

-        delta = cellStateError * c.dSWCellForget;
-        newLearningRate = UpdateLearningRate(ref c.dCellForgetLearningRate, delta);
-        c.wCellForget += newLearningRate * delta;
+        c.wCellIn += vecCellDelta.X;
+        c.wCellForget += vecCellDelta.Y;
+        c.wCellOut += vecCellDelta.Z;

-        delta = gradientOutputGate * c.cellState;
-        newLearningRate = UpdateLearningRate(ref c.dCellOutLearningRate, delta);
-        c.wCellOut += newLearningRate * delta;

         neuHidden[i] = c;
     });
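The Vector3 block packs the three scalar cell-weight updates into SIMD lanes: X carries the cell-input weight (error cellStateError, partial dSWCellIn), Y the forget-gate weight (cellStateError, dSWCellForget), and Z the output-gate weight (gradientOutputGate, cellState). Written out as a standalone helper under those assumptions (a sketch, not RNNSharp's API):

using System.Numerics;

static class CellWeightSketch
{
    // Returns the increments to add to (wCellIn, wCellForget, wCellOut);
    // accSquares is the per-cell accumulator kept in CellLearningRate[i].
    public static Vector3 Increment(ref Vector3 accSquares, Vector3 learningRate,
                                    float cellStateError, float gradientOutputGate,
                                    Vector3 partials) // (dSWCellIn, dSWCellForget, cellState)
    {
        Vector3 err = new Vector3(cellStateError, cellStateError, gradientOutputGate);
        Vector3 delta = err * partials;          // per-lane gradient
        accSquares += delta * delta;             // AdaGrad-style accumulation
        Vector3 rate = learningRate / (Vector3.One + Vector3.SquareRoot(accSquares));
        return rate * delta;                     // per-lane step
    }
}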
@@ -833,15 +808,15 @@ public override void computeHiddenLayer(State state, bool isTrain = true)
         //squash output gate
         cell_j.yOut = Sigmoid(cell_j.netOut);

-        cell_j.cellOutput = cell_j.cellState * cell_j.yOut;
+        cell_j.cellOutput = (float)(cell_j.cellState * cell_j.yOut);


         neuHidden[j] = cell_j;
     });
 }


-public override void computeOutput(double[] doutput)
+public override void computeOutput(float[] doutput)
 {
     matrixXvectorADD(OutputLayer, neuHidden, Hidden2OutputWeight, L2, L1, 0);
     if (doutput != null)