Text generation recipe #258

Open · wants to merge 53 commits into master

Changes shown from 9 of the 53 commits.

Commits:
8194f3f  Integrate ULMFiT (initial)  (Chandu-4444, Jul 19, 2022)
7662230  Add Paragraph to train_classifier  (Chandu-4444, Jul 19, 2022)
1418e50  Add batchseq to pad batch (naive version)  (Chandu-4444, Jul 21, 2022)
b682105  Remove Project.toml changes.  (Chandu-4444, Jul 21, 2022)
c1064bc  Add vocab_size to TextClassificationTask  (Chandu-4444, Jul 24, 2022)
345ced1  Add `vocab_size` to encodings  (Chandu-4444, Jul 25, 2022)
46b6826  Test `batches` integration with model.  (Chandu-4444, Jul 25, 2022)
388e8ac  Update load_batchseq function.  (Chandu-4444, Jul 25, 2022)
8bfe705  Clean up useless code from TextModels.jl.  (Chandu-4444, Jul 29, 2022)
2482aaa  Update FastText/src/models/pretrain_lm.jl  (Chandu-4444, Aug 2, 2022)
8bc930d  Update FastText/src/models/dataloader.jl  (Chandu-4444, Aug 2, 2022)
3882b1d  Update FastText/src/models/custom_layers.jl  (Chandu-4444, Aug 2, 2022)
3057989  Update FastText/src/models/custom_layers.jl  (Chandu-4444, Aug 2, 2022)
307fde1  Add `reset!` for AWD_LSTM.  (Chandu-4444, Aug 2, 2022)
075a21e  Add `textlearner`.  (Chandu-4444, Aug 8, 2022)
f16ec2c  Complete text classification pipeline.  (Chandu-4444, Aug 8, 2022)
3469630  Update `LanguageModel` to use `Flux.reset!`.  (Chandu-4444, Aug 18, 2022)
974a622  Include models.jl file.  (Chandu-4444, Aug 23, 2022)
8e9f7aa  Start text generation recipe for `imdb`  (Chandu-4444, Aug 23, 2022)
0d44bbd  Update FastText/src/models/custom_layers.jl  (Chandu-4444, Aug 23, 2022)
4ad9a12  Update FastText/src/models/custom_layers.jl  (Chandu-4444, Aug 23, 2022)
080a018  Update FastText/src/models/custom_layers.jl  (Chandu-4444, Aug 23, 2022)
922334d  Update FastText/src/models/custom_layers.jl  (Chandu-4444, Aug 23, 2022)
ce34be1  Update FastText/src/models/custom_layers.jl  (Chandu-4444, Aug 23, 2022)
6cea902  Update FastText/src/models/custom_layers.jl  (Chandu-4444, Aug 23, 2022)
5159fb8  Update FastText/src/models/custom_layers.jl  (Chandu-4444, Aug 23, 2022)
c6b69f7  Update FastText/src/models/custom_layers.jl  (Chandu-4444, Aug 23, 2022)
164fe21  Update FastText/src/models/custom_layers.jl  (Chandu-4444, Aug 24, 2022)
1ce6df2  Add suggestions and improvements from the call.  (Chandu-4444, Aug 26, 2022)
712d9c8  Use previous `VarDrop` code for use in Colab.  (Chandu-4444, Aug 28, 2022)
6d6504b  Use NNlib for scalar indexing  (Chandu-4444, Aug 29, 2022)
5af6edb  Updates to Project.toml  (Chandu-4444, Aug 31, 2022)
6636393  Merge branch 'textmodel-integration' into text-generation-recipe  (Chandu-4444, Aug 31, 2022)
6cc9053  Update code to solve `getfield non-differentiable` error.  (Chandu-4444, Sep 1, 2022)
d0ee3a4  Add `TextGeneration` task  (Chandu-4444, Sep 1, 2022)
659e6d1  Modify type params for `LanguageModel` and  (Chandu-4444, Sep 1, 2022)
0a151b2  Update FastText/src/models/train_text_classifier.jl  (Chandu-4444, Sep 1, 2022)
12bc9ab  Update FastText/src/models/train_text_classifier.jl  (Chandu-4444, Sep 1, 2022)
35c345f  Update dtypes to avoid CuArray errors.  (Chandu-4444, Sep 6, 2022)
cceee46  Add callable TextClassifier  (Chandu-4444, Sep 6, 2022)
f7d51f6  Update FastText/src/models/custom_layers.jl  (Chandu-4444, Sep 8, 2022)
90c9a79  Update FastText/src/models/custom_layers.jl  (Chandu-4444, Sep 8, 2022)
7e7de6d  Update `Flux.reset!()`  (Chandu-4444, Sep 12, 2022)
aae4442  Merge branch 'textmodel-integration' into text-generation-recipe  (Chandu-4444, Sep 13, 2022)
c1418b3  Update a few Flux.dropout functions.  (Chandu-4444, Sep 13, 2022)
2c04d19  Update code to avoid non-differentiable error  (Chandu-4444, Sep 13, 2022)
5b96c78  Merge branch 'textmodel-integration' into text-generation-recipe  (Chandu-4444, Sep 16, 2022)
9c60de6  Add batch generation for generation task.  (Chandu-4444, Sep 19, 2022)
3903bb2  Push to test on Colab  (Chandu-4444, Sep 21, 2022)
d4aa13c  Add blockmodel for LanguageModel  (Chandu-4444, Sep 21, 2022)
fb69dc5  Fix `TextClassificationTask`  (Chandu-4444, Sep 21, 2022)
416a800  Replace `map` with `mapobs`  (Chandu-4444, Sep 24, 2022)
232f3bf  Update `onehot` encode for NumberVector  (Chandu-4444, Sep 26, 2022)
FastText/src/models.jl (13 additions & 1 deletion)

@@ -1,5 +1,17 @@
 function blockmodel(inblock::NumberVector, outblock::OneHotTensor, backbone; k = 10)
     classifier = TextClassifier(backbone)
-    return (input) -> model(input, k = k, classifier = classifier)
+    return classifier
 end
+
+function (b::TextClassifier)(input)
+    k = 10
+    Zygote.ignore() do
+        Flux.reset!(b.rnn_layers)
+        [b.rnn_layers(x) for x in input[1:(end - k)]]
+    end
+
+    # bptt
+    model = b.linear_layers([b.rnn_layers(x) for x in input[(end - k + 1):end]])
+end
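Note on the callable TextClassifier added above: it implements the truncated backpropagation-through-time pattern from ULMFiT. The first length(input) - k steps run inside Zygote.ignore, so they only advance the recurrent state and never enter the AD tape; only the last k steps are differentiated through the linear head. Below is a minimal, self-contained sketch of the same pattern written against a plain Flux LSTM; toy_rnn, toy_head, and all sizes are illustrative stand-ins, not the PR's API.

using Flux, Zygote

toy_rnn  = Flux.LSTM(8 => 8)    # stands in for b.rnn_layers
toy_head = Flux.Dense(8 => 2)   # stands in for b.linear_layers

function truncated_forward(input; k = 10)
    Zygote.ignore() do
        Flux.reset!(toy_rnn)                          # fresh hidden state
        foreach(x -> toy_rnn(x), input[1:(end - k)])  # warm-up: state only, no gradient
    end
    outs = [toy_rnn(x) for x in input[(end - k + 1):end]]  # only these k steps are taped
    return toy_head(outs[end])
end

input = [rand(Float32, 8, 4) for _ in 1:30]   # 30 time steps, batch of 4
gs = Zygote.gradient(() -> sum(truncated_forward(input)),
                     Flux.params(toy_rnn, toy_head))

Keeping only the last k steps on the tape bounds memory use, which is why the warm-up loop sits inside Zygote.ignore rather than being differentiated.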
FastText/src/models/custom_layers.jl (10 additions & 10 deletions)

@@ -27,15 +27,15 @@ Moreover this also follows the Variational Dropout criteria, that is,
 the drop mask remains the same for a whole training pass.
 This is done by saving the masks in the 'maskWi' and 'maskWh' fields.
 """
-mutable struct WeightDroppedLSTMCell{A,V,S}
+mutable struct WeightDroppedLSTMCell{A,V,S,T}
     Wi::A
     Wh::A
     b::V
     h::S
     c::S
     p::Float32
     active::Union{Bool,Nothing}
-    state0::Tuple{Matrix{Float32},Matrix{Float32}}
+    state0::T
 end

 function WeightDroppedLSTMCell(in::Integer, out::Integer, p::Float32 = 0.0f0;
@@ -92,15 +92,15 @@ julia> wd = WeightDroppedLSTM(4, 5, 0.3);
 """
 function WeightDroppedLSTM(a...; kw...)
     cell = WeightDroppedLSTMCell(a...; kw...)
-    maskWi = Flux.dropout_mask(Flux.rng_from_array(), cell.Wi, cell.p)
-    maskWh = Flux.dropout_mask(Flux.rng_from_array(), cell.Wh, cell.p)
+    maskWi = Flux.dropout_mask(Flux.rng_from_array(cell.Wi), cell.Wi, cell.p)
+    maskWh = Flux.dropout_mask(Flux.rng_from_array(cell.Wh), cell.Wh, cell.p)
     hidden = (cell.state0..., maskWi, maskWh)
     return Flux.Recur(cell, hidden)
 end

 function Flux.reset!(layer::Flux.Recur{<:WeightDroppedLSTMCell})
-    maskWi = Flux.dropout_mask(Flux.rng_from_array(), layer.cell.Wi, layer.cell.p)
-    maskWh = Flux.dropout_mask(Flux.rng_from_array(), layer.cell.Wh, layer.cell.p)
+    maskWi = Flux.dropout_mask(Flux.rng_from_array(layer.cell.Wi), layer.cell.Wi, layer.cell.p)
+    maskWh = Flux.dropout_mask(Flux.rng_from_array(layer.cell.Wh), layer.cell.Wh, layer.cell.p)
     layer.state = (layer.cell.state0..., maskWi, maskWh)
     return nothing
 end
@@ -136,7 +136,7 @@ testmode!(m::VarDropCell, mode = true) =

 function (vd::VarDropCell)((has_mask, mask), x)
     if Flux._isactive(vd)
-        mask = has_mask ? mask : Flux.dropout_mask(Flux.rng_from_array(), x, vd.p)
+        mask = has_mask ? mask : Flux.dropout_mask(Flux.rng_from_array(x), x, vd.p)
         return (true, mask), x .* mask
     elseif !has_mask
         return (has_mask, mask), x
@@ -180,10 +180,10 @@ To reset mask:
 julia> reset_masks!(de)
 """
-mutable struct DroppedEmbeddings{A,F}
+mutable struct DroppedEmbeddings{A,F,M}
     emb::A
     p::F
-    mask::Vector{Float32}
+    mask::M
     active::Union{Bool,Nothing}
 end

@@ -264,4 +264,4 @@ function (a::PooledDense)(x)
     meanpool = (sum(x, dims = 3) / size(x, 3))[:, :, 1]
     hc = cat(x[:, :, 1], maxpool, meanpool, dims = 1)
     σ.(W * hc .+ b)
-end
\ No newline at end of file
+end
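Two threads run through the custom_layers.jl changes: the struct type parameters are widened (state0::T, mask::M) so these fields can also hold GPU arrays after Flux.gpu, and Flux.rng_from_array is now passed the array being masked, so the RNG matches the array's device. A small sketch of the variational-dropout behaviour these layers implement, sampling one mask per sequence and reusing it at every time step (names and sizes are illustrative):

using Flux

p = 0.3f0
seq = [rand(Float32, 5, 4) for _ in 1:10]   # 10 steps of (features, batch)

# rng_from_array(x) picks an RNG suited to x's array type, so the mask is
# generated on-device when x is a CuArray.
mask = Flux.dropout_mask(Flux.rng_from_array(seq[1]), seq[1], p)

dropped = [x .* mask for x in seq]          # the same mask at every step

Resampling the mask only in Flux.reset!, as the WeightDroppedLSTM methods above do, is what makes the dropout variational: one pass, one mask.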
FastText/src/models/pretrain_lm.jl (1 addition & 1 deletion)

@@ -50,4 +50,4 @@ function LanguageModel(load_pretrained::Bool = false, task::Any = Nothing; embed
     return lm
 end

-Flux.@functor LanguageModel
\ No newline at end of file
+Flux.@functor LanguageModel
FastText/src/models/train_text_classifier.jl (3 additions & 2 deletions)

@@ -9,7 +9,7 @@ linear_layers : contains Chain of two Dense layers [PooledDense and Dense] wit
 To train, create an instance and pass it as the first argument to the 'train_classifier!' function.
 """
-mutable struct TextClassifier{A,F,Q}
+struct TextClassifier{A,F,Q}
     vocab::A
     rnn_layers::F
     linear_layers::Q
@@ -33,6 +33,7 @@ function TextClassifier(lm::LanguageModel = LanguageModel(), clsfr_out_sz::Integ
 end

 Flux.@functor TextClassifier
+Flux.trainable(m::TextClassifier) = (rnn_layers = m.rnn_layers, linear_layers = m.linear_layers)

 function loss(m, xs, y; k = 10)
     # forward steps
@@ -57,4 +58,4 @@ function train_text_classifier(classifier::TextClassifier = Nothing, batches = N
     Flux.Optimise.update!(opt, ps, gs)
 end

-end
\ No newline at end of file
+end
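The switch from mutable struct to struct, together with the Flux.trainable overload added above, is what keeps vocab out of training: Flux.@functor lets Flux walk every field (for gpu, cpu, and friends), while trainable restricts which fields contribute parameters. A sketch of the pattern with an illustrative stand-in type:

using Flux

struct ToyClassifier{A,F,Q}
    vocab::A            # plain data; must not be trained
    rnn_layers::F
    linear_layers::Q
end

Flux.@functor ToyClassifier
# Only these two fields are walked when collecting parameters:
Flux.trainable(m::ToyClassifier) =
    (rnn_layers = m.rnn_layers, linear_layers = m.linear_layers)

m = ToyClassifier(["<pad>", "the", "cat"],
                  Flux.LSTM(4 => 4), Flux.Dense(4 => 2))
ps = Flux.params(m)   # LSTM and Dense weights only; vocab is excluded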