
Commit 5676601

Committed Mar 23, 2020
Clean up tests.
As part of making the training test reliable, I have matched the reference implementation's initialization for the Embedding layers. Additionally, the DLRM model is susceptible to an occasional "bad initialization" from which it fails to perfectly memorize the single test minibatch. Although this is infrequent (~1 out of 50 test runs), I have modified the test to randomly re-initialize up to 5 times, making it flaky with a probability of only about 3.2e-9 while still maintaining the quality of the test (e.g. still exercising random initialization). Finally, instead of checking that the loss drops below a particular value, the test checks that accuracy on the minibatch is 100%. This is an earlier stopping condition, so the convergence test often runs in under 300 ms on a laptop.
1 parent 659e982 commit 5676601
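A quick back-of-the-envelope check of the flakiness figure quoted above (a sketch, assuming each of the five re-initializations independently hits a bad initialization with probability roughly 1/50; the variable names are mine, not from the commit):

import Foundation

// Five independent attempts only all fail together with probability (1/50)^5.
let badInitProbability = 1.0 / 50.0
let attempts = 5.0
let spuriousFailureProbability = pow(badInitProbability, attempts)
print(spuriousFailureProbability)  // ~3.2e-09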

File tree

2 files changed, +37 -24 lines


Models/Recommendation/DLRM.swift (+10 -3)
@@ -61,7 +61,14 @@ public struct DLRM: Module {
         mlpBottom = MLP(dims: [nDense] + lnBot)
         let topInput = lnEmb.count * mSpa + lnBot.last!
         mlpTop = MLP(dims: [topInput] + lnTop + [1], sigmoidLastLayer: true)
-        latentFactors = lnEmb.map { Embedding(vocabularySize: $0, embeddingSize: mSpa) }
+        latentFactors = lnEmb.map { embeddingSize -> Embedding<Float> in
+            // Use a random uniform initialization to match the reference implementation.
+            let weights = Tensor<Float>(
+                randomUniform: [embeddingSize, mSpa],
+                lowerBound: Tensor(Float(-1.0)/Float(embeddingSize)),
+                upperBound: Tensor(Float(1.0)/Float(embeddingSize)))
+            return Embedding(embeddings: weights)
+        }
         self.interaction = interaction
     }

@@ -80,8 +87,8 @@ public struct DLRM: Module {
         let denseEmbVec = mlpBottom(denseInput)
         let sparseEmbVecs = computeEmbeddings(sparseInputs: sparseInput,
                                               latentFactors: latentFactors)
-        let topInput = Tensor(concatenating: sparseEmbVecs + [denseEmbVec],
-                              alongAxis: 1)
+        let topInput = computeInteractions(
+            denseEmbVec: denseEmbVec, sparseEmbVecs: sparseEmbVecs)
         let prediction = mlpTop(topInput)

         // TODO: loss threshold clipping
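For context, the new embedding initialization can be exercised on its own. A minimal sketch (illustrative sizes, not taken from the model; it only reuses the Tensor and Embedding APIs that appear in the diff above):

import TensorFlow

// Build one embedding table the way the DLRM initializer now does,
// then sanity-check that every weight lies within the expected bounds.
let vocabularySize = 10
let embeddingWidth = 4
let bound = Float(1.0) / Float(vocabularySize)
let weights = Tensor<Float>(
    randomUniform: [vocabularySize, embeddingWidth],
    lowerBound: Tensor(-bound),
    upperBound: Tensor(bound))
let table = Embedding<Float>(embeddings: weights)
assert(table.embeddings.min().scalarized() >= -bound)
assert(table.embeddings.max().scalarized() <= bound)

Note that the bounds shrink as the vocabulary grows, so larger embedding tables start with proportionally smaller weights.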

Tests/RecommendationModelTests/DLRMTests.swift (+27 -21)
@@ -43,52 +43,58 @@ final class DLRMTests: XCTestCase {
     }

     func testDLRMTraining() {
-        let trainingSteps = 2000
+        let trainingSteps = 400
         let nDense = 9
        let dimEmbed = 4
         let bottomMLPSize = [8, 4]
         let topMLPSize = [11, 4]
         let batchSize = 10

-        var model = DLRM(
-            nDense: nDense,
-            mSpa: dimEmbed,
-            lnEmb: [10, 20],
-            lnBot: bottomMLPSize,
-            lnTop: topMLPSize)
-
         func lossFunc(predicted: Tensor<Float>, labels: Tensor<Float>) -> Tensor<Float> {
             let difference = predicted - labels
             let squared = difference * difference
             return squared.sum()
         }

-        let trainingData = DLRMInput(dense: Tensor(ones: [batchSize, nDense]),
+        let trainingData = DLRMInput(dense: Tensor(randomNormal: [batchSize, nDense]),
                                      sparse: [Tensor([7, 3, 1, 3, 1, 6, 7, 8, 9, 2]),
                                               Tensor([17, 13, 19, 0, 1, 6, 7, 8, 9, 10])])
         let labels = Tensor<Float>([1,0,0,1,1,1,0,1,0,1])

-        let optimizer = SGD(for: model, learningRate: 0.0015)
+        // Sometimes DLRM on such a small dataset can get "stuck" in a bad initialization.
+        // To ensure a reliable test, we give ourselves a few reinitializations.
+        for attempt in 1...5 {
+            var model = DLRM(
+                nDense: nDense,
+                mSpa: dimEmbed,
+                lnEmb: [10, 20],
+                lnBot: bottomMLPSize,
+                lnTop: topMLPSize)
+            let optimizer = SGD(for: model, learningRate: 0.1)

-        for step in 1...trainingSteps {
-            let (loss, grads) = valueWithGradient(at: model) { model in
-                lossFunc(predicted: model(trainingData), labels: labels)
-            }
-            if step % 100 == 0 {
-                print(step, loss)
-                if loss.scalarized() < 1e-7 {
-                    return // Success!
+            for step in 0...trainingSteps {
+                let (loss, grads) = valueWithGradient(at: model) { model in
+                    lossFunc(predicted: model(trainingData), labels: labels)
+                }
+                if step % 50 == 0 {
+                    print(step, loss)
+                    if round(model(trainingData)) == labels { return } // Success
+                }
+                if step > 300 && step % 50 == 0 {
+                    print("\n\n-----------------------------------------")
+                    print("Step: \(step), loss: \(loss)\nGrads:\n\(grads)\nModel:\n\(model)")
                 }
+                optimizer.update(&model, along: grads)
             }
-            optimizer.update(&model, along: grads)
+            print("Final model outputs (attempt: \(attempt)):\n\(model(trainingData))\nTarget:\n\(labels)")
         }
-        XCTFail("Could not perfectly fit a single mini-batch.")
+        XCTFail("Could not perfectly fit a single mini-batch after 5 reinitializations.")
     }
 }

 extension DLRMTests {
     static var allTests = [
         ("testDLRM", testDLRM),
-        ("testDLRMTraining", testDLRMTraining)
+        ("testDLRMTraining", testDLRMTraining),
     ]
 }
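The new stopping condition asks for 100% accuracy on the minibatch rather than a loss threshold: the sigmoid outputs are rounded to 0/1 and compared against the labels, and comparing two tensors with == yields a single Bool that is true only when every element matches. A small illustration (the prediction values are made up):

import TensorFlow

// True only when every rounded prediction equals its 0/1 label.
let predictions = Tensor<Float>([0.91, 0.08, 0.32, 0.77])
let labels = Tensor<Float>([1, 0, 0, 1])
let memorized = round(predictions) == labels
print(memorized)  // true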
