being able to load weights from a checkpoint and to run test successfully.

w32zhong · w32zhong · commit b0e93c6dda42 · 2023-01-10T18:12:05.000-05:00
diff --git a/examples/mnist.py b/examples/mnist.py
@@ -11,10 +11,10 @@
 import pickle
 
 
-def train(epochs=10, dryrun=False, debug=False, batch_size=64,
+def train(epochs=10, batch_size=64, dryrun=False, debug=False,
     save_file='data/mnist_model_ckpt.pkl'):
 
-    dataset = MNIST('./data/MNIST/mnn_test.pickle')
+    dataset = MNIST('./data/MNIST/mnn_train.pickle')
     loader = torch.utils.data.DataLoader(dataset, batch_size=batch_size,
         shuffle=True, collate_fn=lambda batch: batch)
 
@@ -47,12 +47,41 @@ def train(epochs=10, dryrun=False, debug=False, batch_size=64,
 
     print('saving checkpoint ...')
     with open(save_file, 'wb') as fh:
-        save = net.state_dict(), net.config()
+        save = net.state_dict(), net.get_config()
         pickle.dump(save, fh)
 
 
-def test(dryrun=False, debug=False):
-    pass
+def test(checkpoint, batch_size=64):
+    """Load the pickled (state_dict, config) checkpoint and print test-set accuracy."""
+    # NOTE: pickle.load can execute arbitrary code; only open trusted checkpoints.
+    with open(checkpoint, 'rb') as fh:
+        state_dict, config = pickle.load(fh)
+
+    dataset = MNIST('./data/MNIST/mnn_test.pickle')
+    loader = torch.utils.data.DataLoader(dataset, batch_size=batch_size,
+        shuffle=True, collate_fn=lambda batch: batch)
+
+    # Hard-coded architecture; presumably must mirror the net that was saved (confirm).
+    net = SequentialLayers([
+        LinearLayer(28 * 28, 256),
+        ReluLayer(),
+        LinearLayer(256, 10),
+        SoftmaxLayer()
+    ])
+    net.load_weights(state_dict, config=config, verbose=True)
+
+    correct_cnt, inference_cnt = 0, 0
+    for batch in loader:
+        images = Tensor([data for data, label in batch]).unsqueeze(-1)
+        labels = Tensor([label for data, label in batch])
+        scores = net(images).squeeze(-1)
+        preds = scores.argmax(-1)
+        correct_cnt += (preds == labels).sum().item()
+        inference_cnt += labels.shape[0]
+
+    # Report NaN instead of raising ZeroDivisionError when the loader yields nothing.
+    accuracy = correct_cnt / inference_cnt if inference_cnt else float('nan')
+    print(f'test accuracy: {accuracy:.3f}')
 
 
 if __name__ == '__main__':
diff --git a/mnn/layer.py b/mnn/layer.py
@@ -17,6 +17,14 @@ def _state_dict(self):
             state_dict[key] = (shape, param)
         return state_dict
 
+    def _load_weights(self, state_dict, config=None):  # `config` is accepted but unused here
+        for qualified, (saved_shape, saved_param) in state_dict.items():  # key: '<name>.<param>'
+            layer_name, param_key = qualified.split('.')
+            assert layer_name == self.name
+            assert param_key in self.params
+            assert saved_shape == self.params[param_key].shape
+            self.params[param_key] = Tensor(saved_param)
+
     def _accumulate_grads(self, key, val):
         reduced_val = self._batch_reduced(val)
         if key in self.grads:
diff --git a/mnn/seq_layers.py b/mnn/seq_layers.py
@@ -1,4 +1,3 @@
-import json
 from mnn.tensor import Tensor
 from mnn.layer import *
 
@@ -120,10 +119,19 @@ def state_dict(self):
                 state_dict[path] = val
         return state_dict
 
-    def config(self):
-        return json.dumps({
-            'layers': len(self.layers)
-        })
+    def get_config(self):  # plain dict holding the layer count
+        return dict(layers=len(self.layers))
+
+    def load_weights(self, state_dict, config=None, verbose=False):
+        """Route each '<layer index>.<sub-path>' entry of state_dict to that layer."""
+        assert config is not None
+        assert len(self.layers) == config['layers']
+        for full_path, weights in state_dict.items():
+            if verbose:
+                print('loading weights to:', full_path)
+            index, _, remainder = full_path.partition('.')
+            target_layer = self.layers[int(index)]
+            target_layer._load_weights({remainder: weights})
 
 
 if __name__ == '__main__':
diff --git a/mnn/tensor.py b/mnn/tensor.py
@@ -63,6 +63,12 @@ def __mul__(self, x):
         else:
             return Tensor(self._data * x)
 
+    def __eq__(self, x):  # NOTE: defining __eq__ without __hash__ makes instances unhashable
+        if isinstance(x, Tensor):  # compare the wrapped `_data` of both tensors
+            return Tensor(self._data == x._data)
+        else:  # NotImplemented is a sentinel to return, not an exception to raise
+            return NotImplemented
+
     def __rmul__(self, x):
         if isinstance(x, Tensor):
             return Tensor(self._data * x._data)