🚧 wip

nateraw · nateraw · commit bd452033a420 · 2020-08-13T20:04:36.000-06:00
diff --git a/pytorch_lightning/trainer/evaluation_loop.py b/pytorch_lightning/trainer/evaluation_loop.py
@@ -180,7 +180,6 @@ class TrainerEvaluationLoopMixin(ABC):
     tpu_id: int
     verbose_test: bool
     running_sanity_check: bool
-    testing: bool
     amp_backend: AMPType
 
     # Callback system
@@ -372,9 +371,10 @@ def _evaluate(
 
                 # track outputs for collation
                 if output is not None:
-                    do_write_preds = self.testing and isinstance(output, EvalResult) and not self.running_sanity_check
-                    # Add predictions to our prediction collection if they are found in outputs
-                    if do_write_preds:
+
+                    # Add step predictions to prediction collection to write later
+                    do_write_predictions = is_result_obj and test_mode
+                    if do_write_predictions:
                         predictions.add(output.pop('predictions', None))
 
                     dl_outputs.append(output)
diff --git a/tests/base/model_test_steps.py b/tests/base/model_test_steps.py
@@ -1,8 +1,11 @@
+import random
 from abc import ABC
 from collections import OrderedDict
 
 import torch
 
+from pytorch_lightning import EvalResult
+
 
 class TestStepVariations(ABC):
     """
@@ -91,3 +94,51 @@ def test_step__multiple_dataloaders(self, batch, batch_idx, dataloader_idx, **kw
 
     def test_step__empty(self, batch, batch_idx, *args, **kwargs):
         return {}
+
+
+    def test_step_result_preds(self, batch, batch_idx, optimizer_idx=None):
+        """Test step that returns an ``EvalResult`` carrying predictions to write.
+
+        ``self.test_option`` picks the variation: 0 is the base case, 1 writes mismatched lengths.
+
+        :param batch: an ``(x, y)`` pair of inputs and labels
+        :param batch_idx: index of the batch, used to derive per-sample ids
+        :return: ``EvalResult`` with logged metrics and written predictions
+        """
+        x, y = batch
+        x = x.view(x.size(0), -1)
+        y_hat = self(x)
+
+        loss_test = self.loss(y, y_hat)
+
+        # acc
+        labels_hat = torch.argmax(y_hat, dim=1)
+        test_acc = torch.sum(y == labels_hat).item() / (len(y) * 1.0)
+        test_acc = torch.tensor(test_acc)
+
+        test_acc = test_acc.type_as(x)
+
+        # Do regular EvalResult Logging
+        result = EvalResult(checkpoint_on=loss_test)
+        result.log('test_loss', loss_test)
+        result.log('test_acc', test_acc)
+
+        # Ids start at this batch's offset and cover exactly the samples in it;
+        # sizing the range from ``x`` keeps a short final batch correct.
+        first_id = batch_idx * self.batch_size
+        lazy_ids = torch.arange(first_id, first_id + x.size(0))
+
+        # This is passed in from pytest via parameterization
+        option = getattr(self, 'test_option', 0)
+
+        # Base
+        if option == 0:
+            result.write('idxs', lazy_ids)
+            result.write('preds', labels_hat)
+
+        # Check mismatching tensor len
+        elif option == 1:
+            result.write('idxs', torch.cat((lazy_ids, lazy_ids)))
+            result.write('preds', labels_hat)
+
+        return result
diff --git a/tests/core/test_results.py b/tests/core/test_results.py
@@ -1,10 +1,16 @@
+import sys
+from pathlib import Path
+
 import pytest
 import torch
 import torch.distributed as dist
 import torch.multiprocessing as mp
+from pytorch_lightning import Trainer
 from pytorch_lightning.core.step_result import Result, TrainResult, EvalResult
 import tests.base.develop_utils as tutils
-import sys
+
+from tests.base import EvalModelTemplate
+from tests.base.datamodules import TrialMNISTDataModule
 
 
 def _setup_ddp(rank, worldsize):
@@ -35,3 +41,59 @@ def test_result_reduce_ddp(result_cls):
 
     worldsize = 2
     mp.spawn(_ddp_test_fn, args=(worldsize, result_cls), nprocs=worldsize)
+
+
+@pytest.mark.parametrize(
+    "option,do_train",
+    [
+        pytest.param(0, True, id='full_loop'),
+        pytest.param(0, False, id='test_only'),
+        # NOTE(review): ``match`` is not a documented ``xfail`` argument, so
+        # presumably only the exception type is enforced here -- confirm.
+        pytest.param(
+            1, False, id='test_only_mismatching_tensor', marks=pytest.mark.xfail(raises=ValueError, match="Mism.*")
+        ),
+    ]
+)
+def test_result_obj_predictions(tmpdir, option, do_train):
+    """Run the test loop with ``test_step_result_preds`` and check the file it writes.
+
+    ``option`` is forwarded to the model as ``test_option``; ``do_train``
+    selects between fit-then-test and test-only.
+    """
+    tutils.reset_seed()
+
+    dm = TrialMNISTDataModule(tmpdir)
+
+    model = EvalModelTemplate()
+    model.test_option = option
+    # Keep the output under tmpdir so runs never touch the working directory
+    model.prediction_file = Path(tmpdir) / 'predictions.pt'
+    model.test_step = model.test_step_result_preds
+    model.test_step_end = None
+    model.test_epoch_end = None
+    model.test_end = None
+
+    trainer = Trainer(
+        default_root_dir=tmpdir,
+        max_epochs=3,
+        weights_summary=None,
+        deterministic=True,
+    )
+
+    # Prediction file shouldn't exist yet because we haven't done anything
+    assert not model.prediction_file.exists()
+
+    if do_train:
+        fit_status = trainer.fit(model, dm)
+        assert fit_status == 1
+        metrics = trainer.test(datamodule=dm)[0]
+        assert metrics['test_loss'] < 0.6
+        assert metrics['test_acc'] > 0.8
+    else:
+        trainer.test(model, datamodule=dm)
+
+    # check prediction file now exists and is of expected length
+    assert model.prediction_file.exists()
+    predictions = torch.load(model.prediction_file)
+    assert len(predictions) == len(dm.mnist_test)