Save test predictions on multiple GPUs #2926
Changes from all commits
b2eaf71
0888391
d590cab
5d6633b
3b44a44
4b84912
65a4c05
7cbcc46
1f535d6
71a8c99
bd45203
0a9fe65
6e82b20
68b2b54
8e974d2
de984c4
c09efa4
bf6b696
031729e
e8b35f1
7b9fd75
0d91462
ca6762f
9f23dc2
373ab8e
0a1f9c2
```diff
@@ -134,7 +134,7 @@
 from pytorch_lightning.utilities import rank_zero_warn, flatten_dict, AMPType
 from pytorch_lightning.core.step_result import Result, EvalResult
 from pytorch_lightning.utilities.exceptions import MisconfigurationException
+from pytorch_lightning.trainer.supporters import PredictionCollection

 try:
     import torch_xla.distributed.parallel_loader as xla_pl
```
```diff
@@ -278,6 +278,7 @@ def _evaluate(
 # bookkeeping
 outputs = []
+predictions = PredictionCollection(self.global_rank, self.world_size)

 # convert max_batches to list
 if isinstance(max_batches, int):
```
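For context, here is a minimal sketch of the interface the evaluation loop relies on: a `PredictionCollection` constructed with the process's global rank and world size, plus the `add` and `to_disk` methods called later in this diff. The real class lives in `pytorch_lightning.trainer.supporters`; the internals below are assumptions, since the diff only shows the call sites.

```python
# Hypothetical sketch of PredictionCollection as implied by its call sites in
# this diff (constructor args, add(), to_disk()). The actual implementation in
# pytorch_lightning.trainer.supporters may differ; internals are assumptions.
import torch


class PredictionCollection:
    def __init__(self, global_rank: int, world_size: int):
        self.global_rank = global_rank  # rank of this process
        self.world_size = world_size    # total number of processes
        self.predictions = {}           # filename -> {column_name: [values]}

    def add(self, predictions):
        # The loop passes output.pop('predictions', None), so tolerate None.
        if predictions is None:
            return
        for filename, columns in predictions.items():
            dest = self.predictions.setdefault(filename, {})
            for name, values in columns.items():
                dest.setdefault(name, []).extend(values)

    def to_disk(self):
        # Write one shard per rank so concurrent GPU processes never write
        # to the same file.
        for filename, columns in self.predictions.items():
            shard = f"{filename}.rank_{self.global_rank}"
            torch.save(columns, shard)
```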
```diff
@@ -370,6 +371,12 @@ def _evaluate(
 # track outputs for collation
 if output is not None:
+
+    # Add step predictions to prediction collection to write later
+    do_write_predictions = is_result_obj and test_mode
+    if do_write_predictions:
+        predictions.add(output.pop('predictions', None))
+
     dl_outputs.append(output)

 self.__eval_add_step_metrics(output)
```
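From the user's side, this means a `test_step` that returns a result object carrying a `predictions` entry, which the loop above pops and hands to the collection. The sketch below assumes an `EvalResult.write`-style helper for stashing predictions; treat the method name and signature as assumptions rather than confirmed API.

```python
# Hedged usage sketch: a test_step whose returned result carries predictions
# under the 'predictions' key consumed by predictions.add(...) above.
# result.write(...) and its signature are assumptions for illustration.
from pytorch_lightning.core.step_result import EvalResult


def test_step(self, batch, batch_idx):
    x, y = batch
    preds = self(x).argmax(dim=-1)
    result = EvalResult()
    # Assumed helper: records predictions so they end up in
    # output['predictions'], keyed by the target filename.
    result.write('preds', preds.tolist(), filename='predictions.pt')
    return result
```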
```diff
@@ -388,6 +395,9 @@ def _evaluate(
 # log callback metrics
 self.__update_callback_metrics(eval_results, using_eval_result)

+# Write predictions to disk if they're available.
+predictions.to_disk()
+
 # enable train mode again
 model.train()
```

Review comments on `predictions.to_disk()`:

- this should just be an internal function of the prediction object.
- we can't wait till the end of epoch to write predictions because we will accumulate too much memory. the write needs to happen at every batch.
- how do we want to deal with writing to the cache file (w/

williamFalcon marked this conversation as resolved.
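On the reviewer's memory concern, one way to write at every batch instead of at epoch end is to append each batch's predictions to a per-rank cache file and merge the shards afterwards. A rough sketch, with the helper name, cache layout, and file format all assumptions:

```python
# Sketch of per-batch flushing, per the review comment above: each rank
# appends its batch predictions to its own cache file, so memory stays
# bounded and ranks never contend on the same file. Names and the file
# format here are assumptions, not the merged implementation.
import os
import torch


def flush_batch_predictions(predictions, global_rank, cache_dir="pred_cache"):
    """Append one batch of predictions to this rank's cache file."""
    if predictions is None:
        return
    os.makedirs(cache_dir, exist_ok=True)
    path = os.path.join(cache_dir, f"rank_{global_rank}.pt")
    # Read-extend-write keeps the example simple; a real implementation
    # would use an append-friendly format so each flush is O(batch),
    # not O(epoch).
    cached = torch.load(path) if os.path.exists(path) else []
    cached.append(predictions)
    torch.save(cached, path)
```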