Neptune integration (#648)

jakubczakon · williamFalcon · williamFalcon · commit 8dc8a8bfd31a · 2020-01-13T22:20:01.000-05:00
* added neptune integration

* added tests for NeptuneLogger, added neptune to docs

* updated link to neptune support

* fixed docstrings, fixed try/except in tests, changed append_tags input

* fixed docstrings line length

* bumped epoch nr in model restore tests

* added tags support for single strings

* fixed passing neptune token to backend

* fixed project name in offline mode

* added save_top_k=-1 to checkpoint callback

* reformated initialization of neptune in online mode

* bumped epoch nr to 4 in test_load_model_from_checkpoint

* bumped epoch nr to 5

Co-authored-by: William Falcon &lt;waf2107@columbia.edu&gt;
diff --git a/README.md b/README.md
@@ -306,6 +306,7 @@ Lightning also adds a text column with all the hyperparameters for this experime
 - [Save a snapshot of all hyperparameters](https://williamfalcon.github.io/pytorch-lightning/Trainer/Logging/#save-a-snapshot-of-all-hyperparameters) 
 - [Snapshot code for a training run](https://williamfalcon.github.io/pytorch-lightning/Trainer/Logging/#snapshot-code-for-a-training-run) 
 - [Write logs file to csv every k batches](https://williamfalcon.github.io/pytorch-lightning/Trainer/Logging/#write-logs-file-to-csv-every-k-batches)
+- [Logging experiment data to Neptune](https://williamfalcon.github.io/pytorch-lightning/Trainer/Logging/#neptune-support)
 
 #### Training loop    
 
diff --git a/docs/source/conf.py b/docs/source/conf.py
@@ -62,7 +62,6 @@
 # The full version, including alpha/beta/rc tags
 release = pytorch_lightning.__version__
 
-
 # -- General configuration ---------------------------------------------------
 
 # If your documentation needs a minimal Sphinx version, state it here.
@@ -128,7 +127,6 @@
 # The name of the Pygments (syntax highlighting) style to use.
 pygments_style = None
 
-
 # -- Options for HTML output -------------------------------------------------
 
 # The theme to use for HTML and HTML Help pages.  See the documentation for
@@ -174,7 +172,6 @@
 # Output file base name for HTML help builder.
 htmlhelp_basename = project + '-doc'
 
-
 # -- Options for LaTeX output ------------------------------------------------
 
 latex_elements = {
@@ -198,7 +195,6 @@
     (master_doc, project + '.tex', project + ' Documentation', author, 'manual'),
 ]
 
-
 # -- Options for manual page output ------------------------------------------
 
 # One entry per manual page. List of tuples
@@ -207,7 +203,6 @@
     (master_doc, project, project + ' Documentation', [author], 1)
 ]
 
-
 # -- Options for Texinfo output ----------------------------------------------
 
 # Grouping the document tree into Texinfo files. List of tuples
@@ -218,7 +213,6 @@
      'One line description of project.', 'Miscellaneous'),
 ]
 
-
 # -- Options for Epub output -------------------------------------------------
 
 # Bibliographic Dublin Core info.
@@ -236,7 +230,6 @@
 # A list of files that should not be packed into the epub file.
 epub_exclude_files = ['search.html']
 
-
 # -- Extension configuration -------------------------------------------------
 
 # -- Options for intersphinx extension ---------------------------------------
@@ -249,7 +242,6 @@
 # If true, `todo` and `todoList` produce output, else they produce nothing.
 todo_include_todos = True
 
-
 # https://github.com/rtfd/readthedocs.org/issues/1139
 # I use sphinx-apidoc to auto-generate API documentation for my project.
 # Right now I have to commit these auto-generated files to my repository
@@ -302,7 +294,7 @@ def setup(app):
             MOCK_REQUIRE_PACKAGES.append(pkg.rstrip())
 
 # TODO: better parse from package since the import name and package name may differ
-MOCK_MANUAL_PACKAGES = ['torch', 'torchvision', 'sklearn', 'test_tube', 'mlflow', 'comet_ml']
+MOCK_MANUAL_PACKAGES = ['torch', 'torchvision', 'sklearn', 'test_tube', 'mlflow', 'comet_ml', 'neptune']
 autodoc_mock_imports = MOCK_REQUIRE_PACKAGES + MOCK_MANUAL_PACKAGES
 # for mod_name in MOCK_REQUIRE_PACKAGES:
 #     sys.modules[mod_name] = mock.Mock()
diff --git a/pytorch_lightning/logging/__init__.py b/pytorch_lightning/logging/__init__.py
@@ -187,3 +187,8 @@ def __init__(self, hparams):
     from .comet import CometLogger
 except ImportError:
     del environ["COMET_DISABLE_AUTO_LOGGING"]
+
+try:
+    from .neptune import NeptuneLogger
+except ImportError:
+    pass
diff --git a/pytorch_lightning/logging/neptune.py b/pytorch_lightning/logging/neptune.py
@@ -0,0 +1,242 @@
+"""
+Log using `neptune <https://www.neptune.ml>`_
+
+Neptune logger can be used in the online mode or offline (silent) mode.
+To log experiment data in online mode, NeptuneLogger requires an API key:
+
+.. code-block:: python
+
+    from pytorch_lightning.logging import NeptuneLogger
+    # arguments made to NeptuneLogger are passed on to the neptune.experiments.Experiment class
+
+    neptune_logger = NeptuneLogger(
+        api_key=os.environ["NEPTUNE_API_TOKEN"],
+        project_name="USER_NAME/PROJECT_NAME",
+        experiment_name="default", # Optional,
+        params={"max_epochs": 10}, # Optional,
+        tags=["pytorch-lightning","mlp"] # Optional,
+    )
+    trainer = Trainer(max_epochs=10, logger=neptune_logger)
+
+Use the logger anywhere in your LightningModule as follows:
+
+.. code-block:: python
+
+    def train_step(...):
+        # example
+        self.logger.experiment.log_metric("acc_train", acc_train) # log metrics
+        self.logger.experiment.log_image("worse_predictions", prediction_image) # log images
+        self.logger.experiment.log_artifact("model_checkpoint.pt") # log model checkpoint
+        self.logger.experiment.whatever_neptune_supports(...)
+
+    def any_lightning_module_function_or_hook(...):
+        self.logger.experiment.log_metric("acc_train", acc_train) # log metrics
+        self.logger.experiment.log_image("worse_predictions", prediction_image) # log images
+        self.logger.experiment.log_artifact("model_checkpoint.pt") # log model checkpoint
+        self.logger.experiment.whatever_neptune_supports(...)
+
+
+"""
+
+from logging import getLogger
+
+try:
+    import neptune
+except ImportError:
+    raise ImportError('Missing neptune package. Run `pip install neptune-client`')
+
+from torch import is_tensor
+
+# from .base import LightningLoggerBase, rank_zero_only
+from pytorch_lightning.logging.base import LightningLoggerBase, rank_zero_only
+
+logger = getLogger(__name__)
+
+
+class NeptuneLogger(LightningLoggerBase):
+    def __init__(self, api_key=None, project_name=None, offline_mode=False,
+                 experiment_name=None, upload_source_files=None,
+                 params=None, properties=None, tags=None, **kwargs):
+        """Initialize a neptune.ml logger.
+        Requires an API key and a project name in online mode; offline mode needs neither.
+
+        :param str|None api_key: Required in online mode. Neptune API token, found on https://neptune.ml.
+           Read how to get your API key https://docs.neptune.ml/python-api/tutorials/get-started.html#copy-api-token.
+        :param str project_name: Required in online mode. Qualified name of a project in a form of
+           "namespace/project_name" for example "tom/minst-classification".
+           If None, the value of NEPTUNE_PROJECT environment variable will be taken.
+           You need to create the project in https://neptune.ml first.
+        :param bool offline_mode: Optional default False. If offline_mode=True no logs will be sent to neptune.
+           Usually used for debug purposes.
+        :param str|None experiment_name: Optional. Editable name of the experiment.
+           Name is displayed in the experiment’s Details (Metadata section) and in experiments view as a column.
+        :param list|None upload_source_files: Optional. List of source files to be uploaded.
+           Must be list of str or single str. Uploaded sources are displayed in the experiment’s Source code tab.
+           If None is passed, Python file from which experiment was created will be uploaded.
+           Pass empty list ([]) to upload no files. Unix style pathname pattern expansion is supported.
+           For example, you can pass '*.py' to upload all python source files from the current directory.
+           For recursion lookup use '**/*.py' (for Python 3.5 and later). For more information see glob library.
+        :param dict|None params: Optional. Parameters of the experiment. After experiment creation params are read-only.
+           Parameters are displayed in the experiment’s Parameters section and each key-value pair can be
+           viewed in experiments view as a column.
+        :param dict|None properties: Optional default is {}. Properties of the experiment.
+           They are editable after experiment is created. Properties are displayed in the experiment’s Details and
+           each key-value pair can be viewed in experiments view as a column.
+        :param list|None tags: Optional default []. Must be list of str. Tags of the experiment.
+           They are editable after experiment is created (see: append_tag() and remove_tag()).
+           Tags are displayed in the experiment’s Details and can be viewed in experiments view as a column.
+        """
+        super().__init__()
+        self.api_key = api_key
+        self.project_name = project_name
+        self.offline_mode = offline_mode
+        self.experiment_name = experiment_name
+        self.upload_source_files = upload_source_files
+        self.params = params
+        self.properties = properties
+        self.tags = tags
+        self._experiment = None
+        self._kwargs = kwargs
+
+        if offline_mode:
+            self.mode = "offline"
+            neptune.init(project_qualified_name='dry-run/project',
+                         backend=neptune.OfflineBackend())
+        else:
+            self.mode = "online"
+            neptune.init(api_token=self.api_key,
+                         project_qualified_name=self.project_name)
+
+        logger.info(f"NeptuneLogger was initialized in {self.mode} mode")
+
+    @property
+    def experiment(self):
+        if self._experiment is not None:
+            return self._experiment
+        else:
+            self._experiment = neptune.create_experiment(name=self.experiment_name,
+                                                         params=self.params,
+                                                         properties=self.properties,
+                                                         tags=self.tags,
+                                                         upload_source_files=self.upload_source_files,
+                                                         **self._kwargs)
+        return self._experiment
+
+    @rank_zero_only
+    def log_hyperparams(self, params):
+        for key, val in vars(params).items():
+            self.experiment.set_property(f"param__{key}", val)
+
+    @rank_zero_only
+    def log_metrics(self, metrics, step=None):
+        """Log metrics (numeric values) in Neptune experiments
+
+        :param dict metrics: Dictionary with metric names as keys and measured quantities as values
+        :param int|None step: Step number at which the metrics should be recorded, must be strictly increasing
+
+        """
+
+        for key, val in metrics.items():
+            if is_tensor(val):
+                val = val.cpu().detach()
+
+            if step is None:
+                self.experiment.log_metric(key, val)
+            else:
+                self.experiment.log_metric(key, x=step, y=val)
+
+    @rank_zero_only
+    def finalize(self, status):
+        self.experiment.stop()
+
+    @property
+    def name(self):
+        if self.mode == "offline":
+            return "offline-name"
+        else:
+            return self.experiment.name
+
+    @property
+    def version(self):
+        if self.mode == "offline":
+            return "offline-id-1234"
+        else:
+            return self.experiment.id
+
+    @rank_zero_only
+    def log_metric(self, metric_name, metric_value, step=None):
+        """Log metrics (numeric values) in Neptune experiments
+
+        :param str metric_name:  The name of log, i.e. mse, loss, accuracy.
+        :param float metric_value: The value of the log (data-point).
+        :param int|None step: Step number at which the metrics should be recorded, must be strictly increasing
+
+        """
+        if step is None:
+            self.experiment.log_metric(metric_name, metric_value)
+        else:
+            self.experiment.log_metric(metric_name, x=step, y=metric_value)
+
+    @rank_zero_only
+    def log_text(self, log_name, text, step=None):
+        """Log text data in Neptune experiment
+
+        :param str log_name:  The name of log, i.e. mse, my_text_data, timing_info.
+        :param str text: The value of the log (data-point).
+        :param int|None step: Step number at which the text should be recorded, must be strictly increasing
+
+        """
+        if step is None:
+            self.experiment.log_text(log_name, text)  # text channel; log_metric is for numeric values
+        else:
+            self.experiment.log_text(log_name, x=step, y=text)
+
+    @rank_zero_only
+    def log_image(self, log_name, image, step=None):
+        """Log image data in Neptune experiment
+
+        :param str log_name: The name of log, i.e. bboxes, visualisations, sample_images.
+        :param str|PIL.Image|matplotlib.figure.Figure image: The value of the log (data-point).
+           Can be one of the following types: PIL image, matplotlib.figure.Figure, path to image file (str)
+        :param int|None step: Step number at which the metrics should be recorded, must be strictly increasing
+
+        """
+        if step is None:
+            self.experiment.log_image(log_name, image)
+        else:
+            self.experiment.log_image(log_name, x=step, y=image)
+
+    @rank_zero_only
+    def log_artifact(self, artifact, destination=None):
+        """Save an artifact (file) in Neptune experiment storage.
+
+        :param str artifact: A path to the file in local filesystem.
+        :param str|None destination: Optional default None.
+           A destination path. If None is passed, an artifact file name will be used.
+
+        """
+        self.experiment.log_artifact(artifact, destination)
+
+    @rank_zero_only
+    def set_property(self, key, value):
+        """Set key-value pair as Neptune experiment property.
+
+        :param str key: Property key.
+        :param obj value: New value of a property.
+
+        """
+        self.experiment.set_property(key, value)
+
+    @rank_zero_only
+    def append_tags(self, tags):
+        """appends tags to neptune experiment
+
+        :param str|tuple|list(str) tags: Tags to add to the current experiment.
+           If str is passed, single tag is added.
+           If multiple - comma separated - str are passed, all of them are added as tags.
+           If list of str is passed, all elements of the list are added as tags.
+
+        """
+        if not isinstance(tags, (list, set, tuple)):
+            tags = [tags]  # wrap a single tag in a list so it can be unpacked below
+        self.experiment.append_tags(*tags)
diff --git a/tests/requirements.txt b/tests/requirements.txt
@@ -8,5 +8,6 @@ check-manifest
 # test_tube  # already installed in main req.
 mlflow
 comet_ml
+neptune-client
 twine==1.13.0
 pillow<7.0.0
diff --git a/tests/test_logging.py b/tests/test_logging.py
@@ -193,6 +193,52 @@ def test_comet_pickle(tmpdir, monkeypatch):
     trainer2.logger.log_metrics({"acc": 1.0})
 
 
+def test_neptune_logger(tmpdir):
+    """Verify that basic functionality of neptune logger works."""
+    tutils.reset_seed()
+
+    from pytorch_lightning.logging import NeptuneLogger
+
+    hparams = tutils.get_hparams()
+    model = LightningTestModel(hparams)
+
+    logger = NeptuneLogger(offline_mode=True)
+
+    trainer_options = dict(
+        default_save_path=tmpdir,
+        max_epochs=1,
+        train_percent_check=0.01,
+        logger=logger
+    )
+    trainer = Trainer(**trainer_options)
+    result = trainer.fit(model)
+
+    print('result finished')
+    assert result == 1, "Training failed"
+
+
+def test_neptune_pickle(tmpdir):
+    """Verify that pickling trainer with neptune logger works."""
+    tutils.reset_seed()
+
+    from pytorch_lightning.logging import NeptuneLogger
+
+    # hparams = tutils.get_hparams()
+    # model = LightningTestModel(hparams)
+
+    logger = NeptuneLogger(offline_mode=True)
+    trainer_options = dict(
+        default_save_path=tmpdir,
+        max_epochs=1,
+        logger=logger
+    )
+
+    trainer = Trainer(**trainer_options)
+    pkl_bytes = pickle.dumps(trainer)
+    trainer2 = pickle.loads(pkl_bytes)
+    trainer2.logger.log_metrics({"acc": 1.0})
+
+
 def test_tensorboard_logger(tmpdir):
     """Verify that basic functionality of Tensorboard logger works."""
 
diff --git a/tests/test_restore_models.py b/tests/test_restore_models.py