Skip to content

Commit 8dc8a8b

Browse files
Neptune integration (#648)
* added neptune integration * added tests for NeptuneLogger, added neptune to docs * updated link to neptune support * fixed docstrings, fixed try/except in tests, changed append_tags input * fixed docstrings line lenght * bumped epoch nr in model restore tests * added tags support for single strings * fixed passing neptune token to backend * fixed project name in offline mode * added save_top_k=-1 to checkpoint callback * reformated initialization of neptune in online mode * bumped epoch nr to 4 in test_load_model_from_checkpoint * bumped epoch nr to 5 Co-authored-by: William Falcon <[email protected]>
1 parent 0ae3dd9 commit 8dc8a8b

File tree

7 files changed

+299
-13
lines changed

7 files changed

+299
-13
lines changed

README.md

+1
Original file line numberDiff line numberDiff line change
@@ -306,6 +306,7 @@ Lightning also adds a text column with all the hyperparameters for this experime
306306
- [Save a snapshot of all hyperparameters](https://williamfalcon.github.io/pytorch-lightning/Trainer/Logging/#save-a-snapshot-of-all-hyperparameters)
307307
- [Snapshot code for a training run](https://williamfalcon.github.io/pytorch-lightning/Trainer/Logging/#snapshot-code-for-a-training-run)
308308
- [Write logs file to csv every k batches](https://williamfalcon.github.io/pytorch-lightning/Trainer/Logging/#write-logs-file-to-csv-every-k-batches)
309+
- [Logging experiment data to Neptune](https://williamfalcon.github.io/pytorch-lightning/Trainer/Logging/#neptune-support)
309310

310311
#### Training loop
311312

docs/source/conf.py

+1-9
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,6 @@
6262
# The full version, including alpha/beta/rc tags
6363
release = pytorch_lightning.__version__
6464

65-
6665
# -- General configuration ---------------------------------------------------
6766

6867
# If your documentation needs a minimal Sphinx version, state it here.
@@ -128,7 +127,6 @@
128127
# The name of the Pygments (syntax highlighting) style to use.
129128
pygments_style = None
130129

131-
132130
# -- Options for HTML output -------------------------------------------------
133131

134132
# The theme to use for HTML and HTML Help pages. See the documentation for
@@ -174,7 +172,6 @@
174172
# Output file base name for HTML help builder.
175173
htmlhelp_basename = project + '-doc'
176174

177-
178175
# -- Options for LaTeX output ------------------------------------------------
179176

180177
latex_elements = {
@@ -198,7 +195,6 @@
198195
(master_doc, project + '.tex', project + ' Documentation', author, 'manual'),
199196
]
200197

201-
202198
# -- Options for manual page output ------------------------------------------
203199

204200
# One entry per manual page. List of tuples
@@ -207,7 +203,6 @@
207203
(master_doc, project, project + ' Documentation', [author], 1)
208204
]
209205

210-
211206
# -- Options for Texinfo output ----------------------------------------------
212207

213208
# Grouping the document tree into Texinfo files. List of tuples
@@ -218,7 +213,6 @@
218213
'One line description of project.', 'Miscellaneous'),
219214
]
220215

221-
222216
# -- Options for Epub output -------------------------------------------------
223217

224218
# Bibliographic Dublin Core info.
@@ -236,7 +230,6 @@
236230
# A list of files that should not be packed into the epub file.
237231
epub_exclude_files = ['search.html']
238232

239-
240233
# -- Extension configuration -------------------------------------------------
241234

242235
# -- Options for intersphinx extension ---------------------------------------
@@ -249,7 +242,6 @@
249242
# If true, `todo` and `todoList` produce output, else they produce nothing.
250243
todo_include_todos = True
251244

252-
253245
# https://github.com/rtfd/readthedocs.org/issues/1139
254246
# I use sphinx-apidoc to auto-generate API documentation for my project.
255247
# Right now I have to commit these auto-generated files to my repository
@@ -302,7 +294,7 @@ def setup(app):
302294
MOCK_REQUIRE_PACKAGES.append(pkg.rstrip())
303295

304296
# TODO: better parse from package since the import name and package name may differ
305-
MOCK_MANUAL_PACKAGES = ['torch', 'torchvision', 'sklearn', 'test_tube', 'mlflow', 'comet_ml']
297+
MOCK_MANUAL_PACKAGES = ['torch', 'torchvision', 'sklearn', 'test_tube', 'mlflow', 'comet_ml', 'neptune']
306298
autodoc_mock_imports = MOCK_REQUIRE_PACKAGES + MOCK_MANUAL_PACKAGES
307299
# for mod_name in MOCK_REQUIRE_PACKAGES:
308300
# sys.modules[mod_name] = mock.Mock()

pytorch_lightning/logging/__init__.py

+5
Original file line numberDiff line numberDiff line change
@@ -187,3 +187,8 @@ def __init__(self, hparams):
187187
from .comet import CometLogger
188188
except ImportError:
189189
del environ["COMET_DISABLE_AUTO_LOGGING"]
190+
191+
try:
192+
from .neptune import NeptuneLogger
193+
except ImportError:
194+
pass

pytorch_lightning/logging/neptune.py

+242
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,242 @@
1+
"""
2+
Log using `neptune <https://www.neptune.ml>`_
3+
4+
Neptune logger can be used in the online mode or offline (silent) mode.
5+
To log experiment data in online mode, NeptuneLogger requires an API key:
6+
7+
.. code-block:: python
8+
9+
from pytorch_lightning.logging import NeptuneLogger
10+
# arguments made to NeptuneLogger are passed on to the neptune.experiments.Experiment class
11+
12+
neptune_logger = NeptuneLogger(
13+
api_key=os.environ["NEPTUNE_API_TOKEN"],
14+
project_name="USER_NAME/PROJECT_NAME",
15+
experiment_name="default", # Optional,
16+
params={"max_epochs": 10}, # Optional,
17+
tags=["pytorch-lightning","mlp"] # Optional,
18+
)
19+
trainer = Trainer(max_epochs=10, logger=neptune_logger)
20+
21+
Use the logger anywhere in your LightningModule as follows:
22+
23+
.. code-block:: python
24+
25+
def train_step(...):
26+
# example
27+
self.logger.experiment.log_metric("acc_train", acc_train) # log metrics
28+
self.logger.experiment.log_image("worse_predictions", prediction_image) # log images
29+
self.logger.experiment.log_artifact("model_checkpoint.pt") # log model checkpoint
30+
self.logger.experiment.whatever_neptune_supports(...)
31+
32+
def any_lightning_module_function_or_hook(...):
33+
self.logger.experiment.log_metric("acc_train", acc_train) # log metrics
34+
self.logger.experiment.log_image("worse_predictions", prediction_image) # log images
35+
self.logger.experiment.log_artifact("model_checkpoint.pt") # log model checkpoint
36+
self.logger.experiment.whatever_neptune_supports(...)
37+
38+
39+
"""
40+
41+
from logging import getLogger
42+
43+
try:
44+
import neptune
45+
except ImportError:
46+
raise ImportError('Missing neptune package. Run `pip install neptune-client`')
47+
48+
from torch import is_tensor
49+
50+
# from .base import LightningLoggerBase, rank_zero_only
51+
from pytorch_lightning.logging.base import LightningLoggerBase, rank_zero_only
52+
53+
logger = getLogger(__name__)
54+
55+
56+
class NeptuneLogger(LightningLoggerBase):
    """Lightning logger that records experiment data with Neptune (https://neptune.ml)."""

    def __init__(self, api_key=None, project_name=None, offline_mode=False,
                 experiment_name=None, upload_source_files=None,
                 params=None, properties=None, tags=None, **kwargs):
        """Initialize a neptune.ml logger.

        Requires an API key and a project name in online mode; pass ``offline_mode=True``
        to run without sending anything to Neptune.

        :param str|None api_key: Required in online mode. Neptune API token, found on https://neptune.ml.
            Read how to get your API key https://docs.neptune.ml/python-api/tutorials/get-started.html#copy-api-token.
        :param str|None project_name: Required in online mode. Qualified name of a project in a form of
            "namespace/project_name" for example "tom/mnist-classification".
            If None, the value of NEPTUNE_PROJECT environment variable will be taken.
            You need to create the project in https://neptune.ml first.
        :param bool offline_mode: Optional default False. If offline_mode=True no logs will be sent to neptune.
            Usually used for debug purposes.
        :param str|None experiment_name: Optional. Editable name of the experiment.
            Name is displayed in the experiment's Details (Metadata section) and in experiments view as a column.
        :param list|str|None upload_source_files: Optional. List of source files to be uploaded.
            Must be list of str or single str. Uploaded sources are displayed in the experiment's Source code tab.
            If None is passed, Python file from which experiment was created will be uploaded.
            Pass empty list ([]) to upload no files. Unix style pathname pattern expansion is supported.
            For example, you can pass '*.py' to upload all python source files from the current directory.
            For recursion lookup use '**/*.py' (for Python 3.5 and later). For more information see glob library.
        :param dict|None params: Optional. Parameters of the experiment. After experiment creation params are
            read-only. Parameters are displayed in the experiment's Parameters section and each key-value pair
            can be viewed in experiments view as a column.
        :param dict|None properties: Optional default is {}. Properties of the experiment.
            They are editable after experiment is created. Properties are displayed in the experiment's Details
            and each key-value pair can be viewed in experiments view as a column.
        :param list|None tags: Optional default []. Must be list of str. Tags of the experiment.
            They are editable after experiment is created (see: append_tag() and remove_tag()).
            Tags are displayed in the experiment's Details and can be viewed in experiments view as a column.
        :param kwargs: Any additional keyword arguments are forwarded to ``neptune.create_experiment``.
        """
        super().__init__()
        self.api_key = api_key
        self.project_name = project_name
        self.offline_mode = offline_mode
        self.experiment_name = experiment_name
        self.upload_source_files = upload_source_files
        self.params = params
        self.properties = properties
        self.tags = tags
        # The experiment itself is created lazily, on first access of `experiment`.
        self._experiment = None
        self._kwargs = kwargs

        if offline_mode:
            self.mode = "offline"
            # The offline backend needs no credentials; a placeholder project name is used.
            neptune.init(project_qualified_name='dry-run/project',
                         backend=neptune.OfflineBackend())
        else:
            self.mode = "online"
            neptune.init(api_token=self.api_key,
                         project_qualified_name=self.project_name)

        logger.info("NeptuneLogger was initialized in %s mode", self.mode)

    @property
    def experiment(self):
        """Return the underlying Neptune experiment, creating it on first access."""
        if self._experiment is None:
            self._experiment = neptune.create_experiment(name=self.experiment_name,
                                                         params=self.params,
                                                         properties=self.properties,
                                                         tags=self.tags,
                                                         upload_source_files=self.upload_source_files,
                                                         **self._kwargs)
        return self._experiment

    @rank_zero_only
    def log_hyperparams(self, params):
        """Store every hyperparameter as a ``param__<name>`` property of the experiment.

        :param argparse.Namespace|dict params: Hyperparameters, given either as a dict or as an
            object whose attributes can be listed with ``vars()``.
        """
        # vars() raises TypeError for a plain dict, so dicts are used as they are.
        items = params if isinstance(params, dict) else vars(params)
        for key, val in items.items():
            self.experiment.set_property(f"param__{key}", val)

    @rank_zero_only
    def log_metrics(self, metrics, step=None):
        """Log metrics (numeric values) in Neptune experiments

        :param dict metrics: Dictionary with metric names as keys and measured quantities as values
        :param int|None step: Step number at which the metrics should be recorded, must be strictly increasing

        """
        for key, val in metrics.items():
            if is_tensor(val):
                val = val.cpu().detach()

            if step is None:
                self.experiment.log_metric(key, val)
            else:
                self.experiment.log_metric(key, x=step, y=val)

    @rank_zero_only
    def finalize(self, status):
        """Stop the Neptune experiment.

        :param status: Not used by this logger.
        """
        self.experiment.stop()

    @property
    def name(self):
        """Name of the experiment; the fixed placeholder "offline-name" in offline mode."""
        if self.mode == "offline":
            return "offline-name"
        return self.experiment.name

    @property
    def version(self):
        """Id of the experiment; the fixed placeholder "offline-id-1234" in offline mode."""
        if self.mode == "offline":
            return "offline-id-1234"
        return self.experiment.id

    @rank_zero_only
    def log_metric(self, metric_name, metric_value, step=None):
        """Log metrics (numeric values) in Neptune experiments

        :param str metric_name: The name of log, i.e. mse, loss, accuracy.
        :param float metric_value: The value of the log (data-point).
        :param int|None step: Step number at which the metrics should be recorded, must be strictly increasing

        """
        if step is None:
            self.experiment.log_metric(metric_name, metric_value)
        else:
            self.experiment.log_metric(metric_name, x=step, y=metric_value)

    @rank_zero_only
    def log_text(self, log_name, text, step=None):
        """Log text data in Neptune experiment

        :param str log_name: The name of log, i.e. mse, my_text_data, timing_info.
        :param str text: The value of the log (data-point).
        :param int|None step: Step number at which the metrics should be recorded, must be strictly increasing

        """
        # Text goes through the text channel; log_metric is for numeric values only.
        if step is None:
            self.experiment.log_text(log_name, text)
        else:
            self.experiment.log_text(log_name, x=step, y=text)

    @rank_zero_only
    def log_image(self, log_name, image, step=None):
        """Log image data in Neptune experiment

        :param str log_name: The name of log, i.e. bboxes, visualisations, sample_images.
        :param str|PIL.Image|matplotlib.figure.Figure image: The value of the log (data-point).
            Can be one of the following types: PIL image, matplotlib.figure.Figure, path to image file (str)
        :param int|None step: Step number at which the metrics should be recorded, must be strictly increasing

        """
        if step is None:
            self.experiment.log_image(log_name, image)
        else:
            self.experiment.log_image(log_name, x=step, y=image)

    @rank_zero_only
    def log_artifact(self, artifact, destination=None):
        """Save an artifact (file) in Neptune experiment storage.

        :param str artifact: A path to the file in local filesystem.
        :param str|None destination: Optional default None.
            A destination path. If None is passed, an artifact file name will be used.

        """
        self.experiment.log_artifact(artifact, destination)

    @rank_zero_only
    def set_property(self, key, value):
        """Set key-value pair as Neptune experiment property.

        :param str key: Property key.
        :param obj value: New value of a property.

        """
        self.experiment.set_property(key, value)

    @rank_zero_only
    def append_tags(self, tags):
        """Append tags to the Neptune experiment.

        :param str|tuple|set|list(str) tags: Tags to add to the current experiment.
            If a single str is passed, one tag is added.
            If a list, tuple or set of str is passed, all of its elements are added as tags.

        """
        if not isinstance(tags, (list, set, tuple)):
            tags = [tags]  # wrap a single tag so it can be unpacked below
        self.experiment.append_tags(*tags)

tests/requirements.txt

+1
Original file line numberDiff line numberDiff line change
@@ -8,5 +8,6 @@ check-manifest
88
# test_tube # already installed in main req.
99
mlflow
1010
comet_ml
11+
neptune-client
1112
twine==1.13.0
1213
pillow<7.0.0

tests/test_logging.py

+46
Original file line numberDiff line numberDiff line change
@@ -193,6 +193,52 @@ def test_comet_pickle(tmpdir, monkeypatch):
193193
trainer2.logger.log_metrics({"acc": 1.0})
194194

195195

196+
def test_neptune_logger(tmpdir):
    """Smoke-test a short training run with an offline NeptuneLogger attached."""
    tutils.reset_seed()

    from pytorch_lightning.logging import NeptuneLogger

    model = LightningTestModel(tutils.get_hparams())

    # Offline mode: no API token is supplied for the run.
    neptune_logger = NeptuneLogger(offline_mode=True)

    trainer = Trainer(
        default_save_path=tmpdir,
        max_epochs=1,
        train_percent_check=0.01,
        logger=neptune_logger,
    )
    fit_status = trainer.fit(model)

    print('result finished')
    assert fit_status == 1, "Training failed"
218+
219+
220+
def test_neptune_pickle(tmpdir):
    """Check that a Trainer holding a NeptuneLogger survives a pickle round trip."""
    tutils.reset_seed()

    from pytorch_lightning.logging import NeptuneLogger

    trainer = Trainer(
        default_save_path=tmpdir,
        max_epochs=1,
        logger=NeptuneLogger(offline_mode=True),
    )

    # Serialize and restore the trainer, then make sure the restored logger is still usable.
    restored_trainer = pickle.loads(pickle.dumps(trainer))
    restored_trainer.logger.log_metrics({"acc": 1.0})
240+
241+
196242
def test_tensorboard_logger(tmpdir):
197243
"""Verify that basic functionality of Tensorboard logger works."""
198244

0 commit comments

Comments
 (0)