From f9360e615c96044524862dba9613299c85040b45 Mon Sep 17 00:00:00 2001
From: lezwon <lezwon@gmail.com>
Date: Thu, 23 Apr 2020 07:32:01 +0530
Subject: [PATCH 1/2] check for kaggle env variable

---
 pytorch_lightning/trainer/trainer.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pytorch_lightning/trainer/trainer.py b/pytorch_lightning/trainer/trainer.py
index 892ee7af57331..e4a33fb449f6f 100644
--- a/pytorch_lightning/trainer/trainer.py
+++ b/pytorch_lightning/trainer/trainer.py
@@ -755,7 +755,7 @@ def fit(
             log.info(f'training on {self.num_tpu_cores} TPU cores')
 
             # COLAB_GPU is an env var available by default in Colab environments.
-            start_method = 'fork' if os.getenv('COLAB_GPU') else 'spawn'
+            start_method = 'fork' if os.getenv('COLAB_GPU') or os.getenv('KAGGLE_URL_BASE') else 'spawn'
 
             # track for predict
             self.model = model

From 57806e176c3a8db691e805f99f71ea151d729529 Mon Sep 17 00:00:00 2001
From: lezwon <lezwon@gmail.com>
Date: Thu, 23 Apr 2020 15:06:48 +0530
Subject: [PATCH 2/2] added changelog

---
 CHANGELOG.md | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 0dced0c148cdd..4a24046201819 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -26,6 +26,8 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
 
 - Added [Horovod](http://horovod.ai) support as a distributed backend `Trainer(distributed_backend='horovod')` ([#1529](https://github.com/PyTorchLightning/pytorch-lightning/pull/1529))
 
+- Added support for 8-core distributed training on Kaggle TPUs ([#1568](https://github.com/PyTorchLightning/pytorch-lightning/pull/1568))
+
 ### Changed
 
 - Changed the default behaviour to no longer include a NaN check with each training iteration. ([#1475](https://github.com/PyTorchLightning/pytorch-lightning/pull/1475))