
Commit 10b16db

williamFalcon and Borda authored
made ddp the default if no backend specified with multiple GPUs (#1789)
* made ddp the default if no backend specified with multiple GPUs
* fix
* spawn

Co-authored-by: Jirka <[email protected]>
1 parent: acab068

File tree

4 files changed, +6 -3 lines changed


docs/source/multi_gpu.rst (+2)

@@ -132,6 +132,8 @@ Lightning allows multiple ways of training
 - Horovod (`distributed_backend='horovod'`) (multi-machine, multi-gpu, configured at runtime)
 - TPUs (`num_tpu_cores=8|x`) (tpu or TPU pod)
 
+.. note:: If you request multiple GPUs without setting a mode, ddp will be automatically used.
+
 Data Parallel (dp)
 ^^^^^^^^^^^^^^^^^^
 `DataParallel <https://pytorch.org/docs/stable/nn.html#torch.nn.DataParallel>`_ splits a batch across k GPUs. That is, if you have a batch of 32 and use dp with 2 gpus,
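To illustrate the default described in the note above, here is a minimal sketch of a training script that hits the new behavior. The TinyModel module, its random data, and max_epochs=1 are placeholders invented for this example and are not part of the commit; only Trainer(gpus=2) with no distributed_backend, and the resulting ddp default, come from this change.

import torch
from torch.nn import functional as F
from torch.utils.data import DataLoader, TensorDataset
import pytorch_lightning as pl


class TinyModel(pl.LightningModule):
    """Placeholder module used only to demonstrate the backend default."""

    def __init__(self):
        super().__init__()
        self.layer = torch.nn.Linear(32, 2)

    def forward(self, x):
        return self.layer(x)

    def training_step(self, batch, batch_idx):
        x, y = batch
        return {'loss': F.cross_entropy(self(x), y)}

    def train_dataloader(self):
        x, y = torch.randn(64, 32), torch.randint(0, 2, (64,))
        return DataLoader(TensorDataset(x, y), batch_size=8)

    def configure_optimizers(self):
        return torch.optim.Adam(self.parameters(), lr=1e-3)


# No distributed_backend given: with this commit, requesting 2 GPUs falls
# back to ddp (previously dp), and a warning reports the chosen backend.
trainer = pl.Trainer(gpus=2, max_epochs=1)
trainer.fit(TinyModel())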

pytorch_lightning/trainer/distrib_data_parallel.py (+2 -2)

@@ -203,8 +203,8 @@ def set_distributed_mode(self, distributed_backend):
         elif self.num_gpus > 1:
             rank_zero_warn('You requested multiple GPUs but did not specify a backend, e.g.'
                            ' Trainer(distributed_backend=dp) (or ddp, ddp2).'
-                           ' Setting distributed_backend=dp for you.')
-            self.use_dp = True
+                           ' Setting distributed_backend=ddp for you.')
+            self.use_ddp = True
         elif distributed_backend == "dp":
             # do nothing if num_gpus == 0
             if self.num_gpus == 1:
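If the previous DataParallel behavior is still wanted, it can be requested explicitly rather than relying on the default. A minimal sketch, using only the Trainer arguments named in the warning message above:

import pytorch_lightning as pl

# Opt back into DataParallel explicitly, since ddp is now the default
# whenever gpus > 1 and no distributed_backend is passed.
trainer = pl.Trainer(gpus=2, distributed_backend='dp')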

tests/models/test_gpu.py (+1)

@@ -130,6 +130,7 @@ def assert_pred_same():
     trainer.fit(model)


+@pytest.mark.spawn
 @pytest.mark.skipif(torch.cuda.device_count() < 2, reason="test requires multi-GPU machine")
 def test_multi_gpu_none_backend(tmpdir):
     """Make sure when using multiple GPUs the user can't use `distributed_backend = None`."""

tests/trainer/test_trainer.py (+1 -1)

@@ -712,7 +712,7 @@ def test_gpu_choice(tmpdir):
     ),
     pytest.param(
         dict(distributed_backend=None, gpus=2),
-        dict(use_dp=True, use_ddp=False, use_ddp2=False, num_gpus=2, on_gpu=True, single_gpu=False, num_processes=1),
+        dict(use_dp=False, use_ddp=True, use_ddp2=False, num_gpus=2, on_gpu=True, single_gpu=False, num_processes=1),
         marks=[pytest.mark.skipif(torch.cuda.device_count() < 2, reason="Multiple GPUs needed")]
     ),
     pytest.param(
