@@ -5,6 +5,9 @@
 import subprocess
 import sys
 
+from unittest.mock import patch
+
+import numpy as np
 import pytest
 import torch
 
@@ -113,7 +116,6 @@ def test_horovod_multi_gpu(tmpdir):
 @pytest.mark.skipif(not _nccl_available(), reason="test requires Horovod with NCCL support")
 @pytest.mark.skipif(not torch.cuda.is_available(), reason="test requires GPU machine")
 def test_horovod_transfer_batch_to_gpu(tmpdir):
-
     class TestTrainingStepModel(EvalModelTemplate):
         def training_step(self, batch, *args, **kwargs):
             x, y = batch
@@ -175,3 +177,36 @@ def get_optimizer_params(optimizer):
     assert get_model_params(model.generator) != get_model_params(model.discriminator)
     assert get_model_params(model.generator) == get_optimizer_params(trainer.optimizers[0])
     assert get_model_params(model.discriminator) == get_optimizer_params(trainer.optimizers[1])
+
+
+@pytest.mark.skipif(platform.system() == "Windows", reason="Horovod is not supported on Windows")
+def test_horovod_multi_optimizer_with_scheduling_stepping(tmpdir):
+    hparams = EvalModelTemplate.get_default_hparams()
+    model = EvalModelTemplate(**hparams)
+    model.configure_optimizers = model.configure_optimizers__multiple_schedulers
+
+    num_workers = 8
+    init_lr = hparams.get('learning_rate') * num_workers
+
+    with patch('pytorch_lightning.trainer.distrib_parts.hvd.size') as mock_hvd_size:
+        mock_hvd_size.return_value = 8
+
+        # fit model
+        trainer = Trainer(
+            default_root_dir=tmpdir,
+            max_epochs=1,
+            limit_val_batches=0.5,
+            limit_train_batches=0.2,
+            distributed_backend='horovod'
+        )
+        results = trainer.fit(model)
+        assert results == 1
+
+        adjusted_lr1 = [pg['lr'] for pg in trainer.optimizers[0].param_groups][0]
+        adjusted_lr2 = [pg['lr'] for pg in trainer.optimizers[1].param_groups][0]
+
+        # Called once after the end of the epoch with gamma=0.1
+        assert pytest.approx(init_lr * 0.1) == adjusted_lr1
+
+        # Called every 3 steps; for 1 epoch of 11 batches it is called 3 times with gamma=0.1
+        assert pytest.approx(init_lr * 0.1) == adjusted_lr2
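
A note on the `init_lr = hparams.get('learning_rate') * num_workers` line: Horovod's standard recipe scales the learning rate linearly with the number of workers, and Lightning's Horovod backend reads the worker count from `hvd.size()`, which is exactly what the `patch` fakes so the test can run in a single process. A minimal sketch of the scaling rule being exercised (the helper name is hypothetical, not Lightning's verbatim code):

import horovod.torch as hvd

def scale_lr_for_horovod(optimizer):
    # Hypothetical helper: each Horovod worker processes its own batch,
    # so the effective batch size grows with hvd.size() and the common
    # recipe scales the learning rate by the same factor.
    for param_group in optimizer.param_groups:
        param_group['lr'] *= hvd.size()

With `hvd.size()` patched to return 8, a base learning rate `lr` becomes `8 * lr` before any scheduler steps, which is the starting point the assertions build on.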
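The test also relies on `configure_optimizers__multiple_schedulers` from `EvalModelTemplate`, whose definition is not part of this diff. Below is a plausible sketch consistent with the two assertion comments (one scheduler stepped once per epoch, one stepped on the `'step'` interval so it fires 3 times over the 11-batch epoch); the optimizer types and all scheduler parameters are illustrative assumptions, not the template's actual code:

import torch.optim as optim

def configure_optimizers__multiple_schedulers(self):
    # Two independent optimizers over the same parameters.
    optimizer1 = optim.Adam(self.parameters(), lr=self.learning_rate)
    optimizer2 = optim.Adam(self.parameters(), lr=self.learning_rate)

    # Stepped once at the end of each epoch (Lightning's default interval):
    # after 1 epoch, lr -> lr * 0.1.
    scheduler1 = optim.lr_scheduler.StepLR(optimizer1, step_size=1, gamma=0.1)

    # Stepped on the batch ('step') interval every 3rd batch; with
    # step_size=3, gamma is applied once after the 3rd scheduler step,
    # so over 11 batches: lr -> lr * 0.1.
    scheduler2 = {
        'scheduler': optim.lr_scheduler.StepLR(optimizer2, step_size=3, gamma=0.1),
        'interval': 'step',
        'frequency': 3,
    }
    return [optimizer1, optimizer2], [scheduler1, scheduler2]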
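To make the asserted relationship concrete with illustrative numbers (the template's actual default `learning_rate` may differ; 0.001 here is an assumption):

base_lr = 0.001                  # assumed default, for illustration only
num_workers = 8
init_lr = base_lr * num_workers  # 0.008 after Horovod scaling
expected = init_lr * 0.1         # 0.0008 after one effective gamma=0.1 reduction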