@@ -1,5 +1,6 @@
 import numpy as np
 import pytest
+import sys
 import torch
 import torch.distributed as dist
 import torch.multiprocessing as mp
@@ -114,26 +115,44 @@ def _setup_ddp(rank, worldsize):
     dist.init_process_group("gloo", rank=rank, world_size=worldsize)


-def _ddp_test_fn(rank, worldsize):
+def _ddp_test_fn(rank, worldsize, add_offset: bool, reduction_mean=False):
     _setup_ddp(rank, worldsize)
-    tensor = torch.tensor([1.], device='cuda:0')
-
-    reduced_tensor = _sync_ddp_if_available(tensor)
+    if add_offset:
+        tensor = torch.tensor([float(rank)])
+    else:
+        tensor = torch.tensor([1.])
+    if reduction_mean:
+        reduced_tensor = _sync_ddp_if_available(tensor, reduce_op='avg')
+
+        manual_reduction = sum([i for i in range(dist.get_world_size())]) / dist.get_world_size()
+        print(reduced_tensor)
+        print(manual_reduction)
+        assert reduced_tensor.item() == manual_reduction
+    else:
+        reduced_tensor = _sync_ddp_if_available(tensor)

-    assert reduced_tensor.item() == dist.get_world_size(), \
-        'Sync-Reduce does not work properly with DDP and Tensors'
+        assert reduced_tensor.item() == dist.get_world_size(), \
+            'Sync-Reduce does not work properly with DDP and Tensors'


-@pytest.mark.skipif(torch.cuda.device_count() < 2, reason="test requires multi-GPU machine")
+@pytest.mark.skipif(sys.platform == "win32", reason="DDP not available on Windows")
 def test_sync_reduce_ddp():
     """Make sure sync-reduce works with DDP"""
     tutils.reset_seed()
     tutils.set_random_master_port()

     worldsize = 2
-    mp.spawn(_ddp_test_fn, args=(worldsize,), nprocs=worldsize)
+    mp.spawn(_ddp_test_fn, args=(worldsize, False), nprocs=worldsize)

-    # dist.destroy_process_group()
+
+@pytest.mark.skipif(sys.platform == "win32", reason="DDP not available on Windows")
+def test_sync_reduce_ddp_mean():
+    """Make sure sync-reduce with mean reduction works with DDP"""
+    tutils.reset_seed()
+    tutils.set_random_master_port()
+
+    worldsize = 2
+    mp.spawn(_ddp_test_fn, args=(worldsize, True, True), nprocs=worldsize)


 def test_sync_reduce_simple():
@@ -172,7 +191,7 @@ def _ddp_test_tensor_metric(rank, worldsize):
     _test_tensor_metric(True)


-@pytest.mark.skipif(torch.cuda.device_count() < 2, reason="test requires multi-GPU machine")
+@pytest.mark.skipif(sys.platform == "win32", reason="DDP not available on Windows")
 def test_tensor_metric_ddp():
     tutils.reset_seed()
     tutils.set_random_master_port()
@@ -212,7 +231,7 @@ def _ddp_test_numpy_metric(rank, worldsize):
     _test_numpy_metric(True)


-@pytest.mark.skipif(torch.cuda.device_count() < 2, reason="test requires multi-GPU machine")
+@pytest.mark.skipif(sys.platform == "win32", reason="DDP not available on Windows")
 def test_numpy_metric_ddp():
     tutils.reset_seed()
     tutils.set_random_master_port()