@@ -109,7 +109,7 @@ def training_step(self, *args, **kwargs):
"""return loss, dict with metrics for tqdm

:param batch: The output of your dataloader. A tensor, tuple or list
- :param int batch_nb: Integer displaying which batch this is
+ :param int batch_idx: Integer displaying which batch this is
:return: dict with loss key and optional log, progress keys
if implementing training_step, return whatever you need in that step:
    - loss -> tensor scalar [REQUIRED]
@@ -124,7 +124,7 @@ def training_step(self, *args, **kwargs):

.. code-block:: python

- def training_step(self, batch, batch_nb):
+ def training_step(self, batch, batch_idx):
    x, y, z = batch

    # implement your own
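For orientation, the hunk above cuts off before the body of the example; a minimal sketch of a full ``training_step`` under the renamed signature might look like the following. The forward call, the cross-entropy loss, and the ``progress_bar``/``log`` keys are illustrative assumptions, not part of this diff.

.. code-block:: python

    import torch.nn.functional as F

    def training_step(self, batch, batch_idx):
        # batch comes straight from the train dataloader
        x, y = batch

        # assumed forward pass; self.forward is whatever the module defines
        out = self.forward(x)
        loss = F.cross_entropy(out, y)

        # loss is required; the progress_bar/log dicts are optional extras
        return {
            'loss': loss,
            'progress_bar': {'train_loss': loss},
            'log': {'train_loss': loss},
        }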
@@ -150,7 +150,7 @@ def training_step(self, batch, batch_nb):
.. code-block:: python

# Multiple optimizers (ie: GANs)
- def training_step(self, batch, batch_nb, optimizer_idx):
+ def training_step(self, batch, batch_idx, optimizer_idx):
    if optimizer_idx == 0:
        # do training_step with encoder
    if optimizer_idx == 1:
@@ -163,7 +163,7 @@ def training_step(self, batch, batch_nb, optimizer_idx):
.. code-block:: python

# Truncated back-propagation through time
- def training_step(self, batch, batch_nb, hiddens):
+ def training_step(self, batch, batch_idx, hiddens):
    # hiddens are the hiddens from the previous truncated backprop step

You can also return a -1 instead of a dict to stop the current loop. This is useful
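The truncated-BPTT hunk only shows the renamed signature; a rough sketch of a full step could look like this. The ``self.rnn`` attribute, the MSE loss, and the ``'hiddens'`` return key are assumptions used for illustration.

.. code-block:: python

    import torch.nn.functional as F

    def training_step(self, batch, batch_idx, hiddens):
        # hiddens carry the state from the previous truncated backprop split
        x, y = batch

        # assumed recurrent model held on the module (e.g. an nn.LSTM)
        out, hiddens = self.rnn(x, hiddens)
        loss = F.mse_loss(out, y)

        # return the new hiddens so the next split can continue from them
        return {'loss': loss, 'hiddens': hiddens}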
@@ -192,9 +192,9 @@ def training_end(self, *args, **kwargs):
.. code-block:: python

# WITHOUT training_end
- # if used in DP or DDP2, this batch is 1/nb_gpus large
- def training_step(self, batch, batch_nb):
-     # batch is 1/nb_gpus big
+ # if used in DP or DDP2, this batch is 1/num_gpus large
+ def training_step(self, batch, batch_idx):
+     # batch is 1/num_gpus big
    x, y = batch

    out = self.forward(x)
@@ -204,8 +204,8 @@ def training_step(self, batch, batch_nb):

# --------------
# with training_end to do softmax over the full batch
- def training_step(self, batch, batch_nb):
-     # batch is 1/nb_gpus big
+ def training_step(self, batch, batch_idx):
+     # batch is 1/num_gpus big
    x, y = batch

    out = self.forward(x)
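To make the DP/DDP2 motivation concrete, a hedged sketch of the ``training_step``/``training_end`` pair might be: each ``training_step`` returns its per-GPU outputs, and ``training_end`` computes the loss over the gathered full batch. The ``'out'``/``'y'`` keys and the exact shape of ``outputs`` here are assumptions for illustration only.

.. code-block:: python

    import torch.nn.functional as F

    def training_step(self, batch, batch_idx):
        # with DP/DDP2 this call only sees 1/num_gpus of the batch
        x, y = batch
        out = self.forward(x)
        # defer the loss so it can be computed over the full batch
        return {'out': out, 'y': y}

    def training_end(self, outputs):
        # outputs holds the gathered training_step results
        out, y = outputs['out'], outputs['y']
        # softmax/loss over the full batch instead of per-GPU shards
        loss = F.cross_entropy(out, y)
        return {'loss': loss}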
@@ -225,7 +225,7 @@ def training_end(self, outputs):
.. code-block:: python

# Multiple optimizers (ie: GANs)
- def training_step(self, batch, batch_nb, optimizer_idx):
+ def training_step(self, batch, batch_idx, optimizer_idx):
    if optimizer_idx == 0:
        # do training_step with encoder
    if optimizer_idx == 1:
@@ -237,7 +237,7 @@ def training_step(self, batch, batch_nb, optimizer_idx):
.. code-block:: python

# Truncated back-propagation through time
- def training_step(self, batch, batch_nb, hiddens):
+ def training_step(self, batch, batch_idx, hiddens):
    # hiddens are the hiddens from the previous truncated backprop step

You can also return a -1 instead of a dict to stop the current loop. This is useful if you want to
@@ -249,17 +249,17 @@ def validation_step(self, *args, **kwargs):
"""return whatever outputs will need to be aggregated in validation_end

:param batch: The output of your dataloader. A tensor, tuple or list
- :param int batch_nb: Integer displaying which batch this is
+ :param int batch_idx: Integer displaying which batch this is
:param int dataloader_idx: Integer displaying which dataloader this is (only if multiple val datasets used)
:return dict: Dict or OrderedDict - passed to the validation_end step

.. code-block:: python

# if you have one val dataloader:
- def validation_step(self, batch, batch_nb)
+ def validation_step(self, batch, batch_idx)

# if you have multiple val dataloaders:
- def validation_step(self, batch, batch_nb, dataloader_idxdx)
+ def validation_step(self, batch, batch_idx, dataloader_idxdx)

If you don't need to validate you don't need to implement this method.
In this step you'd normally generate examples or calculate anything of interest such as accuracy.
@@ -275,7 +275,7 @@ def validation_step(self, batch, batch_nb, dataloader_idxdx)
.. code-block:: python

# CASE 1: A single validation dataset
- def validation_step(self, batch, batch_nb):
+ def validation_step(self, batch, batch_idx):
    x, y = batch

    # implement your own
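The single-dataloader validation example above is also truncated by the hunk boundary; a minimal sketch of how it is typically completed follows. The accuracy metric and the key names are illustrative assumptions.

.. code-block:: python

    import torch.nn.functional as F

    def validation_step(self, batch, batch_idx):
        x, y = batch

        # assumed forward pass plus a couple of metrics
        out = self.forward(x)
        loss = F.cross_entropy(out, y)
        acc = (out.argmax(dim=1) == y).float().mean()

        # whatever is returned here is aggregated later in validation_end
        return {'val_loss': loss, 'val_acc': acc}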
@@ -307,7 +307,7 @@ def validation_step(self, batch, batch_nb):
.. code-block:: python

# CASE 2: multiple validation datasets
- def validation_step(self, batch, batch_nb, dataset_idx):
+ def validation_step(self, batch, batch_idx, dataset_idx):
    # dataset_idx tells you which dataset this is.

The `dataset_idx` corresponds to the order of datasets returned in `val_dataloader`.
@@ -318,17 +318,17 @@ def test_step(self, *args, **kwargs):
"""return whatever outputs will need to be aggregated in test_end

:param batch: The output of your dataloader. A tensor, tuple or list
- :param int batch_nb: Integer displaying which batch this is
+ :param int batch_idx: Integer displaying which batch this is
:param int dataloader_idx: Integer displaying which dataloader this is (only if multiple test datasets used)
:return dict: Dict or OrderedDict with metrics to display in progress bar. All keys must be tensors.

.. code-block:: python

# if you have one test dataloader:
- def test_step(self, batch, batch_nb)
+ def test_step(self, batch, batch_idx)

# if you have multiple test dataloaders:
- def test_step(self, batch, batch_nb, dataloader_idxdx)
+ def test_step(self, batch, batch_idx, dataloader_idxdx)


**OPTIONAL**
@@ -348,7 +348,7 @@ def test_step(self, batch, batch_nb, dataloader_idxdx)
.. code-block:: python

# CASE 1: A single test dataset
- def test_step(self, batch, batch_nb):
+ def test_step(self, batch, batch_idx):
    x, y = batch

    # implement your own
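Likewise for the single-dataloader test case, a minimal hedged sketch (the forward call and key name are assumed):

.. code-block:: python

    import torch.nn.functional as F

    def test_step(self, batch, batch_idx):
        x, y = batch

        # assumed forward pass; the returned dict is aggregated in test_end
        out = self.forward(x)
        return {'test_loss': F.cross_entropy(out, y)}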
@@ -375,7 +375,7 @@ def test_step(self, batch, batch_nb):
.. code-block:: python

# CASE 2: multiple test datasets
- def test_step(self, batch, batch_nb, dataset_idx):
+ def test_step(self, batch, batch_idx, dataset_idx):
    # dataset_idx tells you which dataset this is.

@@ -694,13 +694,13 @@ def configure_optimizers(self):
"""
raise NotImplementedError

- def optimizer_step(self, epoch_nb, batch_nb, optimizer, optimizer_i, second_order_closure=None):
+ def optimizer_step(self, epoch_idx, batch_idx, optimizer, optimizer_idx, second_order_closure=None):
"""Do something instead of the standard optimizer behavior

- :param int epoch_nb:
- :param int batch_nb:
+ :param int epoch_idx:
+ :param int batch_idx:
:param optimizer:
- :param optimizer_i:
+ :param optimizer_idx:
:param second_order_closure: closure for second order methods
:return:

@@ -712,21 +712,21 @@ def optimizer_step(self, epoch_nb, batch_nb, optimizer, optimizer_i, second_orde
.. code-block:: python

# DEFAULT
- def optimizer_step(self, current_epoch, batch_nb, optimizer, optimizer_i, second_order_closure=None):
+ def optimizer_step(self, current_epoch, batch_idx, optimizer, optimizer_idx, second_order_closure=None):
    optimizer.step()
    optimizer.zero_grad()

# Alternating schedule for optimizer steps (ie: GANs)
- def optimizer_step(self, current_epoch, batch_nb, optimizer, optimizer_i, second_order_closure=None):
+ def optimizer_step(self, current_epoch, batch_idx, optimizer, optimizer_idx, second_order_closure=None):
    # update generator opt every 2 steps
-     if optimizer_i == 0:
-         if batch_nb % 2 == 0:
+     if optimizer_idx == 0:
+         if batch_idx % 2 == 0:
            optimizer.step()
            optimizer.zero_grad()

    # update discriminator opt every 4 steps
-     if optimizer_i == 1:
-         if batch_nb % 4 == 0:
+     if optimizer_idx == 1:
+         if batch_idx % 4 == 0:
            optimizer.step()
            optimizer.zero_grad()

@@ -739,7 +739,7 @@ def optimizer_step(self, current_epoch, batch_nb, optimizer, optimizer_i, second
.. code-block:: python

# learning rate warm-up
- def optimizer_step(self, current_epoch, batch_nb, optimizer, optimizer_i, second_order_closure=None):
+ def optimizer_step(self, current_epoch, batch_idx, optimizer, optimizer_idx, second_order_closure=None):
    # warm up lr
    if self.trainer.global_step < 500:
        lr_scale = min(1., float(self.trainer.global_step + 1) / 500.)
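The warm-up example is cut off by the hunk boundary; a sketch of how it typically finishes is below, scaling each param group's learning rate before the usual step/zero_grad. The ``self.hparams.learning_rate`` attribute is an assumption standing in for whatever base learning rate the module stores.

.. code-block:: python

    def optimizer_step(self, current_epoch, batch_idx, optimizer, optimizer_idx,
                       second_order_closure=None):
        # warm up lr over the first 500 global steps
        if self.trainer.global_step < 500:
            lr_scale = min(1., float(self.trainer.global_step + 1) / 500.)
            for pg in optimizer.param_groups:
                # assumed attribute holding the base learning rate
                pg['lr'] = lr_scale * self.hparams.learning_rate

        # standard update
        optimizer.step()
        optimizer.zero_grad()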