Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Fix] text-classification PL example #6027

Merged
9 commits merged into the base branch
Aug 6, 2020
10 changes: 7 additions & 3 deletions examples/lightning_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ def __init__(
# self.save_hyperparameters()
# can also expand arguments into trainer signature for easier reading

self.hparams = hparams
self.save_hyperparameters(hparams)
self.step_count = 0
self.tfmr_ckpts = {}
self.output_dir = Path(self.hparams.output_dir)
Expand Down Expand Up @@ -194,7 +194,7 @@ def add_model_specific_args(parser, root_dir):

class LoggingCallback(pl.Callback):
def on_batch_end(self, trainer, pl_module):
lrs = {f"lr_group_{i}": lr for i, lr in enumerate(self.lr_scheduler.get_lr())}
lrs = {f"lr_group_{i}": param["lr"] for i, param in enumerate(pl_module.trainer.optimizers[0].param_groups)}
pl_module.logger.log_metrics(lrs)

def on_validation_end(self, trainer: pl.Trainer, pl_module: pl.LightningModule):
Expand Down Expand Up @@ -227,6 +227,10 @@ def add_generic_args(parser, root_dir) -> None:
help="The output directory where the model predictions and checkpoints will be written.",
)

parser.add_argument(
"--gpus", default=0, type=int, help="The number of GPUs allocated for this, it is by default 0 meaning none",
)

parser.add_argument(
"--fp16",
action="store_true",
Expand All @@ -240,7 +244,7 @@ def add_generic_args(parser, root_dir) -> None:
help="For fp16: Apex AMP optimization level selected in ['O0', 'O1', 'O2', and 'O3']."
"See details at https://nvidia.github.io/apex/amp.html",
)
parser.add_argument("--n_tpu_cores", dest="tpu_cores", type=int, default=0)
parser.add_argument("--n_tpu_cores", dest="tpu_cores", type=int)
parser.add_argument("--max_grad_norm", dest="gradient_clip_val", default=1.0, type=float, help="Max gradient norm")
parser.add_argument("--do_train", action="store_true", help="Whether to run training.")
parser.add_argument("--do_predict", action="store_true", help="Whether to run predictions on the test set.")
Expand Down
2 changes: 1 addition & 1 deletion examples/text-classification/run_pl.sh
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ mkdir -p $OUTPUT_DIR
# Add parent directory to python path to access lightning_base.py
export PYTHONPATH="../":"${PYTHONPATH}"

python3 run_pl_glue.py --data_dir $DATA_DIR \
python3 run_pl_glue.py --gpus 1 --data_dir $DATA_DIR \
--task $TASK \
--model_name_or_path $BERT_MODEL \
--output_dir $OUTPUT_DIR \
Expand Down
8 changes: 6 additions & 2 deletions examples/text-classification/run_pl_glue.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import logging
import os
import time
from argparse import Namespace

import numpy as np
import torch
Expand All @@ -24,6 +25,8 @@ class GLUETransformer(BaseTransformer):
mode = "sequence-classification"

def __init__(self, hparams):
if type(hparams) == dict:
hparams = Namespace(**hparams)
hparams.glue_output_mode = glue_output_modes[hparams.task]
num_labels = glue_tasks_num_labels[hparams.task]

Expand All @@ -41,7 +44,8 @@ def training_step(self, batch, batch_idx):
outputs = self(**inputs)
loss = outputs[0]

tensorboard_logs = {"loss": loss, "rate": self.lr_scheduler.get_last_lr()[-1]}
# tensorboard_logs = {"loss": loss, "rate": self.lr_scheduler.get_last_lr()[-1]}
tensorboard_logs = {"loss": loss}
return {"loss": loss, "log": tensorboard_logs}

def prepare_data(self):
Expand Down Expand Up @@ -72,7 +76,7 @@ def prepare_data(self):
logger.info("Saving features into cached file %s", cached_features_file)
torch.save(features, cached_features_file)

def load_dataset(self, mode, batch_size):
def get_dataloader(self, mode: int, batch_size: int, shuffle: bool) -> DataLoader:
"Load datasets. Called after prepare data."

# We test on dev set to compare to benchmarks without having to submit to GLUE server
Expand Down