From 1fced53fb0f2c4ef7d7d03b3533849da3304ad07 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Tue, 19 May 2020 16:00:13 -0400 Subject: [PATCH 001/100] remove the need for hparams --- pytorch_lightning/core/lightning.py | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/pytorch_lightning/core/lightning.py b/pytorch_lightning/core/lightning.py index 662c05a29338d..8101852955b09 100644 --- a/pytorch_lightning/core/lightning.py +++ b/pytorch_lightning/core/lightning.py @@ -1757,3 +1757,28 @@ def get_tqdm_dict(self) -> Dict[str, Union[int, str]]: rank_zero_warn("`get_tqdm_dict` was renamed to `get_progress_bar_dict` in v0.7.3" " and this method will be removed in v1.0.0", DeprecationWarning) return self.get_progress_bar_dict() + + def _auto_register_hparams(self): + # two frames back is the init of the child module + frame = inspect.currentframe() + args = frame.f_back.f_back.f_locals + + # we'll save hparams automatically (renamed to module_arguments) + module_arguments = {} + + # pull out the child itself to make sure we have no issues + child = args['self'] + + # auto set the attr which enables self.attr anywhere in the code + for name, value in args.items(): + + # don't add self + if name not in ['self']: + + # don't overwrite something already set + if not hasattr(child, name): + setattr(child, name, value) + module_arguments[name] = value + + # set module_arguments in child + setattr(child, 'module_arguments', module_arguments) From 7fe5f130cfe79078ff12f81289b50a9ecc8edc4d Mon Sep 17 00:00:00 2001 From: William Falcon Date: Tue, 19 May 2020 16:02:11 -0400 Subject: [PATCH 002/100] remove the need for hparams --- pytorch_lightning/core/lightning.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/pytorch_lightning/core/lightning.py b/pytorch_lightning/core/lightning.py index 8101852955b09..893cde888ff78 100644 --- a/pytorch_lightning/core/lightning.py +++ b/pytorch_lightning/core/lightning.py @@ -1759,6 +1759,15 @@ def get_tqdm_dict(self) -> Dict[str, Union[int, str]]: return self.get_progress_bar_dict() def _auto_register_hparams(self): + """ + Removes the need to pass in hparams. Instead, we register every argument in init + to the module with some caveats: + 1. we don't overwrite the property if it already exists + 2. 
we also store a module_arguments property for model loading and saving + + Returns: + + """ # two frames back is the init of the child module frame = inspect.currentframe() args = frame.f_back.f_back.f_locals From 0283055a2088bb2ee073db038b6031c3ad7f1867 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Tue, 19 May 2020 16:02:49 -0400 Subject: [PATCH 003/100] remove the need for hparams --- pytorch_lightning/core/lightning.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pytorch_lightning/core/lightning.py b/pytorch_lightning/core/lightning.py index 893cde888ff78..227046c652360 100644 --- a/pytorch_lightning/core/lightning.py +++ b/pytorch_lightning/core/lightning.py @@ -73,6 +73,9 @@ def __init__(self, *args, **kwargs): #: device reference self._device = torch.device('cpu') + # register all params passed into the child module in __init__ + self._auto_register_hparams() + @property def on_gpu(self): """ From 599c9ad5be3172972ae7c3b298fdb5cfc6d9ff09 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Tue, 19 May 2020 16:04:20 -0400 Subject: [PATCH 004/100] remove the need for hparams --- pytorch_lightning/core/lightning.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/pytorch_lightning/core/lightning.py b/pytorch_lightning/core/lightning.py index 227046c652360..ba0f8aef9c66f 100644 --- a/pytorch_lightning/core/lightning.py +++ b/pytorch_lightning/core/lightning.py @@ -1767,9 +1767,6 @@ def _auto_register_hparams(self): to the module with some caveats: 1. we don't overwrite the property if it already exists 2. we also store a module_arguments property for model loading and saving - - Returns: - """ # two frames back is the init of the child module frame = inspect.currentframe() From 32c74355b428801b3cc99d761259a0b8cae176a0 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Tue, 19 May 2020 16:08:23 -0400 Subject: [PATCH 005/100] replace self.hparams --- docs/source/hyperparameters.rst | 8 ++-- docs/source/lr_finder.rst | 14 +++---- docs/source/optimizers.rst | 2 +- docs/source/training_tricks.rst | 2 +- docs/source/weights_loading.rst | 4 +- .../computer_vision_fine_tuning.py | 36 ++++++++--------- .../generative_adversarial_net.py | 14 +++---- pl_examples/domain_templates/imagenet.py | 18 ++++----- .../domain_templates/reinforce_learn_Qnet.py | 22 +++++------ .../domain_templates/semantic_segmentation.py | 2 +- pl_examples/models/lightning_template.py | 28 ++++++------- pytorch_lightning/core/lightning.py | 12 +++--- pytorch_lightning/loggers/tensorboard.py | 6 +-- pytorch_lightning/profiler/__init__.py | 2 +- pytorch_lightning/trainer/trainer.py | 39 ++++++------------- tests/base/model_optimizers.py | 24 ++++++------ tests/base/model_template.py | 32 +++++++++------ tests/base/model_utilities.py | 4 +- tests/base/models.py | 10 ++--- tests/loggers/test_base.py | 4 +- tests/trainer/test_dataloaders.py | 2 +- tests/trainer/test_trainer.py | 4 ++ 22 files changed, 143 insertions(+), 146 deletions(-) diff --git a/docs/source/hyperparameters.rst b/docs/source/hyperparameters.rst index 5b2dd343fb622..fe3d94b8cc668 100644 --- a/docs/source/hyperparameters.rst +++ b/docs/source/hyperparameters.rst @@ -109,17 +109,17 @@ modify the network and read those values in the LightningModule super().__init__() # do this to save all arguments in any logger (tensorboard) - self.hparams = hparams + self = hparams self.layer_1 = torch.nn.Linear(28 * 28, hparams.layer_1_dim) self.layer_2 = torch.nn.Linear(hparams.layer_1_dim, hparams.layer_2_dim) self.layer_3 = 
torch.nn.Linear(hparams.layer_2_dim, 10) def train_dataloader(self): - return DataLoader(mnist_train, batch_size=self.hparams.batch_size) + return DataLoader(mnist_train, batch_size=self.batch_size) def configure_optimizers(self): - return Adam(self.parameters(), lr=self.hparams.learning_rate) + return Adam(self.parameters(), lr=self.learning_rate) @staticmethod def add_model_specific_args(parent_parser): @@ -139,7 +139,7 @@ Now pass in the params when you init your model hparams = parser.parse_args() model = LitMNIST(hparams) -The line `self.hparams = hparams` is very special. This line assigns your hparams to the LightningModule. +The line `self = hparams` is very special. This line assigns your hparams to the LightningModule. This does two things: 1. It adds them automatically to TensorBoard logs under the hparams tab. diff --git a/docs/source/lr_finder.rst b/docs/source/lr_finder.rst index b92088fcae2f8..f426438f4d55b 100755 --- a/docs/source/lr_finder.rst +++ b/docs/source/lr_finder.rst @@ -22,19 +22,19 @@ Warnings: - For the moment, this feature only works with models having a single optimizer. - LR support for DDP is not implemented yet, it is comming soon. -Using Lightning's built-in LR finder -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +Using Lightnings build-in LR finder +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ In the most basic use case, this feature can be enabled during trainer construction -with ``Trainer(auto_lr_find=True)``. When ``.fit(model)`` is called, the LR finder +with ``Trainer(auto_lr_find=True)``. When ``.fit(model)`` is called, the lr finder will automatically be run before any training is done. The ``lr`` that is found and used will be written to the console and logged together with all other hyperparameters of the model. .. testcode:: - # default: no automatic learning rate finder - trainer = Trainer(auto_lr_find=False) + # default, no automatic learning rate finder + trainer = Trainer(auto_lr_find=True) When the ``lr`` or ``learning_rate`` key in hparams exists, this flag sets your learning_rate. In both cases, if the respective fields are not found, an error will be thrown. @@ -44,10 +44,10 @@ In both cases, if the respective fields are not found, an error will be thrown. class LitModel(LightningModule): def __init__(self, hparams): - self.hparams = hparams + self = hparams def configure_optimizers(self): - return Adam(self.parameters(), lr=self.hparams.lr|self.hparams.learning_rate) + return Adam(self.parameters(), lr=self.lr|self.learning_rate) # finds learning rate automatically # sets hparams.lr or hparams.learning_rate to that learning rate diff --git a/docs/source/optimizers.rst b/docs/source/optimizers.rst index 8f8715a09e7b3..7f5a56f1a5994 100644 --- a/docs/source/optimizers.rst +++ b/docs/source/optimizers.rst @@ -112,7 +112,7 @@ Here we add a learning-rate warm up if self.trainer.global_step < 500: lr_scale = min(1., float(self.trainer.global_step + 1) / 500.) for pg in optimizer.param_groups: - pg['lr'] = lr_scale * self.hparams.learning_rate + pg['lr'] = lr_scale * self.learning_rate # update params optimizer.step() diff --git a/docs/source/training_tricks.rst b/docs/source/training_tricks.rst index b748465eec014..53cb95bf9f029 100644 --- a/docs/source/training_tricks.rst +++ b/docs/source/training_tricks.rst @@ -67,7 +67,7 @@ a binary search. .. code-block:: python def train_dataloader(self): - return DataLoader(train_dataset, batch_size=self.hparams.batch_size) + return DataLoader(train_dataset, batch_size=self.batch_size) .. 
warning:: diff --git a/docs/source/weights_loading.rst b/docs/source/weights_loading.rst index 64a6950738ef1..d99db851ceb2e 100644 --- a/docs/source/weights_loading.rst +++ b/docs/source/weights_loading.rst @@ -76,7 +76,7 @@ The Lightning checkpoint also saves the hparams (hyperparams) passed into the Li class MyLightningModule(LightningModule): def __init__(self, hparams, *args, **kwargs): - self.hparams = hparams + self = hparams Manual saving ^^^^^^^^^^^^^ @@ -107,7 +107,7 @@ The above only works if you used `hparams` in your model definition class LitModel(LightningModule): def __init__(self, hparams): - self.hparams = hparams + self = hparams self.l1 = nn.Linear(hparams.in_dim, hparams.out_dim) But if you don't and instead pass individual parameters diff --git a/pl_examples/domain_templates/computer_vision_fine_tuning.py b/pl_examples/domain_templates/computer_vision_fine_tuning.py index 42a0a936d9e34..6c69f4956e350 100644 --- a/pl_examples/domain_templates/computer_vision_fine_tuning.py +++ b/pl_examples/domain_templates/computer_vision_fine_tuning.py @@ -151,7 +151,7 @@ def __init__(self, hparams: argparse.Namespace, dl_path: Union[str, Path]) -> None: super().__init__() - self.hparams = hparams + self = hparams self.dl_path = dl_path self.__build_model() @@ -159,12 +159,12 @@ def __build_model(self): """Define model layers & loss.""" # 1. Load pre-trained network: - model_func = getattr(models, self.hparams.backbone) + model_func = getattr(models, self.backbone) backbone = model_func(pretrained=True) _layers = list(backbone.children())[:-1] self.feature_extractor = torch.nn.Sequential(*_layers) - freeze(module=self.feature_extractor, train_bn=self.hparams.train_bn) + freeze(module=self.feature_extractor, train_bn=self.train_bn) # 2. Classifier: _fc_layers = [torch.nn.Linear(2048, 256), @@ -194,29 +194,29 @@ def train(self, mode=True): super().train(mode=mode) epoch = self.current_epoch - if epoch < self.hparams.milestones[0] and mode: + if epoch < self.milestones[0] and mode: # feature extractor is frozen (except for BatchNorm layers) freeze(module=self.feature_extractor, - train_bn=self.hparams.train_bn) + train_bn=self.train_bn) - elif self.hparams.milestones[0] <= epoch < self.hparams.milestones[1] and mode: + elif self.milestones[0] <= epoch < self.milestones[1] and mode: # Unfreeze last two layers of the feature extractor freeze(module=self.feature_extractor, n=-2, - train_bn=self.hparams.train_bn) + train_bn=self.train_bn) def on_epoch_start(self): """Use `on_epoch_start` to unfreeze layers progressively.""" optimizer = self.trainer.optimizers[0] - if self.current_epoch == self.hparams.milestones[0]: + if self.current_epoch == self.milestones[0]: _unfreeze_and_add_param_group(module=self.feature_extractor[-2:], optimizer=optimizer, - train_bn=self.hparams.train_bn) + train_bn=self.train_bn) - elif self.current_epoch == self.hparams.milestones[1]: + elif self.current_epoch == self.milestones[1]: _unfreeze_and_add_param_group(module=self.feature_extractor[:-2], optimizer=optimizer, - train_bn=self.hparams.train_bn) + train_bn=self.train_bn) def training_step(self, batch, batch_idx): @@ -246,7 +246,7 @@ def training_epoch_end(self, outputs): for output in outputs]).mean() train_acc_mean = torch.stack([output['num_correct'] for output in outputs]).sum().float() - train_acc_mean /= (len(outputs) * self.hparams.batch_size) + train_acc_mean /= (len(outputs) * self.batch_size) return {'log': {'train_loss': train_loss_mean, 'train_acc': train_acc_mean, 'step': self.current_epoch}} @@ 
-273,7 +273,7 @@ def validation_epoch_end(self, outputs): for output in outputs]).mean() val_acc_mean = torch.stack([output['num_correct'] for output in outputs]).sum().float() - val_acc_mean /= (len(outputs) * self.hparams.batch_size) + val_acc_mean /= (len(outputs) * self.batch_size) return {'log': {'val_loss': val_loss_mean, 'val_acc': val_acc_mean, 'step': self.current_epoch}} @@ -281,11 +281,11 @@ def validation_epoch_end(self, outputs): def configure_optimizers(self): optimizer = optim.Adam(filter(lambda p: p.requires_grad, self.parameters()), - lr=self.hparams.lr) + lr=self.lr) scheduler = MultiStepLR(optimizer, - milestones=self.hparams.milestones, - gamma=self.hparams.lr_scheduler_gamma) + milestones=self.milestones, + gamma=self.lr_scheduler_gamma) return [optimizer], [scheduler] @@ -326,8 +326,8 @@ def __dataloader(self, train): _dataset = self.train_dataset if train else self.valid_dataset loader = DataLoader(dataset=_dataset, - batch_size=self.hparams.batch_size, - num_workers=self.hparams.num_workers, + batch_size=self.batch_size, + num_workers=self.num_workers, shuffle=True if train else False) return loader diff --git a/pl_examples/domain_templates/generative_adversarial_net.py b/pl_examples/domain_templates/generative_adversarial_net.py index 99a57f1a0b96a..e8467739635b8 100644 --- a/pl_examples/domain_templates/generative_adversarial_net.py +++ b/pl_examples/domain_templates/generative_adversarial_net.py @@ -74,7 +74,7 @@ class GAN(LightningModule): def __init__(self, hparams): super().__init__() - self.hparams = hparams + self = hparams # networks mnist_shape = (1, 28, 28) @@ -98,7 +98,7 @@ def training_step(self, batch, batch_idx, optimizer_idx): # train generator if optimizer_idx == 0: # sample noise - z = torch.randn(imgs.shape[0], self.hparams.latent_dim) + z = torch.randn(imgs.shape[0], self.latent_dim) z = z.type_as(imgs) # generate images @@ -152,9 +152,9 @@ def training_step(self, batch, batch_idx, optimizer_idx): return output def configure_optimizers(self): - lr = self.hparams.lr - b1 = self.hparams.b1 - b2 = self.hparams.b2 + lr = self.lr + b1 = self.b1 + b2 = self.b2 opt_g = torch.optim.Adam(self.generator.parameters(), lr=lr, betas=(b1, b2)) opt_d = torch.optim.Adam(self.discriminator.parameters(), lr=lr, betas=(b1, b2)) @@ -164,10 +164,10 @@ def train_dataloader(self): transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize([0.5], [0.5])]) dataset = MNIST(os.getcwd(), train=True, download=True, transform=transform) - return DataLoader(dataset, batch_size=self.hparams.batch_size) + return DataLoader(dataset, batch_size=self.batch_size) def on_epoch_end(self): - z = torch.randn(8, self.hparams.latent_dim) + z = torch.randn(8, self.latent_dim) z = z.type_as(self.last_imgs) # log sampled images diff --git a/pl_examples/domain_templates/imagenet.py b/pl_examples/domain_templates/imagenet.py index c274cec90ddbb..46162676718b3 100644 --- a/pl_examples/domain_templates/imagenet.py +++ b/pl_examples/domain_templates/imagenet.py @@ -34,8 +34,8 @@ def __init__(self, hparams): TODO: add docstring here """ super().__init__() - self.hparams = hparams - self.model = models.__dict__[self.hparams.arch](pretrained=self.hparams.pretrained) + self = hparams + self.model = models.__dict__[self.arch](pretrained=self.pretrained) def forward(self, x): return self.model(x) @@ -112,9 +112,9 @@ def __accuracy(cls, output, target, topk=(1,)): def configure_optimizers(self): optimizer = optim.SGD( self.parameters(), - lr=self.hparams.lr, - 
momentum=self.hparams.momentum, - weight_decay=self.hparams.weight_decay + lr=self.lr, + momentum=self.momentum, + weight_decay=self.weight_decay ) scheduler = lr_scheduler.ExponentialLR(optimizer, gamma=0.1) return [optimizer], [scheduler] @@ -125,7 +125,7 @@ def train_dataloader(self): std=[0.229, 0.224, 0.225], ) - train_dir = os.path.join(self.hparams.data_path, 'train') + train_dir = os.path.join(self.data_path, 'train') train_dataset = datasets.ImageFolder( train_dir, transforms.Compose([ @@ -142,7 +142,7 @@ def train_dataloader(self): train_loader = torch.utils.data.DataLoader( dataset=train_dataset, - batch_size=self.hparams.batch_size, + batch_size=self.batch_size, shuffle=(train_sampler is None), num_workers=0, sampler=train_sampler @@ -154,7 +154,7 @@ def val_dataloader(self): mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225], ) - val_dir = os.path.join(self.hparams.data_path, 'val') + val_dir = os.path.join(self.data_path, 'val') val_loader = torch.utils.data.DataLoader( datasets.ImageFolder(val_dir, transforms.Compose([ transforms.Resize(256), @@ -162,7 +162,7 @@ def val_dataloader(self): transforms.ToTensor(), normalize, ])), - batch_size=self.hparams.batch_size, + batch_size=self.batch_size, shuffle=False, num_workers=0, ) diff --git a/pl_examples/domain_templates/reinforce_learn_Qnet.py b/pl_examples/domain_templates/reinforce_learn_Qnet.py index ff3f634da7817..10600047517b3 100644 --- a/pl_examples/domain_templates/reinforce_learn_Qnet.py +++ b/pl_examples/domain_templates/reinforce_learn_Qnet.py @@ -192,20 +192,20 @@ class DQNLightning(pl.LightningModule): def __init__(self, hparams: argparse.Namespace) -> None: super().__init__() - self.hparams = hparams + self = hparams - self.env = gym.make(self.hparams.env) + self.env = gym.make(self.env) obs_size = self.env.observation_space.shape[0] n_actions = self.env.action_space.n self.net = DQN(obs_size, n_actions) self.target_net = DQN(obs_size, n_actions) - self.buffer = ReplayBuffer(self.hparams.replay_size) + self.buffer = ReplayBuffer(self.replay_size) self.agent = Agent(self.env, self.buffer) self.total_reward = 0 self.episode_reward = 0 - self.populate(self.hparams.warm_start_steps) + self.populate(self.warm_start_steps) def populate(self, steps: int = 1000) -> None: """ @@ -250,7 +250,7 @@ def dqn_mse_loss(self, batch: Tuple[torch.Tensor, torch.Tensor]) -> torch.Tensor next_state_values[dones] = 0.0 next_state_values = next_state_values.detach() - expected_state_action_values = next_state_values * self.hparams.gamma + rewards + expected_state_action_values = next_state_values * self.gamma + rewards return nn.MSELoss()(state_action_values, expected_state_action_values) @@ -267,8 +267,8 @@ def training_step(self, batch: Tuple[torch.Tensor, torch.Tensor], nb_batch) -> O Training loss and log metrics """ device = self.get_device(batch) - epsilon = max(self.hparams.eps_end, self.hparams.eps_start - - self.global_step + 1 / self.hparams.eps_last_frame) + epsilon = max(self.eps_end, self.eps_start - + self.global_step + 1 / self.eps_last_frame) # step through environment with agent reward, done = self.agent.play_step(self.net, epsilon, device) @@ -282,7 +282,7 @@ def training_step(self, batch: Tuple[torch.Tensor, torch.Tensor], nb_batch) -> O self.episode_reward = 0 # Soft update of target network - if self.global_step % self.hparams.sync_rate == 0: + if self.global_step % self.sync_rate == 0: self.target_net.load_state_dict(self.net.state_dict()) log = {'total_reward': torch.tensor(self.total_reward).to(device), @@ 
-293,14 +293,14 @@ def training_step(self, batch: Tuple[torch.Tensor, torch.Tensor], nb_batch) -> O def configure_optimizers(self) -> List[Optimizer]: """Initialize Adam optimizer""" - optimizer = optim.Adam(self.net.parameters(), lr=self.hparams.lr) + optimizer = optim.Adam(self.net.parameters(), lr=self.lr) return [optimizer] def __dataloader(self) -> DataLoader: """Initialize the Replay Buffer dataset used for retrieving experiences""" - dataset = RLDataset(self.buffer, self.hparams.episode_length) + dataset = RLDataset(self.buffer, self.episode_length) dataloader = DataLoader(dataset=dataset, - batch_size=self.hparams.batch_size, + batch_size=self.batch_size, sampler=None ) return dataloader diff --git a/pl_examples/domain_templates/semantic_segmentation.py b/pl_examples/domain_templates/semantic_segmentation.py index 9d98c799a7283..8a96bcd861037 100644 --- a/pl_examples/domain_templates/semantic_segmentation.py +++ b/pl_examples/domain_templates/semantic_segmentation.py @@ -130,7 +130,7 @@ class SegModel(pl.LightningModule): def __init__(self, hparams): super().__init__() - self.hparams = hparams + self = hparams self.data_path = hparams.data_path self.batch_size = hparams.batch_size self.learning_rate = hparams.lr diff --git a/pl_examples/models/lightning_template.py b/pl_examples/models/lightning_template.py index 13b3bc67a912b..3e8ed72f1ca6e 100644 --- a/pl_examples/models/lightning_template.py +++ b/pl_examples/models/lightning_template.py @@ -45,14 +45,14 @@ def __init__(self, hparams): """ # init superclass super().__init__() - self.hparams = hparams - self.c_d1 = nn.Linear(in_features=self.hparams.in_features, - out_features=self.hparams.hidden_dim) - self.c_d1_bn = nn.BatchNorm1d(self.hparams.hidden_dim) - self.c_d1_drop = nn.Dropout(self.hparams.drop_prob) + self = hparams + self.c_d1 = nn.Linear(in_features=self.in_features, + out_features=self.hidden_dim) + self.c_d1_bn = nn.BatchNorm1d(self.hidden_dim) + self.c_d1_drop = nn.Dropout(self.drop_prob) - self.c_d2 = nn.Linear(in_features=self.hparams.hidden_dim, - out_features=self.hparams.out_features) + self.c_d2 = nn.Linear(in_features=self.hidden_dim, + out_features=self.out_features) def forward(self, x): """ @@ -122,32 +122,32 @@ def configure_optimizers(self): Return whatever optimizers and learning rate schedulers you want here. At least one optimizer is required. 
""" - optimizer = optim.Adam(self.parameters(), lr=self.hparams.learning_rate) + optimizer = optim.Adam(self.parameters(), lr=self.learning_rate) scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=10) return [optimizer], [scheduler] def prepare_data(self): transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5,), (1.0,))]) - self.mnist_train = MNIST(self.hparams.data_root, train=True, download=True, transform=transform) - self.mnist_test = MNIST(self.hparams.data_root, train=False, download=True, transform=transform) + self.mnist_train = MNIST(self.data_root, train=True, download=True, transform=transform) + self.mnist_test = MNIST(self.data_root, train=False, download=True, transform=transform) def train_dataloader(self): log.info('Training data loader called.') - return DataLoader(self.mnist_train, batch_size=self.hparams.batch_size, num_workers=4) + return DataLoader(self.mnist_train, batch_size=self.batch_size, num_workers=4) def val_dataloader(self): log.info('Validation data loader called.') - return DataLoader(self.mnist_test, batch_size=self.hparams.batch_size, num_workers=4) + return DataLoader(self.mnist_test, batch_size=self.batch_size, num_workers=4) def test_dataloader(self): log.info('Test data loader called.') - return DataLoader(self.mnist_test, batch_size=self.hparams.batch_size, num_workers=4) + return DataLoader(self.mnist_test, batch_size=self.batch_size, num_workers=4) @staticmethod def add_model_specific_args(parent_parser, root_dir): # pragma: no-cover """ - Parameters you define here will be available to your model through `self.hparams`. + Parameters you define here will be available to your model through `self`. """ parser = ArgumentParser(parents=[parent_parser]) diff --git a/pytorch_lightning/core/lightning.py b/pytorch_lightning/core/lightning.py index ba0f8aef9c66f..d2a06d13f6082 100644 --- a/pytorch_lightning/core/lightning.py +++ b/pytorch_lightning/core/lightning.py @@ -65,7 +65,7 @@ def __init__(self, *args, **kwargs): #: True if using amp self.use_amp = False - self.hparams = None + self = None #: Current dtype self._dtype = torch.float @@ -1161,7 +1161,7 @@ def optimizer_step(self, current_epoch, batch_idx, optimizer, if self.trainer.global_step < 500: lr_scale = min(1., float(self.trainer.global_step + 1) / 500.) for pg in optimizer.param_groups: - pg['lr'] = lr_scale * self.hparams.learning_rate + pg['lr'] = lr_scale * self.learning_rate # update params optimizer.step() @@ -1315,7 +1315,7 @@ def train_dataloader(self): download=True) loader = torch.utils.data.DataLoader( dataset=dataset, - batch_size=self.hparams.batch_size, + batch_size=self.batch_size, shuffle=True ) return loader @@ -1366,7 +1366,7 @@ def test_dataloader(self): download=True) loader = torch.utils.data.DataLoader( dataset=dataset, - batch_size=self.hparams.batch_size, + batch_size=self.batch_size, shuffle=False ) @@ -1411,7 +1411,7 @@ def val_dataloader(self): transform=transform, download=True) loader = torch.utils.data.DataLoader( dataset=dataset, - batch_size=self.hparams.batch_size, + batch_size=self.batch_size, shuffle=False ) @@ -1622,7 +1622,7 @@ def _load_model_state(cls, checkpoint: Dict[str, Any], *args, **kwargs) -> 'Ligh rank_zero_warn( f"Checkpoint does not contain hyperparameters but {cls.__name__}'s __init__" " contains argument 'hparams'. Will pass in an empty Namespace instead." - " Did you forget to store your model hyperparameters in self.hparams?" + " Did you forget to store your model hyperparameters in self?" 
) hparams = {} else: # The user's LightningModule does not define a hparams argument diff --git a/pytorch_lightning/loggers/tensorboard.py b/pytorch_lightning/loggers/tensorboard.py index 62965d783231c..0b6b53a22c26b 100644 --- a/pytorch_lightning/loggers/tensorboard.py +++ b/pytorch_lightning/loggers/tensorboard.py @@ -56,7 +56,7 @@ def __init__(self, self._version = version self._experiment = None - self.hparams = {} + self = {} self._kwargs = kwargs @property @@ -107,7 +107,7 @@ def log_hyperparams(self, params: Union[Dict[str, Any], Namespace], params = self._convert_params(params) # store params to output - self.hparams.update(params) + self.update(params) # format params into the suitable for tensorboard params = self._flatten_dict(params) @@ -158,7 +158,7 @@ def save(self) -> None: hparams_file = os.path.join(dir_path, self.NAME_HPARAMS_FILE) # save the metatags file - save_hparams_to_yaml(hparams_file, self.hparams) + save_hparams_to_yaml(hparams_file, self) @rank_zero_only def finalize(self, status: str) -> None: diff --git a/pytorch_lightning/profiler/__init__.py b/pytorch_lightning/profiler/__init__.py index 683baccafa858..85d531fdaccc1 100644 --- a/pytorch_lightning/profiler/__init__.py +++ b/pytorch_lightning/profiler/__init__.py @@ -99,7 +99,7 @@ class MyModel(LightningModule): def __init__(self, hparams, profiler=None): - self.hparams = hparams + self = hparams self.profiler = profiler or PassThroughProfiler() def custom_processing_step(self, data): diff --git a/pytorch_lightning/trainer/trainer.py b/pytorch_lightning/trainer/trainer.py index e4eae0dedf143..e4a22e6de3589 100644 --- a/pytorch_lightning/trainer/trainer.py +++ b/pytorch_lightning/trainer/trainer.py @@ -35,6 +35,7 @@ from pytorch_lightning.utilities.exceptions import MisconfigurationException from pytorch_lightning.utilities import rank_zero_warn, parsing + try: from apex import amp except ImportError: @@ -81,7 +82,7 @@ class Trainer( 'gradient_clip', 'nb_gpu_nodes', 'max_nb_epochs', 'min_nb_epochs', 'add_row_log_interval', 'nb_sanity_val_steps', 'tng_tqdm_dic', ) - DEPRECATED_IN_0_9 = ('use_amp', 'show_progress_bar', 'training_tqdm_dict', 'num_tpu_cores') + DEPRECATED_IN_0_9 = ('use_amp', 'show_progress_bar', 'training_tqdm_dict') def __init__( self, @@ -96,7 +97,7 @@ def __init__( num_processes: int = 1, gpus: Optional[Union[List[int], str, int]] = None, auto_select_gpus: bool = False, - tpu_cores: Optional[Union[List[int], int]] = None, + num_tpu_cores: Optional[int] = None, log_gpu_memory: Optional[str] = None, progress_bar_refresh_rate: int = 1, overfit_pct: float = 0.0, @@ -132,7 +133,6 @@ def __init__( progress_bar_callback: Optional[Union[ProgressBarBase, bool]] = True, terminate_on_nan: bool = False, auto_scale_batch_size: Union[str, bool] = False, - num_tpu_cores: Optional[int] = None, # backward compatible, todo: remove in v0.9.0 amp_level: str = 'O1', # backward compatible, todo: remove in v0.8.0 default_save_path=None, # backward compatible, todo: remove in v0.8.0 gradient_clip=None, # backward compatible, todo: remove in v0.8.0 @@ -188,10 +188,7 @@ def __init__( GPUs are configured to be in "exclusive mode", such that only one process at a time can access them. - tpu_cores: How many TPU cores to train on (1 or 8) / Single TPU to train on [1] - - num_tpu_cores: How many TPU cores to train on (1 or 8) - .. warning:: .. deprecated:: 0.7.6. Will remove 0.9.0. + num_tpu_cores: How many TPU cores to train on (1 or 8). log_gpu_memory: None, 'min_max', 'all'. 
Might slow performance @@ -288,7 +285,7 @@ def __init__( auto_lr_find: If set to True, will `initially` run a learning rate finder, trying to optimize initial learning for faster convergence. Sets learning - rate in self.hparams.lr | self.hparams.learning_rate in the lightning module. + rate in self.lr | self.learning_rate in the lightning module. To use a different key, set a string instead of True with the key name. replace_sampler_ddp: Explicitly enables or disables sampler replacement. @@ -303,7 +300,7 @@ def __init__( auto_scale_batch_size: If set to True, will `initially` run a batch size finder trying to find the largest batch size that fits into memory. - The result will be stored in self.hparams.batch_size in the LightningModule. + The result will be stored in self.batch_size in the LightningModule. Additionally, can be set to either `power` that estimates the batch size through a power search or `binsearch` that estimates the batch size through a binary search. """ @@ -345,19 +342,9 @@ def __init__( self.on_gpu = True if (gpus and torch.cuda.is_available()) else False # tpu config - if num_tpu_cores is not None: - rank_zero_warn("Argument `num_tpu_cores` is now set by `tpu_cores` since v0.7.6" - " and this argument will be removed in v0.9.0", DeprecationWarning) - - if tpu_cores is None: - tpu_cores = num_tpu_cores - self.on_tpu = tpu_cores is not None - self.tpu_cores = tpu_cores - assert self.tpu_cores in (1, 8, None) or ( - isinstance(self.tpu_cores, (list, tuple, set)) and len(self.tpu_cores) == 1 - ), '`tpu_cores` can only be 1, 8 or [<1-8>]' - - self.tpu_id = tpu_cores[0] if isinstance(tpu_cores, list) else None + self.on_tpu = num_tpu_cores is not None + self.num_tpu_cores = num_tpu_cores + assert num_tpu_cores in [1, 8, None], 'num_tpu_cores can only be 1 or 8' if num_processes != 1 and distributed_backend != "ddp_cpu": rank_zero_warn("num_processes is only used for distributed_backend=\"ddp_cpu\". Ignoring it.") @@ -490,6 +477,7 @@ def __init__( # override dist backend when using tpus if self.on_tpu: self.init_tpu() + self.current_tpu_idx = None # init flags for SLURM+ddp to work self.proc_rank = 0 @@ -870,7 +858,7 @@ def fit( self.single_gpu_train(model) elif self.use_tpu: # pragma: no-cover - log.info(f'training on {self.tpu_cores} TPU cores') + log.info(f'training on {self.num_tpu_cores} TPU cores') # COLAB_GPU is an env var available by default in Colab environments. start_method = 'fork' if self.on_colab_kaggle else 'spawn' @@ -879,10 +867,7 @@ def fit( self.model = model # train - if self.tpu_id is not None: - self.tpu_train(self.tpu_id, model) - else: - xmp.spawn(self.tpu_train, args=(model,), nprocs=self.tpu_cores, start_method=start_method) + xmp.spawn(self.tpu_train, args=(model,), nprocs=self.num_tpu_cores, start_method=start_method) # load weights if not interrupted self.load_spawn_weights(model) diff --git a/tests/base/model_optimizers.py b/tests/base/model_optimizers.py index 394ee69daee81..aebe0550a67ac 100644 --- a/tests/base/model_optimizers.py +++ b/tests/base/model_optimizers.py @@ -9,7 +9,7 @@ def configure_optimizers(self): return whatever optimizers we want here. :return: list of optimizers """ - optimizer = optim.Adam(self.parameters(), lr=self.hparams.learning_rate) + optimizer = optim.Adam(self.parameters(), lr=self.learning_rate) return optimizer def configure_optimizers__empty(self): @@ -20,7 +20,7 @@ def configure_optimizers__lbfgs(self): return whatever optimizers we want here. 
:return: list of optimizers """ - optimizer = optim.LBFGS(self.parameters(), lr=self.hparams.learning_rate) + optimizer = optim.LBFGS(self.parameters(), lr=self.learning_rate) return optimizer def configure_optimizers__multiple_optimizers(self): @@ -29,26 +29,26 @@ def configure_optimizers__multiple_optimizers(self): :return: list of optimizers """ # try no scheduler for this model (testing purposes) - optimizer1 = optim.Adam(self.parameters(), lr=self.hparams.learning_rate) - optimizer2 = optim.Adam(self.parameters(), lr=self.hparams.learning_rate) + optimizer1 = optim.Adam(self.parameters(), lr=self.learning_rate) + optimizer2 = optim.Adam(self.parameters(), lr=self.learning_rate) return optimizer1, optimizer2 def configure_optimizers__single_scheduler(self): - optimizer = optim.Adam(self.parameters(), lr=self.hparams.learning_rate) + optimizer = optim.Adam(self.parameters(), lr=self.learning_rate) lr_scheduler = optim.lr_scheduler.StepLR(optimizer, 1, gamma=0.1) return [optimizer], [lr_scheduler] def configure_optimizers__multiple_schedulers(self): - optimizer1 = optim.Adam(self.parameters(), lr=self.hparams.learning_rate) - optimizer2 = optim.Adam(self.parameters(), lr=self.hparams.learning_rate) + optimizer1 = optim.Adam(self.parameters(), lr=self.learning_rate) + optimizer2 = optim.Adam(self.parameters(), lr=self.learning_rate) lr_scheduler1 = optim.lr_scheduler.StepLR(optimizer1, 1, gamma=0.1) lr_scheduler2 = optim.lr_scheduler.StepLR(optimizer2, 1, gamma=0.1) return [optimizer1, optimizer2], [lr_scheduler1, lr_scheduler2] def configure_optimizers__mixed_scheduling(self): - optimizer1 = optim.Adam(self.parameters(), lr=self.hparams.learning_rate) - optimizer2 = optim.Adam(self.parameters(), lr=self.hparams.learning_rate) + optimizer1 = optim.Adam(self.parameters(), lr=self.learning_rate) + optimizer2 = optim.Adam(self.parameters(), lr=self.learning_rate) lr_scheduler1 = optim.lr_scheduler.StepLR(optimizer1, 4, gamma=0.1) lr_scheduler2 = optim.lr_scheduler.StepLR(optimizer2, 1, gamma=0.1) @@ -56,14 +56,14 @@ def configure_optimizers__mixed_scheduling(self): [{'scheduler': lr_scheduler1, 'interval': 'step'}, lr_scheduler2] def configure_optimizers__reduce_lr_on_plateau(self): - optimizer = optim.Adam(self.parameters(), lr=self.hparams.learning_rate) + optimizer = optim.Adam(self.parameters(), lr=self.learning_rate) lr_scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer) return [optimizer], [lr_scheduler] def configure_optimizers__param_groups(self): param_groups = [ - {'params': list(self.parameters())[:2], 'lr': self.hparams.learning_rate * 0.1}, - {'params': list(self.parameters())[2:], 'lr': self.hparams.learning_rate} + {'params': list(self.parameters())[:2], 'lr': self.learning_rate * 0.1}, + {'params': list(self.parameters())[2:], 'lr': self.learning_rate} ] optimizer = optim.Adam(param_groups) diff --git a/tests/base/model_template.py b/tests/base/model_template.py index d530fa4a97b12..9ab186ab54da8 100644 --- a/tests/base/model_template.py +++ b/tests/base/model_template.py @@ -37,13 +37,21 @@ class EvalModelTemplate( >>> model = EvalModelTemplate() """ - def __init__(self, hparams: object = None) -> object: - """Pass in parsed HyperOptArgumentParser to the model.""" - if hparams is None: - hparams = EvalModelTemplate.get_default_hparams() + + def __init__(self, + drop_prob=0.2, + batch_size=32, + in_features=28 * 28, + learning_rate=0.001 * 8, + optimizer_name='adam', + data_root=PATH_DATASETS, + out_features=10, + hidden_dim=1000, + b1=0.5, + b2=0.999, + *args, 
**kwargs) -> object: # init superclass super().__init__() - self.hparams = Namespace(**hparams) if isinstance(hparams, dict) else hparams # if you specify an example input, the summary will show input/output for each layer self.example_input_array = torch.rand(5, 28 * 28) @@ -57,15 +65,15 @@ def __build_model(self): :return: """ self.c_d1 = nn.Linear( - in_features=self.hparams.in_features, - out_features=self.hparams.hidden_dim + in_features=self.in_features, + out_features=self.hidden_dim ) - self.c_d1_bn = nn.BatchNorm1d(self.hparams.hidden_dim) - self.c_d1_drop = nn.Dropout(self.hparams.drop_prob) + self.c_d1_bn = nn.BatchNorm1d(self.hidden_dim) + self.c_d1_drop = nn.Dropout(self.drop_prob) self.c_d2 = nn.Linear( - in_features=self.hparams.hidden_dim, - out_features=self.hparams.out_features + in_features=self.hidden_dim, + out_features=self.out_features ) def forward(self, x): @@ -84,7 +92,7 @@ def loss(self, labels, logits): return nll def prepare_data(self): - _ = TrialMNIST(root=self.hparams.data_root, train=True, download=True) + _ = TrialMNIST(root=self.data_root, train=True, download=True) @staticmethod def get_default_hparams(continue_training: bool = False, hpc_exp_number: int = 0) -> Namespace: diff --git a/tests/base/model_utilities.py b/tests/base/model_utilities.py index e1a40f95b804f..ce34b39b162f8 100644 --- a/tests/base/model_utilities.py +++ b/tests/base/model_utilities.py @@ -7,11 +7,11 @@ class ModelTemplateData: hparams: ... def dataloader(self, train): - dataset = TrialMNIST(root=self.hparams.data_root, train=train, download=True) + dataset = TrialMNIST(root=self.data_root, train=train, download=True) loader = DataLoader( dataset=dataset, - batch_size=self.hparams.batch_size, + batch_size=self.batch_size, # test and valid shall not be shuffled shuffle=train, ) diff --git a/tests/base/models.py b/tests/base/models.py index fed694891c291..8bb4015cd904f 100644 --- a/tests/base/models.py +++ b/tests/base/models.py @@ -69,7 +69,7 @@ class TestGAN(LightningModule): def __init__(self, hparams): super().__init__() - self.hparams = hparams + self = hparams # networks mnist_shape = (1, 28, 28) @@ -93,7 +93,7 @@ def training_step(self, batch, batch_idx, optimizer_idx=None): # train generator if optimizer_idx == 0: # sample noise - z = torch.randn(imgs.shape[0], self.hparams.hidden_dim) + z = torch.randn(imgs.shape[0], self.hidden_dim) z = z.type_as(imgs) # generate images @@ -142,9 +142,9 @@ def training_step(self, batch, batch_idx, optimizer_idx=None): return output def configure_optimizers(self): - lr = self.hparams.learning_rate - b1 = self.hparams.b1 - b2 = self.hparams.b2 + lr = self.learning_rate + b1 = self.b1 + b2 = self.b2 opt_g = torch.optim.Adam(self.generator.parameters(), lr=lr, betas=(b1, b2)) opt_d = torch.optim.Adam(self.discriminator.parameters(), lr=lr, betas=(b1, b2)) diff --git a/tests/loggers/test_base.py b/tests/loggers/test_base.py index 4d0b869a5d398..60a1a026caec0 100644 --- a/tests/loggers/test_base.py +++ b/tests/loggers/test_base.py @@ -30,7 +30,7 @@ def test_logger_collection(): class CustomLogger(LightningLoggerBase): def __init__(self): super().__init__() - self.hparams_logged = None + self_logged = None self.metrics_logged = None self.finalized = False @@ -40,7 +40,7 @@ def experiment(self): @rank_zero_only def log_hyperparams(self, params): - self.hparams_logged = params + self_logged = params @rank_zero_only def log_metrics(self, metrics, step): diff --git a/tests/trainer/test_dataloaders.py b/tests/trainer/test_dataloaders.py index 
f7a197708d0db..ba78d31fcf5dd 100644 --- a/tests/trainer/test_dataloaders.py +++ b/tests/trainer/test_dataloaders.py @@ -423,7 +423,7 @@ def train_dataloader(self): dataset = Subset(dataloader.dataset, range(size)) dataloader = DataLoader( dataset, - batch_size=self.hparams.batch_size, + batch_size=self.batch_size, drop_last=False, ) return dataloader diff --git a/tests/trainer/test_trainer.py b/tests/trainer/test_trainer.py index 1c2c169191564..52f713661eee3 100644 --- a/tests/trainer/test_trainer.py +++ b/tests/trainer/test_trainer.py @@ -19,6 +19,10 @@ from tests.base import EvalModelTemplate +def test_no_hparams(tmpdir): + + model = EvalModelTemplate() + def test_model_pickle(tmpdir): import pickle From 29d3e0aa830e23ee1e760f4350952efe9e307a52 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Tue, 19 May 2020 16:14:02 -0400 Subject: [PATCH 006/100] replace self.hparams --- docs/source/hyperparameters.rst | 40 ++++++++++++++++++++++----------- 1 file changed, 27 insertions(+), 13 deletions(-) diff --git a/docs/source/hyperparameters.rst b/docs/source/hyperparameters.rst index fe3d94b8cc668..d2d390348a3df 100644 --- a/docs/source/hyperparameters.rst +++ b/docs/source/hyperparameters.rst @@ -105,15 +105,12 @@ modify the network and read those values in the LightningModule class LitMNIST(LightningModule): - def __init__(self, hparams): + def __init__(self, layer_1_dim): super().__init__() - # do this to save all arguments in any logger (tensorboard) - self = hparams - - self.layer_1 = torch.nn.Linear(28 * 28, hparams.layer_1_dim) - self.layer_2 = torch.nn.Linear(hparams.layer_1_dim, hparams.layer_2_dim) - self.layer_3 = torch.nn.Linear(hparams.layer_2_dim, 10) + self.layer_1 = torch.nn.Linear(28 * 28, self.layer_1_dim) + self.layer_2 = torch.nn.Linear(self.layer_1_dim, self.layer_2_dim) + self.layer_3 = torch.nn.Linear(self.layer_2_dim, 10) def train_dataloader(self): return DataLoader(mnist_train, batch_size=self.batch_size) @@ -136,14 +133,31 @@ Now pass in the params when you init your model parser = ArgumentParser() parser = LitMNIST.add_model_specific_args(parser) - hparams = parser.parse_args() - model = LitMNIST(hparams) + args = parser.parse_args() + model = LitMNIST(**args) + +Within any LightningModule all the arguments you pass into your `__init__` will be available +simply with `self.arg`. However, we won't overwrite any other arguments you have already defined. +We will also add all of those values to the tensorboard hparams tab (unless it's an object which +we won't). We also will store those values into checkpoints for you which you can use to init your +models. + +.. code-block:: python + + class LitMNIST(LightningModule): + + def __init__(self, layer_1_dim, some_other_param): + super().__init__() + + self.layer_1 = torch.nn.Linear(28 * 28, self.layer_1_dim) + + # self.some_other_param is automatically available + self.layer_2 = torch.nn.Linear(self.layer_1_dim, self.some_other_param) + self.layer_3 = torch.nn.Linear(self.some_other_param, 10) -The line `self = hparams` is very special. This line assigns your hparams to the LightningModule. -This does two things: + self.some_other_param = 12 + # but you can override it as normal -1. It adds them automatically to TensorBoard logs under the hparams tab. -2. Lightning will save those hparams to the checkpoint and use them to restore the module correctly. 
Trainer args ^^^^^^^^^^^^ From f50842479c45698daae7673183c50b005f759f81 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Tue, 19 May 2020 16:19:49 -0400 Subject: [PATCH 007/100] replace self.hparams --- pytorch_lightning/core/lightning.py | 25 +++++++++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) diff --git a/pytorch_lightning/core/lightning.py b/pytorch_lightning/core/lightning.py index d2a06d13f6082..c47231acceee6 100644 --- a/pytorch_lightning/core/lightning.py +++ b/pytorch_lightning/core/lightning.py @@ -1784,10 +1784,31 @@ def _auto_register_hparams(self): # don't add self if name not in ['self']: + # only track some things + is_trackable = self._is_allowed_hparam_value(value) + # don't overwrite something already set - if not hasattr(child, name): + if not hasattr(child, name) and is_trackable: setattr(child, name, value) - module_arguments[name] = value + + if is_trackable: + module_arguments[name] = value # set module_arguments in child setattr(child, 'module_arguments', module_arguments) + + def _is_allowed_hparam_value(self, value): + # allow all types of lists + is_dict = isinstance(value, dict) + is_list = isinstance(value, list) + is_tuple = isinstance(value, tuple) + + if is_dict or is_list or is_tuple: + return True + + # don't allow other objects + if isinstance(value, object): + return False + + # allow everything else + return True From 28b85bd4ccea51d9c5a4c814bdb8e6884e9059bb Mon Sep 17 00:00:00 2001 From: William Falcon Date: Tue, 19 May 2020 16:21:56 -0400 Subject: [PATCH 008/100] replace self.hparams --- docs/source/lr_finder.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/source/lr_finder.rst b/docs/source/lr_finder.rst index f426438f4d55b..a6c62212481a0 100755 --- a/docs/source/lr_finder.rst +++ b/docs/source/lr_finder.rst @@ -22,12 +22,12 @@ Warnings: - For the moment, this feature only works with models having a single optimizer. - LR support for DDP is not implemented yet, it is comming soon. -Using Lightnings build-in LR finder +Using Lightnings built-in LR finder ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ In the most basic use case, this feature can be enabled during trainer construction with ``Trainer(auto_lr_find=True)``. When ``.fit(model)`` is called, the lr finder -will automatically be run before any training is done. The ``lr`` that is found +will automatically be run before any training is done. The ``LR`` that is found and used will be written to the console and logged together with all other hyperparameters of the model. From 355eb7a36c0fcbcc43dbefa89fe12616a7349a03 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Tue, 19 May 2020 16:43:33 -0400 Subject: [PATCH 009/100] replace self.hparams --- docs/source/lr_finder.rst | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/docs/source/lr_finder.rst b/docs/source/lr_finder.rst index a6c62212481a0..783b926f2f1ba 100755 --- a/docs/source/lr_finder.rst +++ b/docs/source/lr_finder.rst @@ -22,19 +22,19 @@ Warnings: - For the moment, this feature only works with models having a single optimizer. - LR support for DDP is not implemented yet, it is comming soon. -Using Lightnings built-in LR finder -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +Using Lightning's built-in LR finder +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ In the most basic use case, this feature can be enabled during trainer construction -with ``Trainer(auto_lr_find=True)``. When ``.fit(model)`` is called, the lr finder -will automatically be run before any training is done. 
The ``LR`` that is found +with ``Trainer(auto_lr_find=True)``. When ``.fit(model)`` is called, the LR finder +will automatically be run before any training is done. The ``lr`` that is found and used will be written to the console and logged together with all other hyperparameters of the model. .. testcode:: # default, no automatic learning rate finder - trainer = Trainer(auto_lr_find=True) + trainer = Trainer(auto_lr_find=False) When the ``lr`` or ``learning_rate`` key in hparams exists, this flag sets your learning_rate. In both cases, if the respective fields are not found, an error will be thrown. From 5cc272a3d1a47ee66853638e96b799a76ce49ed3 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Tue, 19 May 2020 16:43:51 -0400 Subject: [PATCH 010/100] replace self.hparams --- docs/source/lr_finder.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/lr_finder.rst b/docs/source/lr_finder.rst index 783b926f2f1ba..b79a70f693d0e 100755 --- a/docs/source/lr_finder.rst +++ b/docs/source/lr_finder.rst @@ -33,7 +33,7 @@ hyperparameters of the model. .. testcode:: - # default, no automatic learning rate finder + # default: no automatic learning rate finder trainer = Trainer(auto_lr_find=False) When the ``lr`` or ``learning_rate`` key in hparams exists, this flag sets your learning_rate. From a5bcd1c49508a8b2d37edd8eda5dca5db0b33530 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Tue, 19 May 2020 16:46:56 -0400 Subject: [PATCH 011/100] replace self.hparams --- docs/source/lr_finder.rst | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/docs/source/lr_finder.rst b/docs/source/lr_finder.rst index b79a70f693d0e..63c632e05cf89 100755 --- a/docs/source/lr_finder.rst +++ b/docs/source/lr_finder.rst @@ -36,16 +36,12 @@ hyperparameters of the model. # default: no automatic learning rate finder trainer = Trainer(auto_lr_find=False) -When the ``lr`` or ``learning_rate`` key in hparams exists, this flag sets your learning_rate. -In both cases, if the respective fields are not found, an error will be thrown. - +This flag sets your learning_rate which can be accessed via `self.lr|self.learning_rate`. + .. testcode:: class LitModel(LightningModule): - def __init__(self, hparams): - self = hparams - def configure_optimizers(self): return Adam(self.parameters(), lr=self.lr|self.learning_rate) @@ -57,7 +53,7 @@ To use an arbitrary value set it in the parameter. .. testcode:: - # to set to your own hparams.my_value + # to set to your own `self.my_value` trainer = Trainer(auto_lr_find='my_value') Under the hood, when you call fit, this is what happens. @@ -79,7 +75,7 @@ of this would look like .. 
code-block:: python - model = MyModelClass(hparams) + model = MyModelClass() trainer = Trainer() # Run learning rate finder @@ -96,8 +92,9 @@ of this would look like new_lr = lr_finder.suggestion() # update hparams of the model - model.hparams.lr = new_lr - + model.lr = new_lr + model.learning_rate = new_lr + # Fit model trainer.fit(model) From a4a7407abebd1d41e3de28eb8cef99f6b9b0a815 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Tue, 19 May 2020 16:55:21 -0400 Subject: [PATCH 012/100] replace self.hparams --- docs/source/weights_loading.rst | 51 ++++++++++++++++----------------- 1 file changed, 25 insertions(+), 26 deletions(-) diff --git a/docs/source/weights_loading.rst b/docs/source/weights_loading.rst index d99db851ceb2e..a69e1557dffcc 100644 --- a/docs/source/weights_loading.rst +++ b/docs/source/weights_loading.rst @@ -59,24 +59,20 @@ Or disable it by passing trainer = Trainer(checkpoint_callback=False) -The Lightning checkpoint also saves the hparams (hyperparams) passed into the LightningModule init. - -.. note:: hparams is a `Namespace `_. +The Lightning checkpoint also saves the arguments passed into the LightningModule init +under the `module_arguments` key in the checkpoint. .. testcode:: - from argparse import Namespace - - # usually these come from command line args - args = Namespace(learning_rate=0.001) - - # define you module to have hparams as the first arg - # this means your checkpoint will have everything that went into making - # this model (in this case, learning rate) class MyLightningModule(LightningModule): - def __init__(self, hparams, *args, **kwargs): - self = hparams + def __init__(self, learning_rate, *args, **kwargs): + super().__init__() + + # all init args were saved to the checkpoint + checkpoint = torch.load(CKPT_PATH) + print(checkpoint['module_arguments']) + # {'learning_rate': the_value} Manual saving ^^^^^^^^^^^^^ @@ -92,37 +88,40 @@ You can manually save checkpoints and restore your model from the checkpointed s Checkpoint Loading ------------------ -To load a model along with its weights, biases and hyperparameters use following method. +To load a model along with its weights, biases and model_arguments use following method. .. code-block:: python model = MyLightingModule.load_from_checkpoint(PATH) - model.eval() - y_hat = model(x) - -The above only works if you used `hparams` in your model definition -.. testcode:: - - class LitModel(LightningModule): + print(model.learning_rate) + # prints the learning_rate you used in this checkpoint - def __init__(self, hparams): - self = hparams - self.l1 = nn.Linear(hparams.in_dim, hparams.out_dim) + model.eval() + y_hat = model(x) -But if you don't and instead pass individual parameters +But if you don't want to use the values saved in the checkpoint, pass in your own here .. testcode:: class LitModel(LightningModule): def __init__(self, in_dim, out_dim): - self.l1 = nn.Linear(in_dim, out_dim) + super().__init__() + self.l1 = nn.Linear(self.in_dim, self.out_dim) you can restore the model like this .. code-block:: python + # if you train and save the model like this it will use these values when loading + # the weights. 
But you can overwrite this + LitModel(in_dim=32, out_dim=10) + + # uses in_dim=32, out_dim=10 + model = LitModel.load_from_checkpoint(PATH) + + # uses in_dim=128, out_dim=10 model = LitModel.load_from_checkpoint(PATH, in_dim=128, out_dim=10) From 8f7e8a2b3a774f64c9f7ab044241541decb415e5 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Tue, 19 May 2020 16:57:38 -0400 Subject: [PATCH 013/100] replace self.hparams --- pl_examples/domain_templates/computer_vision_fine_tuning.py | 2 +- pl_examples/domain_templates/generative_adversarial_net.py | 2 +- pl_examples/domain_templates/imagenet.py | 2 +- pl_examples/domain_templates/reinforce_learn_Qnet.py | 2 +- pl_examples/domain_templates/semantic_segmentation.py | 2 +- pl_examples/models/lightning_template.py | 2 +- pytorch_lightning/profiler/__init__.py | 2 +- tests/base/models.py | 2 +- 8 files changed, 8 insertions(+), 8 deletions(-) diff --git a/pl_examples/domain_templates/computer_vision_fine_tuning.py b/pl_examples/domain_templates/computer_vision_fine_tuning.py index 6c69f4956e350..0595630a06197 100644 --- a/pl_examples/domain_templates/computer_vision_fine_tuning.py +++ b/pl_examples/domain_templates/computer_vision_fine_tuning.py @@ -151,7 +151,7 @@ def __init__(self, hparams: argparse.Namespace, dl_path: Union[str, Path]) -> None: super().__init__() - self = hparams + self.dl_path = dl_path self.__build_model() diff --git a/pl_examples/domain_templates/generative_adversarial_net.py b/pl_examples/domain_templates/generative_adversarial_net.py index e8467739635b8..5ed213a154819 100644 --- a/pl_examples/domain_templates/generative_adversarial_net.py +++ b/pl_examples/domain_templates/generative_adversarial_net.py @@ -74,7 +74,7 @@ class GAN(LightningModule): def __init__(self, hparams): super().__init__() - self = hparams + # networks mnist_shape = (1, 28, 28) diff --git a/pl_examples/domain_templates/imagenet.py b/pl_examples/domain_templates/imagenet.py index 46162676718b3..9629f1c50f381 100644 --- a/pl_examples/domain_templates/imagenet.py +++ b/pl_examples/domain_templates/imagenet.py @@ -34,7 +34,7 @@ def __init__(self, hparams): TODO: add docstring here """ super().__init__() - self = hparams + self.model = models.__dict__[self.arch](pretrained=self.pretrained) def forward(self, x): diff --git a/pl_examples/domain_templates/reinforce_learn_Qnet.py b/pl_examples/domain_templates/reinforce_learn_Qnet.py index 10600047517b3..301b9cc81a5af 100644 --- a/pl_examples/domain_templates/reinforce_learn_Qnet.py +++ b/pl_examples/domain_templates/reinforce_learn_Qnet.py @@ -192,7 +192,7 @@ class DQNLightning(pl.LightningModule): def __init__(self, hparams: argparse.Namespace) -> None: super().__init__() - self = hparams + self.env = gym.make(self.env) obs_size = self.env.observation_space.shape[0] diff --git a/pl_examples/domain_templates/semantic_segmentation.py b/pl_examples/domain_templates/semantic_segmentation.py index 8a96bcd861037..86f6fced01970 100644 --- a/pl_examples/domain_templates/semantic_segmentation.py +++ b/pl_examples/domain_templates/semantic_segmentation.py @@ -130,7 +130,7 @@ class SegModel(pl.LightningModule): def __init__(self, hparams): super().__init__() - self = hparams + self.data_path = hparams.data_path self.batch_size = hparams.batch_size self.learning_rate = hparams.lr diff --git a/pl_examples/models/lightning_template.py b/pl_examples/models/lightning_template.py index 3e8ed72f1ca6e..c7e6c08b41620 100644 --- a/pl_examples/models/lightning_template.py +++ b/pl_examples/models/lightning_template.py @@ -45,7 +45,7 
@@ def __init__(self, hparams): """ # init superclass super().__init__() - self = hparams + self.c_d1 = nn.Linear(in_features=self.in_features, out_features=self.hidden_dim) self.c_d1_bn = nn.BatchNorm1d(self.hidden_dim) diff --git a/pytorch_lightning/profiler/__init__.py b/pytorch_lightning/profiler/__init__.py index 85d531fdaccc1..3bf0ad4e8bfac 100644 --- a/pytorch_lightning/profiler/__init__.py +++ b/pytorch_lightning/profiler/__init__.py @@ -99,7 +99,7 @@ class MyModel(LightningModule): def __init__(self, hparams, profiler=None): - self = hparams + self.profiler = profiler or PassThroughProfiler() def custom_processing_step(self, data): diff --git a/tests/base/models.py b/tests/base/models.py index 8bb4015cd904f..0ff3157efab08 100644 --- a/tests/base/models.py +++ b/tests/base/models.py @@ -69,7 +69,7 @@ class TestGAN(LightningModule): def __init__(self, hparams): super().__init__() - self = hparams + # networks mnist_shape = (1, 28, 28) From b1cd0b50710bae8ba01bd8227c8fa934b58edd2d Mon Sep 17 00:00:00 2001 From: William Falcon Date: Tue, 19 May 2020 17:02:31 -0400 Subject: [PATCH 014/100] replace self.hparams --- .../domain_templates/computer_vision_fine_tuning.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/pl_examples/domain_templates/computer_vision_fine_tuning.py b/pl_examples/domain_templates/computer_vision_fine_tuning.py index 0595630a06197..55168b1827bf6 100644 --- a/pl_examples/domain_templates/computer_vision_fine_tuning.py +++ b/pl_examples/domain_templates/computer_vision_fine_tuning.py @@ -148,7 +148,13 @@ class TransferLearningModel(pl.LightningModule): dl_path: Path where the data will be downloaded """ def __init__(self, - hparams: argparse.Namespace, + backbone='resnet50', + train_bn=True, + milestones=(5, 10), + batch_size=8, + lr=1e-2, + lr_scheduler_gamma=1e-1, + num_workers=6, dl_path: Union[str, Path]) -> None: super().__init__() From e97237eaa89026ebbc419017749adadf12fdc887 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Tue, 19 May 2020 17:05:02 -0400 Subject: [PATCH 015/100] replace self.hparams --- pl_examples/domain_templates/generative_adversarial_net.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/pl_examples/domain_templates/generative_adversarial_net.py b/pl_examples/domain_templates/generative_adversarial_net.py index 5ed213a154819..2c22599ca1d17 100644 --- a/pl_examples/domain_templates/generative_adversarial_net.py +++ b/pl_examples/domain_templates/generative_adversarial_net.py @@ -72,13 +72,12 @@ def forward(self, img): class GAN(LightningModule): - def __init__(self, hparams): + def __init__(self, latent_dim, lr=0.0002, b1=0.5, b2=0.999, batch_size=64): super().__init__() - # networks mnist_shape = (1, 28, 28) - self.generator = Generator(latent_dim=hparams.latent_dim, img_shape=mnist_shape) + self.generator = Generator(latent_dim=latent_dim, img_shape=mnist_shape) self.discriminator = Discriminator(img_shape=mnist_shape) # cache for generated images From a2f6cb5eae26bf9a3e261d8afe3615efb59c81b1 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Tue, 19 May 2020 17:05:43 -0400 Subject: [PATCH 016/100] replace self.hparams --- pl_examples/domain_templates/generative_adversarial_net.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pl_examples/domain_templates/generative_adversarial_net.py b/pl_examples/domain_templates/generative_adversarial_net.py index 2c22599ca1d17..59f3fdf5bd3b5 100644 --- a/pl_examples/domain_templates/generative_adversarial_net.py +++ 
b/pl_examples/domain_templates/generative_adversarial_net.py @@ -72,7 +72,7 @@ def forward(self, img): class GAN(LightningModule): - def __init__(self, latent_dim, lr=0.0002, b1=0.5, b2=0.999, batch_size=64): + def __init__(self, latent_dim=100, lr=0.0002, b1=0.5, b2=0.999, batch_size=64): super().__init__() # networks From 9216d286ba2a9eb44efa563afc67314aceab480a Mon Sep 17 00:00:00 2001 From: William Falcon Date: Tue, 19 May 2020 17:09:01 -0400 Subject: [PATCH 017/100] replace self.hparams --- .../computer_vision_fine_tuning.py | 14 +++++++------- pl_examples/domain_templates/imagenet.py | 3 +-- 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/pl_examples/domain_templates/computer_vision_fine_tuning.py b/pl_examples/domain_templates/computer_vision_fine_tuning.py index 55168b1827bf6..eb1e000cd0ed4 100644 --- a/pl_examples/domain_templates/computer_vision_fine_tuning.py +++ b/pl_examples/domain_templates/computer_vision_fine_tuning.py @@ -403,28 +403,28 @@ def add_model_specific_args(parent_parser): return parser -def main(hparams: argparse.Namespace) -> None: +def main(args: argparse.Namespace) -> None: """Train the model. Args: - hparams: Model hyper-parameters + args: Model hyper-parameters Note: For the sake of the example, the images dataset will be downloaded to a temporary directory. """ - with TemporaryDirectory(dir=hparams.root_data_path) as tmp_dir: + with TemporaryDirectory(dir=args.root_data_path) as tmp_dir: - model = TransferLearningModel(hparams, dl_path=tmp_dir) + model = TransferLearningModel(**args, dl_path=tmp_dir) trainer = pl.Trainer( weights_summary=None, show_progress_bar=True, num_sanity_val_steps=0, - gpus=hparams.gpus, - min_epochs=hparams.nb_epochs, - max_epochs=hparams.nb_epochs) + gpus=args.gpus, + min_epochs=args.nb_epochs, + max_epochs=args.nb_epochs) trainer.fit(model) diff --git a/pl_examples/domain_templates/imagenet.py b/pl_examples/domain_templates/imagenet.py index 9629f1c50f381..231e0a8834570 100644 --- a/pl_examples/domain_templates/imagenet.py +++ b/pl_examples/domain_templates/imagenet.py @@ -29,12 +29,11 @@ class ImageNetLightningModel(LightningModule): - def __init__(self, hparams): + def __init__(self, arch, pretrained, lr, momentum, weight_decay, data_path, batch_size): """ TODO: add docstring here """ super().__init__() - self.model = models.__dict__[self.arch](pretrained=self.pretrained) def forward(self, x): From 7cbc1b2e70adc7f44ab94a2ac73d616d8ef1f1f7 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Tue, 19 May 2020 17:11:26 -0400 Subject: [PATCH 018/100] replace self.hparams --- pl_examples/domain_templates/reinforce_learn_Qnet.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/pl_examples/domain_templates/reinforce_learn_Qnet.py b/pl_examples/domain_templates/reinforce_learn_Qnet.py index 301b9cc81a5af..f76ce54abf68c 100644 --- a/pl_examples/domain_templates/reinforce_learn_Qnet.py +++ b/pl_examples/domain_templates/reinforce_learn_Qnet.py @@ -190,10 +190,18 @@ def play_step(self, net: nn.Module, epsilon: float = 0.0, device: str = 'cpu') - class DQNLightning(pl.LightningModule): """ Basic DQN Model """ - def __init__(self, hparams: argparse.Namespace) -> None: + def __init__(self, + replay_size, + warm_start_steps, + gamma, eps_start, + eps_end, + eps_last_frame, + sync_rate, + lr, + episode_length, + batch_size) -> None: super().__init__() - self.env = gym.make(self.env) obs_size = self.env.observation_space.shape[0] n_actions = self.env.action_space.n From 
137ae1326d89ae43994df0653af8d88c00b5ae1a Mon Sep 17 00:00:00 2001 From: William Falcon Date: Tue, 19 May 2020 17:12:00 -0400 Subject: [PATCH 019/100] replace self.hparams --- pl_examples/domain_templates/reinforce_learn_Qnet.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pl_examples/domain_templates/reinforce_learn_Qnet.py b/pl_examples/domain_templates/reinforce_learn_Qnet.py index f76ce54abf68c..b29913c12f0b8 100644 --- a/pl_examples/domain_templates/reinforce_learn_Qnet.py +++ b/pl_examples/domain_templates/reinforce_learn_Qnet.py @@ -322,8 +322,8 @@ def get_device(self, batch) -> str: return batch[0].device.index if self.on_gpu else 'cpu' -def main(hparams) -> None: - model = DQNLightning(hparams) +def main(args) -> None: + model = DQNLightning(**args) trainer = pl.Trainer( gpus=1, From b6a93369b679ead707914d78c5076967b736657a Mon Sep 17 00:00:00 2001 From: William Falcon Date: Tue, 19 May 2020 17:13:53 -0400 Subject: [PATCH 020/100] replace self.hparams --- pl_examples/domain_templates/semantic_segmentation.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/pl_examples/domain_templates/semantic_segmentation.py b/pl_examples/domain_templates/semantic_segmentation.py index 86f6fced01970..b1ac9cbe63727 100644 --- a/pl_examples/domain_templates/semantic_segmentation.py +++ b/pl_examples/domain_templates/semantic_segmentation.py @@ -128,14 +128,11 @@ class SegModel(pl.LightningModule): Adam optimizer is used along with Cosine Annealing learning rate scheduler. """ - def __init__(self, hparams): + def __init__(self, data_path, batch_size, lr, num_layers, features_start, bilinear): super().__init__() - self.data_path = hparams.data_path - self.batch_size = hparams.batch_size - self.learning_rate = hparams.lr - self.net = UNet(num_classes=19, num_layers=hparams.num_layers, - features_start=hparams.features_start, bilinear=hparams.bilinear) + self.net = UNet(num_classes=19, num_layers=self.num_layers, + features_start=self.features_start, bilinear=self.bilinear) self.transform = transforms.Compose([ transforms.ToTensor(), transforms.Normalize(mean=[0.35675976, 0.37380189, 0.3764753], From 6ea138c8135e05f87b21b4dce00603cf278c77c4 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Tue, 19 May 2020 17:14:14 -0400 Subject: [PATCH 021/100] replace self.hparams --- pl_examples/domain_templates/semantic_segmentation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pl_examples/domain_templates/semantic_segmentation.py b/pl_examples/domain_templates/semantic_segmentation.py index b1ac9cbe63727..df031dfb47ad0 100644 --- a/pl_examples/domain_templates/semantic_segmentation.py +++ b/pl_examples/domain_templates/semantic_segmentation.py @@ -182,7 +182,7 @@ def main(hparams): # ------------------------ # 1 INIT LIGHTNING MODEL # ------------------------ - model = SegModel(hparams) + model = SegModel(**hparams) # ------------------------ # 2 SET LOGGER From 485ce2028b92a0c001444fadf15d38a45b205e95 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Tue, 19 May 2020 17:16:43 -0400 Subject: [PATCH 022/100] replace self.hparams --- pl_examples/models/lightning_template.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/pl_examples/models/lightning_template.py b/pl_examples/models/lightning_template.py index c7e6c08b41620..337d5ec0805b9 100644 --- a/pl_examples/models/lightning_template.py +++ b/pl_examples/models/lightning_template.py @@ -3,7 +3,6 @@ """ import os from argparse import ArgumentParser 
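# Editor's note (not part of the patch series): the example conversions in the commits
# above and below all follow one pattern. A minimal sketch of that pattern, assuming the
# auto-registration behaviour added earlier in this series (arguments passed to __init__
# become attributes on the module and are recorded for checkpointing); the module name,
# argument names and layer below are hypothetical:

import torch
from argparse import ArgumentParser

import pytorch_lightning as pl


class LitClassifier(pl.LightningModule):
    def __init__(self, hidden_dim=128, learning_rate=1e-3):
        # no `hparams` argument: super().__init__() registers hidden_dim and
        # learning_rate on self and stores them as `module_arguments`
        super().__init__()
        self.layer = torch.nn.Linear(28 * 28, self.hidden_dim)


parser = ArgumentParser()
parser.add_argument('--hidden_dim', type=int, default=128)
parser.add_argument('--learning_rate', type=float, default=1e-3)
args = parser.parse_args()

# splat the parsed arguments instead of passing a single `hparams` namespace
model = LitClassifier(**vars(args))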
-from collections import OrderedDict import torch import torch.nn as nn @@ -34,12 +33,19 @@ class LightningTemplateModel(LightningModule): ... out_features=10, ... hidden_dim=1000, ... ) - >>> from argparse import Namespace - >>> hparams = Namespace(**params) - >>> model = LightningTemplateModel(hparams) + >>> model = LightningTemplateModel(**params) """ - def __init__(self, hparams): + def __init__(self, + drop_prob=0.2, + batch_size=2, + in_features=28 * 28, + learning_rate=0.001 * 8, + optimizer_name='adam', + data_root='./datasets', + out_features=10, + hidden_dim=1000, + ): """ Pass in hyperparameters as a `argparse.Namespace` or a `dict` to the model. """ From 14dab1bde0a25d6ab6dc1b78e3c81262b9e0dd3e Mon Sep 17 00:00:00 2001 From: William Falcon Date: Tue, 19 May 2020 17:17:48 -0400 Subject: [PATCH 023/100] replace self.hparams --- pl_examples/models/lightning_template.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pl_examples/models/lightning_template.py b/pl_examples/models/lightning_template.py index 337d5ec0805b9..e5023c20443b1 100644 --- a/pl_examples/models/lightning_template.py +++ b/pl_examples/models/lightning_template.py @@ -153,7 +153,7 @@ def test_dataloader(self): @staticmethod def add_model_specific_args(parent_parser, root_dir): # pragma: no-cover """ - Parameters you define here will be available to your model through `self`. + Define parameters that only apply to this model """ parser = ArgumentParser(parents=[parent_parser]) From 268277a4e3138bbfba3979e58c8a12db6c0db5ff Mon Sep 17 00:00:00 2001 From: William Falcon Date: Tue, 19 May 2020 17:18:31 -0400 Subject: [PATCH 024/100] replace self.hparams --- pytorch_lightning/core/lightning.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/pytorch_lightning/core/lightning.py b/pytorch_lightning/core/lightning.py index c47231acceee6..a5947c6139177 100644 --- a/pytorch_lightning/core/lightning.py +++ b/pytorch_lightning/core/lightning.py @@ -65,8 +65,6 @@ def __init__(self, *args, **kwargs): #: True if using amp self.use_amp = False - self = None - #: Current dtype self._dtype = torch.float From 2111e4bc7d0f83979941107ddf444aaa90d2e33c Mon Sep 17 00:00:00 2001 From: William Falcon Date: Tue, 19 May 2020 17:24:09 -0400 Subject: [PATCH 025/100] replace self.hparams --- pytorch_lightning/core/lightning.py | 32 +++++------------------------ 1 file changed, 5 insertions(+), 27 deletions(-) diff --git a/pytorch_lightning/core/lightning.py b/pytorch_lightning/core/lightning.py index a5947c6139177..fd9686d0705c8 100644 --- a/pytorch_lightning/core/lightning.py +++ b/pytorch_lightning/core/lightning.py @@ -1606,35 +1606,13 @@ def __init__(self, hparams: dict): @classmethod def _load_model_state(cls, checkpoint: Dict[str, Any], *args, **kwargs) -> 'LightningModule': - cls_takes_hparams = 'hparams' in inspect.signature(cls.__init__).parameters - ckpt_hparams = checkpoint.get('hparams') - - if cls_takes_hparams: - if ckpt_hparams is not None: - hparams_type = checkpoint.get('hparams_type', 'Namespace') - if hparams_type.lower() == 'dict': - hparams = ckpt_hparams - elif hparams_type.lower() == 'namespace': - hparams = Namespace(**ckpt_hparams) - else: - rank_zero_warn( - f"Checkpoint does not contain hyperparameters but {cls.__name__}'s __init__" - " contains argument 'hparams'. Will pass in an empty Namespace instead." - " Did you forget to store your model hyperparameters in self?" 
- ) - hparams = {} - else: # The user's LightningModule does not define a hparams argument - if ckpt_hparams is None: - hparams = None - else: - raise MisconfigurationException( - f"Checkpoint contains hyperparameters but {cls.__name__}'s __init__ " - f"is missing the argument 'hparams'. Are you loading the correct checkpoint?" - ) + + # pass in the values we saved automatically + if 'module_arguments' in checkpoint: + model_args = checkpoint['module_arguments'] + kwargs.update(**model_args) # load the state_dict on the model automatically - if cls_takes_hparams: - kwargs.update(hparams=hparams) model = cls(*args, **kwargs) model.load_state_dict(checkpoint['state_dict']) From 07a1c00e43bf0f8732799cf332074725a2e0b262 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Tue, 19 May 2020 17:27:24 -0400 Subject: [PATCH 026/100] replace self.hparams --- pytorch_lightning/loggers/tensorboard.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pytorch_lightning/loggers/tensorboard.py b/pytorch_lightning/loggers/tensorboard.py index 0b6b53a22c26b..62965d783231c 100644 --- a/pytorch_lightning/loggers/tensorboard.py +++ b/pytorch_lightning/loggers/tensorboard.py @@ -56,7 +56,7 @@ def __init__(self, self._version = version self._experiment = None - self = {} + self.hparams = {} self._kwargs = kwargs @property @@ -107,7 +107,7 @@ def log_hyperparams(self, params: Union[Dict[str, Any], Namespace], params = self._convert_params(params) # store params to output - self.update(params) + self.hparams.update(params) # format params into the suitable for tensorboard params = self._flatten_dict(params) @@ -158,7 +158,7 @@ def save(self) -> None: hparams_file = os.path.join(dir_path, self.NAME_HPARAMS_FILE) # save the metatags file - save_hparams_to_yaml(hparams_file, self) + save_hparams_to_yaml(hparams_file, self.hparams) @rank_zero_only def finalize(self, status: str) -> None: From 90a1226c069d6fc94a8748caad159dc7fb776514 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Tue, 19 May 2020 17:28:47 -0400 Subject: [PATCH 027/100] replace self.hparams --- pytorch_lightning/profiler/__init__.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/pytorch_lightning/profiler/__init__.py b/pytorch_lightning/profiler/__init__.py index 3bf0ad4e8bfac..fc684d143e4b8 100644 --- a/pytorch_lightning/profiler/__init__.py +++ b/pytorch_lightning/profiler/__init__.py @@ -98,8 +98,7 @@ from pytorch_lightning.profiler import Profiler, PassThroughProfiler class MyModel(LightningModule): - def __init__(self, hparams, profiler=None): - + def __init__(self, profiler=None): self.profiler = profiler or PassThroughProfiler() def custom_processing_step(self, data): @@ -108,7 +107,7 @@ def custom_processing_step(self, data): return data profiler = Profiler() - model = MyModel(hparams, profiler) + model = MyModel(profiler) trainer = Trainer(profiler=profiler, max_epochs=1) """ From 4429d222f7085fc62641dd05dcbd247797860001 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Tue, 19 May 2020 17:30:34 -0400 Subject: [PATCH 028/100] replace self.hparams --- pytorch_lightning/trainer/trainer.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pytorch_lightning/trainer/trainer.py b/pytorch_lightning/trainer/trainer.py index e4a22e6de3589..f2ef291d1deda 100644 --- a/pytorch_lightning/trainer/trainer.py +++ b/pytorch_lightning/trainer/trainer.py @@ -82,7 +82,7 @@ class Trainer( 'gradient_clip', 'nb_gpu_nodes', 'max_nb_epochs', 'min_nb_epochs', 'add_row_log_interval', 
'nb_sanity_val_steps', 'tng_tqdm_dic', ) - DEPRECATED_IN_0_9 = ('use_amp', 'show_progress_bar', 'training_tqdm_dict') + DEPRECATED_IN_0_9 = ('use_amp', 'show_progress_bar', 'training_tqdm_dict', 'num_tpu_cores') def __init__( self, @@ -97,7 +97,7 @@ def __init__( num_processes: int = 1, gpus: Optional[Union[List[int], str, int]] = None, auto_select_gpus: bool = False, - num_tpu_cores: Optional[int] = None, + tpu_cores: Optional[Union[List[int], int]] = None, log_gpu_memory: Optional[str] = None, progress_bar_refresh_rate: int = 1, overfit_pct: float = 0.0, @@ -133,6 +133,7 @@ def __init__( progress_bar_callback: Optional[Union[ProgressBarBase, bool]] = True, terminate_on_nan: bool = False, auto_scale_batch_size: Union[str, bool] = False, + num_tpu_cores: Optional[int] = None, # backward compatible, todo: remove in v0.9.0 amp_level: str = 'O1', # backward compatible, todo: remove in v0.8.0 default_save_path=None, # backward compatible, todo: remove in v0.8.0 gradient_clip=None, # backward compatible, todo: remove in v0.8.0 From 6060a028527c81ad21febaea62059f68ccd88bcf Mon Sep 17 00:00:00 2001 From: William Falcon Date: Tue, 19 May 2020 17:32:53 -0400 Subject: [PATCH 029/100] replace self.hparams --- pytorch_lightning/trainer/trainer.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pytorch_lightning/trainer/trainer.py b/pytorch_lightning/trainer/trainer.py index f2ef291d1deda..9b57b0bd81038 100644 --- a/pytorch_lightning/trainer/trainer.py +++ b/pytorch_lightning/trainer/trainer.py @@ -189,7 +189,10 @@ def __init__( GPUs are configured to be in "exclusive mode", such that only one process at a time can access them. - num_tpu_cores: How many TPU cores to train on (1 or 8). + tpu_cores: How many TPU cores to train on (1 or 8) / Single TPU to train on [1] + + num_tpu_cores: How many TPU cores to train on (1 or 8) + .. warning:: .. deprecated:: 0.7.6. Will remove 0.9.0. log_gpu_memory: None, 'min_max', 'all'. Might slow performance From 6f856df228248d9e5bdc5743e7407d3b7dc868d2 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Tue, 19 May 2020 17:36:14 -0400 Subject: [PATCH 030/100] replace self.hparams --- pytorch_lightning/trainer/trainer.py | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/pytorch_lightning/trainer/trainer.py b/pytorch_lightning/trainer/trainer.py index 9b57b0bd81038..f44df74559832 100644 --- a/pytorch_lightning/trainer/trainer.py +++ b/pytorch_lightning/trainer/trainer.py @@ -346,9 +346,19 @@ def __init__( self.on_gpu = True if (gpus and torch.cuda.is_available()) else False # tpu config - self.on_tpu = num_tpu_cores is not None - self.num_tpu_cores = num_tpu_cores - assert num_tpu_cores in [1, 8, None], 'num_tpu_cores can only be 1 or 8' + if num_tpu_cores is not None: + rank_zero_warn("Argument `num_tpu_cores` is now set by `tpu_cores` since v0.7.6" + " and this argument will be removed in v0.9.0", DeprecationWarning) + + if tpu_cores is None: + tpu_cores = num_tpu_cores + self.on_tpu = tpu_cores is not None + self.tpu_cores = tpu_cores + assert self.tpu_cores in (1, 8, None) or ( + isinstance(self.tpu_cores, (list, tuple, set)) and len(self.tpu_cores) == 1 + ), '`tpu_cores` can only be 1, 8 or [<1-8>]' + + self.tpu_id = tpu_cores[0] if isinstance(tpu_cores, list) else None if num_processes != 1 and distributed_backend != "ddp_cpu": rank_zero_warn("num_processes is only used for distributed_backend=\"ddp_cpu\". 
Ignoring it.") From da385fe4bed5ae66be7faf1f8daaa2dddce1b6a8 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Tue, 19 May 2020 17:37:45 -0400 Subject: [PATCH 031/100] replace self.hparams --- pytorch_lightning/trainer/trainer.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/pytorch_lightning/trainer/trainer.py b/pytorch_lightning/trainer/trainer.py index f44df74559832..10e6b9d7a0568 100644 --- a/pytorch_lightning/trainer/trainer.py +++ b/pytorch_lightning/trainer/trainer.py @@ -872,7 +872,7 @@ def fit( self.single_gpu_train(model) elif self.use_tpu: # pragma: no-cover - log.info(f'training on {self.num_tpu_cores} TPU cores') + log.info(f'training on {self.tpu_cores} TPU cores') # COLAB_GPU is an env var available by default in Colab environments. start_method = 'fork' if self.on_colab_kaggle else 'spawn' @@ -881,7 +881,10 @@ def fit( self.model = model # train - xmp.spawn(self.tpu_train, args=(model,), nprocs=self.num_tpu_cores, start_method=start_method) + if self.tpu_id is not None: + self.tpu_train(self.tpu_id, model) + else: + xmp.spawn(self.tpu_train, args=(model,), nprocs=self.tpu_cores, start_method=start_method) # load weights if not interrupted self.load_spawn_weights(model) From 065226d6806b8327d0d41670b27ae8d3e595232e Mon Sep 17 00:00:00 2001 From: William Falcon Date: Tue, 19 May 2020 17:38:21 -0400 Subject: [PATCH 032/100] replace self.hparams --- pytorch_lightning/trainer/trainer.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pytorch_lightning/trainer/trainer.py b/pytorch_lightning/trainer/trainer.py index 10e6b9d7a0568..75361a1ac1e35 100644 --- a/pytorch_lightning/trainer/trainer.py +++ b/pytorch_lightning/trainer/trainer.py @@ -491,7 +491,6 @@ def __init__( # override dist backend when using tpus if self.on_tpu: self.init_tpu() - self.current_tpu_idx = None # init flags for SLURM+ddp to work self.proc_rank = 0 From f634a8edcce626f84f95f8a79df717b0dfdb3cbd Mon Sep 17 00:00:00 2001 From: William Falcon Date: Tue, 19 May 2020 17:39:58 -0400 Subject: [PATCH 033/100] replace self.hparams --- tests/base/models.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/base/models.py b/tests/base/models.py index 0ff3157efab08..b4ae936334dcb 100644 --- a/tests/base/models.py +++ b/tests/base/models.py @@ -70,7 +70,6 @@ class TestGAN(LightningModule): def __init__(self, hparams): super().__init__() - # networks mnist_shape = (1, 28, 28) self.generator = Generator(latent_dim=hparams.hidden_dim, img_shape=mnist_shape) From 34055b51df9173f93891d06eac028dd7c33cf8bc Mon Sep 17 00:00:00 2001 From: William Falcon Date: Tue, 19 May 2020 17:41:02 -0400 Subject: [PATCH 034/100] replace self.hparams --- tests/loggers/test_base.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/loggers/test_base.py b/tests/loggers/test_base.py index 60a1a026caec0..4d0b869a5d398 100644 --- a/tests/loggers/test_base.py +++ b/tests/loggers/test_base.py @@ -30,7 +30,7 @@ def test_logger_collection(): class CustomLogger(LightningLoggerBase): def __init__(self): super().__init__() - self_logged = None + self.hparams_logged = None self.metrics_logged = None self.finalized = False @@ -40,7 +40,7 @@ def experiment(self): @rank_zero_only def log_hyperparams(self, params): - self_logged = params + self.hparams_logged = params @rank_zero_only def log_metrics(self, metrics, step): From e05c11bd5343c053272d8034c82832bfd44f1c2d Mon Sep 17 00:00:00 2001 From: William Falcon Date: Tue, 19 May 2020 17:47:37 -0400 Subject: [PATCH 035/100] replace self.hparams --- 
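Editor's note (not part of the patch): the test added in this commit exercises the new
checkpoint round trip end to end. A condensed sketch of the behaviour it asserts, written
against the `module_arguments` key the series settles on in later commits;
`EvalModelTemplate` is the reusable test model from `tests/base`:

import os

import torch
from pytorch_lightning import Trainer
from tests.base import EvalModelTemplate

model = EvalModelTemplate(batch_size=179)       # init args become attributes automatically
trainer = Trainer(max_steps=20)
trainer.fit(model)

# the checkpoint carries the init arguments under `module_arguments`
ckpt_dir = trainer.checkpoint_callback.dirpath
ckpt_name = [f for f in os.listdir(ckpt_dir) if f.endswith('.ckpt')][0]
ckpt_path = os.path.join(ckpt_dir, ckpt_name)
assert torch.load(ckpt_path)['module_arguments']['batch_size'] == 179

# loading needs no hparams object, and any stored argument can be overridden
restored = EvalModelTemplate.load_from_checkpoint(ckpt_path, batch_size=99)
assert restored.batch_size == 99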
tests/trainer/test_trainer.py | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/tests/trainer/test_trainer.py b/tests/trainer/test_trainer.py index 52f713661eee3..1c4a3c674b871 100644 --- a/tests/trainer/test_trainer.py +++ b/tests/trainer/test_trainer.py @@ -19,9 +19,31 @@ from tests.base import EvalModelTemplate -def test_no_hparams(tmpdir): +def test_auto_hparams(tmpdir): + # test that the model automatically sets the args passed into init as attrs model = EvalModelTemplate() + assert model.batch_size == 32 + model = EvalModelTemplate(batch_size=179) + assert model.batch_size == 179 + + # verify that the checkpoint saved the correct values + trainer = Trainer(max_steps=20) + trainer.fit(model) + raw_checkpoint_path = os.listdir(trainer.checkpoint_callback.dirpath) + raw_checkpoint_path = [x for x in raw_checkpoint_path if '.ckpt' in x][0] + raw_checkpoint = torch.load(raw_checkpoint_path) + assert 'model_arguments' in raw_checkpoint + assert raw_checkpoint['model_arguments']['batch_size'] == 179 + + # verify that model loads correctly + model = EvalModelTemplate.load_from_checkpoint(raw_checkpoint_path) + assert model.batch_size == 179 + + # verify that we can overwrite whatever we want + model = EvalModelTemplate.load_from_checkpoint(raw_checkpoint_path, batch_size=99) + assert model.batch_size == 99 + def test_model_pickle(tmpdir): import pickle From 0937108ecb4c6894c379af82ff405eed73c8ec70 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Tue, 19 May 2020 18:02:31 -0400 Subject: [PATCH 036/100] replace self.hparams --- .run_local_tests.sh | 3 +++ docs/source/slurm.rst | 2 +- pl_examples/__init__.py | 2 +- tests/callbacks/test_callbacks.py | 2 +- tests/loggers/test_base.py | 4 ++-- tests/models/test_cpu.py | 2 +- tests/models/test_gpu.py | 4 ++-- tests/models/test_restore.py | 6 ++--- tests/trainer/test_checks.py | 31 ++++++++---------------- tests/trainer/test_lr_finder.py | 35 ++++++---------------------- tests/trainer/test_optimizers.py | 10 ++++---- tests/trainer/test_trainer.py | 10 ++++---- tests/trainer/test_trainer_tricks.py | 4 ++-- 13 files changed, 43 insertions(+), 72 deletions(-) diff --git a/.run_local_tests.sh b/.run_local_tests.sh index 20fe84ff22fcf..83012a3932a79 100644 --- a/.run_local_tests.sh +++ b/.run_local_tests.sh @@ -14,3 +14,6 @@ rm -rf ./tests/tests/* rm -rf ./lightning_logs python -m coverage run --source pytorch_lightning -m py.test pytorch_lightning tests pl_examples -v --doctest-modules --flake8 python -m coverage report -m + +# specific file +# python -m coverage run --source pytorch_lightning -m py.test -k test_trainer.py --flake8 diff --git a/docs/source/slurm.rst b/docs/source/slurm.rst index ed09e7509b571..b800e356f0a69 100644 --- a/docs/source/slurm.rst +++ b/docs/source/slurm.rst @@ -28,7 +28,7 @@ To train a model using multiple-nodes do the following: # train.py def main(hparams): - model = LightningTemplateModel(hparams) + model = LightningTemplateModel(**hparams) trainer = pl.Trainer( gpus=8, diff --git a/pl_examples/__init__.py b/pl_examples/__init__.py index 1c5908539cfdc..267f797548dd7 100644 --- a/pl_examples/__init__.py +++ b/pl_examples/__init__.py @@ -52,7 +52,7 @@ def main(hparams, cluster, results_dict): # build model - model = MyLightningModule(hparams) + model = MyLightningModule(**hparams) # configure trainer trainer = Trainer() diff --git a/tests/callbacks/test_callbacks.py b/tests/callbacks/test_callbacks.py index 52c03ada3bd62..57b0b537b4dca 100644 --- 
a/tests/callbacks/test_callbacks.py +++ b/tests/callbacks/test_callbacks.py @@ -13,7 +13,7 @@ def test_trainer_callback_system(tmpdir): """Test the callback system.""" hparams = EvalModelTemplate.get_default_hparams() - model = EvalModelTemplate(hparams) + model = EvalModelTemplate(**hparams) def _check_args(trainer, pl_module): assert isinstance(trainer, Trainer) diff --git a/tests/loggers/test_base.py b/tests/loggers/test_base.py index 4d0b869a5d398..e8c8ead2501c3 100644 --- a/tests/loggers/test_base.py +++ b/tests/loggers/test_base.py @@ -61,7 +61,7 @@ def version(self): def test_custom_logger(tmpdir): hparams = EvalModelTemplate.get_default_hparams() - model = EvalModelTemplate(hparams) + model = EvalModelTemplate(**hparams) logger = CustomLogger() @@ -80,7 +80,7 @@ def test_custom_logger(tmpdir): def test_multiple_loggers(tmpdir): hparams = EvalModelTemplate.get_default_hparams() - model = EvalModelTemplate(hparams) + model = EvalModelTemplate(**hparams) logger1 = CustomLogger() logger2 = CustomLogger() diff --git a/tests/models/test_cpu.py b/tests/models/test_cpu.py index f9eb4b9e5810e..c693d73cbc18d 100644 --- a/tests/models/test_cpu.py +++ b/tests/models/test_cpu.py @@ -71,7 +71,7 @@ def test_lbfgs_cpu_model(tmpdir): hparams = EvalModelTemplate.get_default_hparams() setattr(hparams, 'optimizer_name', 'lbfgs') setattr(hparams, 'learning_rate', 0.002) - model = EvalModelTemplate(hparams) + model = EvalModelTemplate(**hparams) model.configure_optimizers = model.configure_optimizers__lbfgs tutils.run_model_test_without_loggers(trainer_options, model, min_acc=0.5) diff --git a/tests/models/test_gpu.py b/tests/models/test_gpu.py index f75b0a1f1a582..4746a494543c9 100644 --- a/tests/models/test_gpu.py +++ b/tests/models/test_gpu.py @@ -65,7 +65,7 @@ def test_ddp_all_dataloaders_passed_to_fit(tmpdir): def test_cpu_slurm_save_load(tmpdir): """Verify model save/load/checkpoint on CPU.""" hparams = EvalModelTemplate.get_default_hparams() - model = EvalModelTemplate(hparams) + model = EvalModelTemplate(**hparams) # logger file to get meta logger = tutils.get_default_logger(tmpdir) @@ -112,7 +112,7 @@ def test_cpu_slurm_save_load(tmpdir): logger=logger, checkpoint_callback=ModelCheckpoint(tmpdir), ) - model = EvalModelTemplate(hparams) + model = EvalModelTemplate(**hparams) # set the epoch start hook so we can predict before the model does the full training def assert_pred_same(): diff --git a/tests/models/test_restore.py b/tests/models/test_restore.py index 73e7362655d2c..62ae4d361a046 100644 --- a/tests/models/test_restore.py +++ b/tests/models/test_restore.py @@ -104,7 +104,7 @@ def test_running_test_pretrained_model_cpu(tmpdir): def test_load_model_from_checkpoint(tmpdir): """Verify test() on pretrained model.""" hparams = EvalModelTemplate.get_default_hparams() - model = EvalModelTemplate(hparams) + model = EvalModelTemplate(**hparams) trainer_options = dict( progress_bar_refresh_rate=0, @@ -146,7 +146,7 @@ def test_load_model_from_checkpoint(tmpdir): def test_dp_resume(tmpdir): """Make sure DP continues training correctly.""" hparams = EvalModelTemplate.get_default_hparams() - model = EvalModelTemplate(hparams) + model = EvalModelTemplate(**hparams) trainer_options = dict( max_epochs=1, @@ -204,7 +204,7 @@ def assert_good_acc(): tutils.run_prediction(dataloader, dp_model, dp=True) # new model - model = EvalModelTemplate(hparams) + model = EvalModelTemplate(**hparams) model.on_train_start = assert_good_acc # fit new model which should load hpc weights diff --git 
a/tests/trainer/test_checks.py b/tests/trainer/test_checks.py index 4d03035b460fd..c3106abc2a94f 100755 --- a/tests/trainer/test_checks.py +++ b/tests/trainer/test_checks.py @@ -19,12 +19,12 @@ def test_wrong_train_setting(tmpdir): trainer = Trainer(default_root_dir=tmpdir, max_epochs=1) with pytest.raises(MisconfigurationException): - model = EvalModelTemplate(hparams) + model = EvalModelTemplate(**hparams) model.train_dataloader = None trainer.fit(model) with pytest.raises(MisconfigurationException): - model = EvalModelTemplate(hparams) + model = EvalModelTemplate(**hparams) model.training_step = None trainer.fit(model) @@ -53,19 +53,19 @@ def test_wrong_validation_settings(tmpdir): # check val_dataloader -> val_step with pytest.raises(MisconfigurationException): - model = EvalModelTemplate(hparams) + model = EvalModelTemplate(**hparams) model.validation_step = None trainer.fit(model) # check val_dataloader + val_step -> val_epoch_end with pytest.warns(RuntimeWarning): - model = EvalModelTemplate(hparams) + model = EvalModelTemplate(**hparams) model.validation_epoch_end = None trainer.fit(model) # check val_step -> val_dataloader with pytest.raises(MisconfigurationException): - model = EvalModelTemplate(hparams) + model = EvalModelTemplate(**hparams) model.val_dataloader = None trainer.fit(model) @@ -84,7 +84,7 @@ def test_wrong_test_settigs(tmpdir): # if have test_dataloader should have test_step # ---------------- with pytest.raises(MisconfigurationException): - model = EvalModelTemplate(hparams) + model = EvalModelTemplate(**hparams) model.test_step = None trainer.fit(model) @@ -92,7 +92,7 @@ def test_wrong_test_settigs(tmpdir): # if have test_dataloader and test_step recommend test_epoch_end # ---------------- with pytest.warns(RuntimeWarning): - model = EvalModelTemplate(hparams) + model = EvalModelTemplate(**hparams) model.test_epoch_end = None trainer.test(model) @@ -100,7 +100,7 @@ def test_wrong_test_settigs(tmpdir): # if have test_step and NO test_dataloader passed in tell user to pass test_dataloader # ---------------- with pytest.raises(MisconfigurationException): - model = EvalModelTemplate(hparams) + model = EvalModelTemplate(**hparams) model.test_dataloader = LightningModule.test_dataloader trainer.test(model) @@ -108,7 +108,7 @@ def test_wrong_test_settigs(tmpdir): # if have test_dataloader and NO test_step tell user to implement test_step # ---------------- with pytest.raises(MisconfigurationException): - model = EvalModelTemplate(hparams) + model = EvalModelTemplate(**hparams) model.test_dataloader = LightningModule.test_dataloader model.test_step = None trainer.test(model, test_dataloaders=model.dataloader(train=False)) @@ -117,18 +117,7 @@ def test_wrong_test_settigs(tmpdir): # if have test_dataloader and test_step but no test_epoch_end warn user # ---------------- with pytest.warns(RuntimeWarning): - model = EvalModelTemplate(hparams) + model = EvalModelTemplate(**hparams) model.test_dataloader = LightningModule.test_dataloader model.test_epoch_end = None trainer.test(model, test_dataloaders=model.dataloader(train=False)) - - # ---------------- - # if we are just testing, no need for train_dataloader, train_step, val_dataloader, and val_step - # ---------------- - model = EvalModelTemplate(hparams) - model.test_dataloader = LightningModule.test_dataloader - model.train_dataloader = None - model.train_step = None - model.val_dataloader = None - model.val_step = None - trainer.test(model, test_dataloaders=model.dataloader(train=False)) diff --git 
a/tests/trainer/test_lr_finder.py b/tests/trainer/test_lr_finder.py index fe4894c3e49de..6902f69d04291 100755 --- a/tests/trainer/test_lr_finder.py +++ b/tests/trainer/test_lr_finder.py @@ -78,7 +78,7 @@ def test_trainer_reset_correctly(tmpdir): def test_trainer_arg_bool(tmpdir): """ Test that setting trainer arg to bool works """ hparams = EvalModelTemplate.get_default_hparams() - model = EvalModelTemplate(hparams) + model = EvalModelTemplate(**hparams) before_lr = hparams.learning_rate # logger file to get meta @@ -98,7 +98,7 @@ def test_trainer_arg_str(tmpdir): """ Test that setting trainer arg to string works """ hparams = EvalModelTemplate.get_default_hparams() hparams.__dict__['my_fancy_lr'] = 1.0 # update with non-standard field - model = EvalModelTemplate(hparams) + model = EvalModelTemplate(**hparams) before_lr = hparams.my_fancy_lr # logger file to get meta @@ -118,13 +118,13 @@ def test_call_to_trainer_method(tmpdir): """ Test that directly calling the trainer method works """ hparams = EvalModelTemplate.get_default_hparams() - model = EvalModelTemplate(hparams) + model = EvalModelTemplate(**hparams) before_lr = hparams.learning_rate # logger file to get meta trainer = Trainer( default_save_path=tmpdir, - max_epochs=5 + max_epochs=5, ) lrfinder = trainer.lr_find(model, mode='linear') @@ -141,7 +141,7 @@ def test_accumulation_and_early_stopping(tmpdir): accumulation also works for this feature """ hparams = EvalModelTemplate.get_default_hparams() - model = EvalModelTemplate(hparams) + model = EvalModelTemplate(**hparams) before_lr = hparams.learning_rate # logger file to get meta @@ -165,12 +165,12 @@ def test_suggestion_parameters_work(tmpdir): """ Test that default skipping does not alter results in basic case """ hparams = EvalModelTemplate.get_default_hparams() - model = EvalModelTemplate(hparams) + model = EvalModelTemplate(**hparams) # logger file to get meta trainer = Trainer( default_save_path=tmpdir, - max_epochs=10 + max_epochs=10, ) lrfinder = trainer.lr_find(model) @@ -179,24 +179,3 @@ def test_suggestion_parameters_work(tmpdir): assert lr1 != lr2, \ 'Skipping parameter did not influence learning rate' - - -def test_suggestion_with_non_finite_values(tmpdir): - """ Test that non-finite values does not alter results """ - - hparams = EvalModelTemplate.get_default_hparams() - model = EvalModelTemplate(hparams) - - # logger file to get meta - trainer = Trainer( - default_save_path=tmpdir, - max_epochs=10 - ) - - lrfinder = trainer.lr_find(model) - before_lr = lrfinder.suggestion() - lrfinder.results['loss'][-1] = float('nan') - after_lr = lrfinder.suggestion() - - assert before_lr == after_lr, \ - 'Learning rate was altered because of non-finite loss values' diff --git a/tests/trainer/test_optimizers.py b/tests/trainer/test_optimizers.py index 06ea784111153..85d70bc953da1 100644 --- a/tests/trainer/test_optimizers.py +++ b/tests/trainer/test_optimizers.py @@ -10,7 +10,7 @@ def test_optimizer_with_scheduling(tmpdir): """ Verify that learning rate scheduling is working """ hparams = EvalModelTemplate.get_default_hparams() - model = EvalModelTemplate(hparams) + model = EvalModelTemplate(**hparams) model.configure_optimizers = model.configure_optimizers__single_scheduler # fit model @@ -41,7 +41,7 @@ def test_multi_optimizer_with_scheduling(tmpdir): """ Verify that learning rate scheduling is working """ hparams = EvalModelTemplate.get_default_hparams() - model = EvalModelTemplate(hparams) + model = EvalModelTemplate(**hparams) model.configure_optimizers = 
model.configure_optimizers__multiple_schedulers # fit model @@ -76,7 +76,7 @@ def test_multi_optimizer_with_scheduling(tmpdir): def test_multi_optimizer_with_scheduling_stepping(tmpdir): hparams = EvalModelTemplate.get_default_hparams() - model = EvalModelTemplate(hparams) + model = EvalModelTemplate(**hparams) model.configure_optimizers = model.configure_optimizers__multiple_schedulers # fit model @@ -115,7 +115,7 @@ def test_multi_optimizer_with_scheduling_stepping(tmpdir): def test_reduce_lr_on_plateau_scheduling(tmpdir): hparams = EvalModelTemplate.get_default_hparams() - model = EvalModelTemplate(hparams) + model = EvalModelTemplate(**hparams) model.configure_optimizers = model.configure_optimizers__reduce_lr_on_plateau # fit model @@ -205,7 +205,7 @@ def test_none_optimizer_warning(): def test_none_optimizer(tmpdir): hparams = EvalModelTemplate.get_default_hparams() - model = EvalModelTemplate(hparams) + model = EvalModelTemplate(**hparams) model.configure_optimizers = model.configure_optimizers__empty # fit model diff --git a/tests/trainer/test_trainer.py b/tests/trainer/test_trainer.py index 1c4a3c674b871..2070a98b1986a 100644 --- a/tests/trainer/test_trainer.py +++ b/tests/trainer/test_trainer.py @@ -375,7 +375,7 @@ def test_resume_from_checkpoint_epoch_restored(tmpdir): def _new_model(): # Create a model that tracks epochs and batches seen - model = EvalModelTemplate(hparams) + model = EvalModelTemplate(**hparams) model.num_epochs_seen = 0 model.num_batches_seen = 0 model.num_on_load_checkpoint_called = 0 @@ -552,22 +552,22 @@ def test_testpass_overrides(tmpdir): # Misconfig when neither test_step or test_end is implemented with pytest.raises(MisconfigurationException, match='.*not implement `test_dataloader`.*'): - model = EvalModelTemplate(hparams) + model = EvalModelTemplate(**hparams) model.test_dataloader = LightningModule.test_dataloader Trainer().test(model) # Misconfig when neither test_step or test_end is implemented with pytest.raises(MisconfigurationException): - model = EvalModelTemplate(hparams) + model = EvalModelTemplate(**hparams) model.test_step = LightningModule.test_step Trainer().test(model) # No exceptions when one or both of test_step or test_end are implemented - model = EvalModelTemplate(hparams) + model = EvalModelTemplate(**hparams) model.test_step_end = LightningModule.test_step_end Trainer().test(model) - model = EvalModelTemplate(hparams) + model = EvalModelTemplate(**hparams) Trainer().test(model) diff --git a/tests/trainer/test_trainer_tricks.py b/tests/trainer/test_trainer_tricks.py index 0b9b548c69c60..fc9d0a8fc0429 100755 --- a/tests/trainer/test_trainer_tricks.py +++ b/tests/trainer/test_trainer_tricks.py @@ -72,7 +72,7 @@ def test_trainer_arg(tmpdir, scale_arg): tutils.reset_seed() hparams = EvalModelTemplate.get_default_hparams() - model = EvalModelTemplate(hparams) + model = EvalModelTemplate(**hparams) before_batch_size = hparams.batch_size # logger file to get meta @@ -94,7 +94,7 @@ def test_call_to_trainer_method(tmpdir, scale_method): tutils.reset_seed() hparams = EvalModelTemplate.get_default_hparams() - model = EvalModelTemplate(hparams) + model = EvalModelTemplate(**hparams) before_batch_size = hparams.batch_size # logger file to get meta From f6587cebb75f81e3e4f1edb2372f194749b20b37 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Tue, 19 May 2020 18:16:02 -0400 Subject: [PATCH 037/100] fixed --- pytorch_lightning/core/lightning.py | 21 ++++---------------- tests/trainer/test_trainer.py | 30 +++++++++++++++++++++++++++++ 2 files 
changed, 34 insertions(+), 17 deletions(-) diff --git a/pytorch_lightning/core/lightning.py b/pytorch_lightning/core/lightning.py index fd9686d0705c8..dca379e758dc9 100644 --- a/pytorch_lightning/core/lightning.py +++ b/pytorch_lightning/core/lightning.py @@ -1746,16 +1746,16 @@ def _auto_register_hparams(self): """ # two frames back is the init of the child module frame = inspect.currentframe() - args = frame.f_back.f_back.f_locals + frame_args = frame.f_back.f_back.f_locals # we'll save hparams automatically (renamed to module_arguments) module_arguments = {} # pull out the child itself to make sure we have no issues - child = args['self'] + child = frame_args['self'] # auto set the attr which enables self.attr anywhere in the code - for name, value in args.items(): + for name, value in frame_args.items(): # don't add self if name not in ['self']: @@ -1774,17 +1774,4 @@ def _auto_register_hparams(self): setattr(child, 'module_arguments', module_arguments) def _is_allowed_hparam_value(self, value): - # allow all types of lists - is_dict = isinstance(value, dict) - is_list = isinstance(value, list) - is_tuple = isinstance(value, tuple) - - if is_dict or is_list or is_tuple: - return True - - # don't allow other objects - if isinstance(value, object): - return False - - # allow everything else - return True + return not hasattr(value, '__dict__') diff --git a/tests/trainer/test_trainer.py b/tests/trainer/test_trainer.py index 2070a98b1986a..faa14c4d0b74a 100644 --- a/tests/trainer/test_trainer.py +++ b/tests/trainer/test_trainer.py @@ -32,6 +32,7 @@ def test_auto_hparams(tmpdir): trainer.fit(model) raw_checkpoint_path = os.listdir(trainer.checkpoint_callback.dirpath) raw_checkpoint_path = [x for x in raw_checkpoint_path if '.ckpt' in x][0] + raw_checkpoint_path = os.path.join(trainer.checkpoint_callback.dirpath, raw_checkpoint_path) raw_checkpoint = torch.load(raw_checkpoint_path) assert 'model_arguments' in raw_checkpoint assert raw_checkpoint['model_arguments']['batch_size'] == 179 @@ -45,6 +46,35 @@ def test_auto_hparams(tmpdir): assert model.batch_size == 99 +def test_auto_hparams_subclass(tmpdir): + class SubClassEvalModelTemplate(EvalModelTemplate): + pass + + # test that the model automatically sets the args passed into init as attrs + model = SubClassEvalModelTemplate() + assert model.batch_size == 32 + model = SubClassEvalModelTemplate(batch_size=179) + assert model.batch_size == 179 + + # verify that the checkpoint saved the correct values + trainer = Trainer(max_steps=20) + trainer.fit(model) + raw_checkpoint_path = os.listdir(trainer.checkpoint_callback.dirpath) + raw_checkpoint_path = [x for x in raw_checkpoint_path if '.ckpt' in x][0] + raw_checkpoint_path = os.path.join(trainer.checkpoint_callback.dirpath, raw_checkpoint_path) + raw_checkpoint = torch.load(raw_checkpoint_path) + assert 'model_arguments' in raw_checkpoint + assert raw_checkpoint['model_arguments']['batch_size'] == 179 + + # verify that model loads correctly + model = SubClassEvalModelTemplate.load_from_checkpoint(raw_checkpoint_path) + assert model.batch_size == 179 + + # verify that we can overwrite whatever we want + model = SubClassEvalModelTemplate.load_from_checkpoint(raw_checkpoint_path, batch_size=99) + assert model.batch_size == 99 + + def test_model_pickle(tmpdir): import pickle From 0303695cb47d527c67f6fd343232792e317d79ca Mon Sep 17 00:00:00 2001 From: William Falcon Date: Tue, 19 May 2020 18:17:32 -0400 Subject: [PATCH 038/100] fixed --- tests/trainer/test_trainer.py | 48 
++++++++++++++++++++--------------- 1 file changed, 28 insertions(+), 20 deletions(-) diff --git a/tests/trainer/test_trainer.py b/tests/trainer/test_trainer.py index faa14c4d0b74a..6ee5408f796df 100644 --- a/tests/trainer/test_trainer.py +++ b/tests/trainer/test_trainer.py @@ -20,30 +20,38 @@ def test_auto_hparams(tmpdir): + class SubClassEvalModelTemplate(EvalModelTemplate): + pass - # test that the model automatically sets the args passed into init as attrs - model = EvalModelTemplate() - assert model.batch_size == 32 - model = EvalModelTemplate(batch_size=179) - assert model.batch_size == 179 + class SubSubClassEvalModelTemplate(SubClassEvalModelTemplate): + pass - # verify that the checkpoint saved the correct values - trainer = Trainer(max_steps=20) - trainer.fit(model) - raw_checkpoint_path = os.listdir(trainer.checkpoint_callback.dirpath) - raw_checkpoint_path = [x for x in raw_checkpoint_path if '.ckpt' in x][0] - raw_checkpoint_path = os.path.join(trainer.checkpoint_callback.dirpath, raw_checkpoint_path) - raw_checkpoint = torch.load(raw_checkpoint_path) - assert 'model_arguments' in raw_checkpoint - assert raw_checkpoint['model_arguments']['batch_size'] == 179 + classes = [SubClassEvalModelTemplate, EvalModelTemplate, SubSubClassEvalModelTemplate] - # verify that model loads correctly - model = EvalModelTemplate.load_from_checkpoint(raw_checkpoint_path) - assert model.batch_size == 179 + for CLASS in classes: + # test that the model automatically sets the args passed into init as attrs + model = CLASS() + assert model.batch_size == 32 + model = CLASS(batch_size=179) + assert model.batch_size == 179 - # verify that we can overwrite whatever we want - model = EvalModelTemplate.load_from_checkpoint(raw_checkpoint_path, batch_size=99) - assert model.batch_size == 99 + # verify that the checkpoint saved the correct values + trainer = Trainer(max_steps=20) + trainer.fit(model) + raw_checkpoint_path = os.listdir(trainer.checkpoint_callback.dirpath) + raw_checkpoint_path = [x for x in raw_checkpoint_path if '.ckpt' in x][0] + raw_checkpoint_path = os.path.join(trainer.checkpoint_callback.dirpath, raw_checkpoint_path) + raw_checkpoint = torch.load(raw_checkpoint_path) + assert 'model_arguments' in raw_checkpoint + assert raw_checkpoint['model_arguments']['batch_size'] == 179 + + # verify that model loads correctly + model = CLASS.load_from_checkpoint(raw_checkpoint_path) + assert model.batch_size == 179 + + # verify that we can overwrite whatever we want + model = CLASS.load_from_checkpoint(raw_checkpoint_path, batch_size=99) + assert model.batch_size == 99 def test_auto_hparams_subclass(tmpdir): From 2b0ceb847d40a585caace110222c8235166e9baf Mon Sep 17 00:00:00 2001 From: William Falcon Date: Tue, 19 May 2020 18:18:02 -0400 Subject: [PATCH 039/100] fixed --- tests/trainer/test_trainer.py | 29 ----------------------------- 1 file changed, 29 deletions(-) diff --git a/tests/trainer/test_trainer.py b/tests/trainer/test_trainer.py index 6ee5408f796df..553879153484d 100644 --- a/tests/trainer/test_trainer.py +++ b/tests/trainer/test_trainer.py @@ -54,35 +54,6 @@ class SubSubClassEvalModelTemplate(SubClassEvalModelTemplate): assert model.batch_size == 99 -def test_auto_hparams_subclass(tmpdir): - class SubClassEvalModelTemplate(EvalModelTemplate): - pass - - # test that the model automatically sets the args passed into init as attrs - model = SubClassEvalModelTemplate() - assert model.batch_size == 32 - model = SubClassEvalModelTemplate(batch_size=179) - assert model.batch_size == 179 - - # 
verify that the checkpoint saved the correct values - trainer = Trainer(max_steps=20) - trainer.fit(model) - raw_checkpoint_path = os.listdir(trainer.checkpoint_callback.dirpath) - raw_checkpoint_path = [x for x in raw_checkpoint_path if '.ckpt' in x][0] - raw_checkpoint_path = os.path.join(trainer.checkpoint_callback.dirpath, raw_checkpoint_path) - raw_checkpoint = torch.load(raw_checkpoint_path) - assert 'model_arguments' in raw_checkpoint - assert raw_checkpoint['model_arguments']['batch_size'] == 179 - - # verify that model loads correctly - model = SubClassEvalModelTemplate.load_from_checkpoint(raw_checkpoint_path) - assert model.batch_size == 179 - - # verify that we can overwrite whatever we want - model = SubClassEvalModelTemplate.load_from_checkpoint(raw_checkpoint_path, batch_size=99) - assert model.batch_size == 99 - - def test_model_pickle(tmpdir): import pickle From e226c88d26e953512a5f4e4631ad9d4c49b73f1b Mon Sep 17 00:00:00 2001 From: William Falcon Date: Tue, 19 May 2020 18:20:28 -0400 Subject: [PATCH 040/100] fixed --- tests/trainer/test_trainer.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/tests/trainer/test_trainer.py b/tests/trainer/test_trainer.py index 553879153484d..c6f8f5c1e63a2 100644 --- a/tests/trainer/test_trainer.py +++ b/tests/trainer/test_trainer.py @@ -61,8 +61,14 @@ def test_model_pickle(tmpdir): pickle.dumps(model) -def test_hparams_save_load(tmpdir): - model = EvalModelTemplate(vars(EvalModelTemplate.get_default_hparams())) +def test_dict_param_save_load(tmpdir): + dict_param = vars(EvalModelTemplate.get_default_hparams()) + + class SubClass(EvalModelTemplate): + def __init__(self, dict_param): + super().__init__() + + model = SubClass(dict_param) trainer = Trainer( default_root_dir=tmpdir, @@ -75,7 +81,7 @@ def test_hparams_save_load(tmpdir): # try to load the model now pretrained_model = tutils.load_model_from_checkpoint( trainer.checkpoint_callback.dirpath, - module_class=EvalModelTemplate + module_class=SubClass ) assert pretrained_model From ec005208aed0fd121c1803c9a1113552c7b81b2f Mon Sep 17 00:00:00 2001 From: William Falcon Date: Tue, 19 May 2020 18:24:08 -0400 Subject: [PATCH 041/100] fixed --- tests/base/model_template.py | 3 +-- tests/trainer/test_trainer.py | 15 ++++++++++++--- 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/tests/base/model_template.py b/tests/base/model_template.py index 9ab186ab54da8..8f64b1efdf9cc 100644 --- a/tests/base/model_template.py +++ b/tests/base/model_template.py @@ -115,5 +115,4 @@ def get_default_hparams(continue_training: bool = False, hpc_exp_number: int = 0 hpc_exp_number=hpc_exp_number, ) - hparams = Namespace(**args) - return hparams + return args diff --git a/tests/trainer/test_trainer.py b/tests/trainer/test_trainer.py index c6f8f5c1e63a2..600aa3c8002a2 100644 --- a/tests/trainer/test_trainer.py +++ b/tests/trainer/test_trainer.py @@ -61,11 +61,20 @@ def test_model_pickle(tmpdir): pickle.dumps(model) -def test_dict_param_save_load(tmpdir): +def test_dict_namespace_param_save_load(tmpdir): + """ + Verifies that a dict and a Namespace can be passed in as args to a model + Args: + tmpdir: + + Returns: + + """ dict_param = vars(EvalModelTemplate.get_default_hparams()) + namespace = Namespace(**dict_param) class SubClass(EvalModelTemplate): - def __init__(self, dict_param): + def __init__(self, dict_param, namespace): super().__init__() model = SubClass(dict_param) @@ -603,7 +612,7 @@ def validation_epoch_end(self, *args, **kwargs): return 
super().validation_epoch_end(*args, **kwargs) hparams = EvalModelTemplate.get_default_hparams() - model = CurrentModel(hparams) + model = CurrentModel(**hparams) trainer_options = dict( progress_bar_refresh_rate=0, From 72793c37fbbda6ec34ccb3ef004ca1e0d2c6e496 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Tue, 19 May 2020 18:29:18 -0400 Subject: [PATCH 042/100] fixed --- pytorch_lightning/trainer/training_io.py | 33 +++++++----------------- 1 file changed, 9 insertions(+), 24 deletions(-) diff --git a/pytorch_lightning/trainer/training_io.py b/pytorch_lightning/trainer/training_io.py index 11771b21961ed..084e4d31d08aa 100644 --- a/pytorch_lightning/trainer/training_io.py +++ b/pytorch_lightning/trainer/training_io.py @@ -264,9 +264,9 @@ def save_checkpoint(self, filepath, weights_only: bool = False): try: self._atomic_save(checkpoint, filepath) except AttributeError as e: - if 'hparams' in checkpoint: - del checkpoint['hparams'] - rank_zero_warn('warning, `hparams` dropped from checkpoint.' + if 'model_arguments' in checkpoint: + del checkpoint['model_arguments'] + rank_zero_warn('warning, `model_arguments` dropped from checkpoint.' f' An attribute is not picklable {e}') self._atomic_save(checkpoint, filepath) @@ -338,28 +338,13 @@ def dump_checkpoint(self, weights_only: bool = False): if self.use_amp and self.use_native_amp: checkpoint['native_amp_scaling_state'] = self.scaler.state_dict() - # add the hparams and state_dict from the model + # add the model_arguments and state_dict from the model model = self.get_model() checkpoint['state_dict'] = model.state_dict() - if hasattr(model, "hparams") and model.hparams is not None: - parsing.clean_namespace(model.hparams) - if isinstance(model.hparams, dict): - checkpoint['hparams_type'] = 'dict' - checkpoint['hparams'] = model.hparams - elif isinstance(model.hparams, Namespace): - checkpoint['hparams_type'] = 'Namespace' - checkpoint['hparams'] = vars(model.hparams) - else: - raise ValueError( - 'The acceptable hparams type is dict or argparse.Namespace,', - f' not {checkpoint["hparams_type"]}' - ) - else: - rank_zero_warn( - "Did not find hyperparameters at model hparams. Saving checkpoint without hyperparameters." - ) + if hasattr(model, 'module_arguments') and model.module_arguments is not None: + checkpoint['module_arguments'] = model.model_arguments # give the model a chance to add a few things model.on_save_checkpoint(checkpoint) @@ -464,9 +449,9 @@ def hpc_save(self, folderpath: str, logger): try: self._atomic_save(checkpoint, filepath) except AttributeError as e: - if 'hparams' in checkpoint: - del checkpoint['hparams'] - rank_zero_warn('warning, `hparams` dropped from checkpoint.' + if 'model_arguments' in checkpoint: + del checkpoint['model_arguments'] + rank_zero_warn('warning, `model_arguments` dropped from checkpoint.' 
f' An attribute is not picklable {e}') self._atomic_save(checkpoint, filepath) From 840265d7066494b31dbdb353226dca63349f0d97 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Tue, 19 May 2020 18:30:05 -0400 Subject: [PATCH 043/100] fixed --- tests/trainer/test_trainer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/trainer/test_trainer.py b/tests/trainer/test_trainer.py index 600aa3c8002a2..1b751628683fe 100644 --- a/tests/trainer/test_trainer.py +++ b/tests/trainer/test_trainer.py @@ -26,7 +26,7 @@ class SubClassEvalModelTemplate(EvalModelTemplate): class SubSubClassEvalModelTemplate(SubClassEvalModelTemplate): pass - classes = [SubClassEvalModelTemplate, EvalModelTemplate, SubSubClassEvalModelTemplate] + classes = [EvalModelTemplate, SubClassEvalModelTemplate, SubSubClassEvalModelTemplate] for CLASS in classes: # test that the model automatically sets the args passed into init as attrs From 4bb28fa19df97d078c08c3d41f552bb244ab9711 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Tue, 19 May 2020 18:31:29 -0400 Subject: [PATCH 044/100] fixed --- docs/source/weights_loading.rst | 2 +- pytorch_lightning/trainer/training_io.py | 16 ++++++++-------- tests/trainer/test_trainer.py | 4 ++-- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/docs/source/weights_loading.rst b/docs/source/weights_loading.rst index a69e1557dffcc..1451c9cddae95 100644 --- a/docs/source/weights_loading.rst +++ b/docs/source/weights_loading.rst @@ -88,7 +88,7 @@ You can manually save checkpoints and restore your model from the checkpointed s Checkpoint Loading ------------------ -To load a model along with its weights, biases and model_arguments use following method. +To load a model along with its weights, biases and module_arguments use following method. .. code-block:: python diff --git a/pytorch_lightning/trainer/training_io.py b/pytorch_lightning/trainer/training_io.py index 084e4d31d08aa..bedc9c5689368 100644 --- a/pytorch_lightning/trainer/training_io.py +++ b/pytorch_lightning/trainer/training_io.py @@ -264,9 +264,9 @@ def save_checkpoint(self, filepath, weights_only: bool = False): try: self._atomic_save(checkpoint, filepath) except AttributeError as e: - if 'model_arguments' in checkpoint: - del checkpoint['model_arguments'] - rank_zero_warn('warning, `model_arguments` dropped from checkpoint.' + if 'module_arguments' in checkpoint: + del checkpoint['module_arguments'] + rank_zero_warn('warning, `module_arguments` dropped from checkpoint.' f' An attribute is not picklable {e}') self._atomic_save(checkpoint, filepath) @@ -338,13 +338,13 @@ def dump_checkpoint(self, weights_only: bool = False): if self.use_amp and self.use_native_amp: checkpoint['native_amp_scaling_state'] = self.scaler.state_dict() - # add the model_arguments and state_dict from the model + # add the module_arguments and state_dict from the model model = self.get_model() checkpoint['state_dict'] = model.state_dict() if hasattr(model, 'module_arguments') and model.module_arguments is not None: - checkpoint['module_arguments'] = model.model_arguments + checkpoint['module_arguments'] = model.module_arguments # give the model a chance to add a few things model.on_save_checkpoint(checkpoint) @@ -449,9 +449,9 @@ def hpc_save(self, folderpath: str, logger): try: self._atomic_save(checkpoint, filepath) except AttributeError as e: - if 'model_arguments' in checkpoint: - del checkpoint['model_arguments'] - rank_zero_warn('warning, `model_arguments` dropped from checkpoint.' 
+ if 'module_arguments' in checkpoint: + del checkpoint['module_arguments'] + rank_zero_warn('warning, `module_arguments` dropped from checkpoint.' f' An attribute is not picklable {e}') self._atomic_save(checkpoint, filepath) diff --git a/tests/trainer/test_trainer.py b/tests/trainer/test_trainer.py index 1b751628683fe..6fedc66d1823c 100644 --- a/tests/trainer/test_trainer.py +++ b/tests/trainer/test_trainer.py @@ -42,8 +42,8 @@ class SubSubClassEvalModelTemplate(SubClassEvalModelTemplate): raw_checkpoint_path = [x for x in raw_checkpoint_path if '.ckpt' in x][0] raw_checkpoint_path = os.path.join(trainer.checkpoint_callback.dirpath, raw_checkpoint_path) raw_checkpoint = torch.load(raw_checkpoint_path) - assert 'model_arguments' in raw_checkpoint - assert raw_checkpoint['model_arguments']['batch_size'] == 179 + assert 'module_arguments' in raw_checkpoint + assert raw_checkpoint['module_arguments']['batch_size'] == 179 # verify that model loads correctly model = CLASS.load_from_checkpoint(raw_checkpoint_path) From 91569a8a2f2e39fa75c305b6b68379fec5a890b8 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Tue, 19 May 2020 18:34:50 -0400 Subject: [PATCH 045/100] fixed --- pytorch_lightning/core/lightning.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pytorch_lightning/core/lightning.py b/pytorch_lightning/core/lightning.py index dca379e758dc9..8525b5ac041dd 100644 --- a/pytorch_lightning/core/lightning.py +++ b/pytorch_lightning/core/lightning.py @@ -1578,6 +1578,9 @@ def __init__(self, hparams: dict): else: checkpoint = torch.load(checkpoint_path, map_location=lambda storage, loc: storage) + if 'module_arguments' in checkpoint: + hparams = checkpoint['module_arguments'] + # add the hparams from csv file to checkpoint if tags_csv is not None: hparams_file = tags_csv @@ -1595,7 +1598,7 @@ def __init__(self, hparams: dict): hparams['on_gpu'] = False # overwrite hparams by the given file - checkpoint['hparams'] = hparams + checkpoint['module_arguments'] = hparams # override the hparam keys that were passed in if hparam_overrides is not None: From 509036eb56e89ea3c8114ae2b916938f0832c3b5 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Tue, 19 May 2020 18:47:13 -0400 Subject: [PATCH 046/100] fixed --- pytorch_lightning/core/lightning.py | 58 ++++------------------------- 1 file changed, 8 insertions(+), 50 deletions(-) diff --git a/pytorch_lightning/core/lightning.py b/pytorch_lightning/core/lightning.py index 8525b5ac041dd..5923c58e847c9 100644 --- a/pytorch_lightning/core/lightning.py +++ b/pytorch_lightning/core/lightning.py @@ -1449,46 +1449,13 @@ def load_from_checkpoint( map_location: Optional[Union[Dict[str, str], str, torch.device, int, Callable]] = None, hparams_file: Optional[str] = None, tags_csv: Optional[str] = None, # backward compatible, todo: remove in v0.9.0 - hparam_overrides: Optional[Dict] = None, **kwargs ) -> 'LightningModule': r""" Primary way of loading a model from a checkpoint. When Lightning saves a checkpoint - it stores the hyperparameters in the checkpoint if you initialized your :class:`LightningModule` - with an argument called ``hparams`` which is an object of :class:`~dict` or - :class:`~argparse.Namespace` (output of :meth:`~argparse.ArgumentParser.parse_args` - when parsing command line arguments). - If you want `hparams` to have a hierarchical structure, you have to define it as :class:`~dict`. - Any other arguments specified through \*args and \*\*kwargs will be passed to the model. 
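# Editor's note (not part of the patch): the override semantics introduced in this commit,
# reduced to a self-contained sketch. Keyword arguments passed to `load_from_checkpoint`
# replace the matching entries stored under `module_arguments` before the constructor is
# called (hypothetical values below, not the exact implementation):

checkpoint = {'module_arguments': {'batch_size': 179, 'hidden_dim': 1000}}
load_kwargs = {'batch_size': 99}                 # what the user passes to load_from_checkpoint

init_args = dict(checkpoint['module_arguments'])                             # start from the saved args
init_args.update({k: v for k, v in load_kwargs.items() if k in init_args})   # explicit kwargs win
assert init_args == {'batch_size': 99, 'hidden_dim': 1000}
# the model is then built as cls(**init_args) and its state_dict restored from the checkpoint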
+        it stores the items in `__init__` in the checkpoint under `module_arguments`
-        Example:
-            .. code-block:: python
-
-                # define hparams as Namespace
-                from argparse import Namespace
-                hparams = Namespace(**{'learning_rate': 0.1})
-
-                model = MyModel(hparams)
-
-                class MyModel(LightningModule):
-                    def __init__(self, hparams: Namespace):
-                        self.learning_rate = hparams.learning_rate
-
-                # ----------
-
-                # define hparams as dict
-                hparams = {
-                    drop_prob: 0.2,
-                    dataloader: {
-                        batch_size: 32
-                    }
-                }
-
-                model = MyModel(hparams)
-
-                class MyModel(LightningModule):
-                    def __init__(self, hparams: dict):
-                        self.learning_rate = hparams['learning_rate']
+        Any arguments specified through \*args and \*\*kwargs will override args stored in `module_arguments`.

        Args:
            checkpoint_path: Path to checkpoint.
@@ -1557,15 +1524,8 @@ def __init__(self, hparams: dict):

            # override some of the params with new values
            MyLightningModule.load_from_checkpoint(
                PATH,
-                hparam_overrides={'num_layers': 128, 'pretrained_ckpt_path': NEW_PATH}
-            )
-
-            # or load passing whatever args the model takes to load
-            MyLightningModule.load_from_checkpoint(
-                'path/to/checkpoint.ckpt',
-                learning_rate=0.1, # These arguments will be passed to the model using **kwargs
-                layers=2,
-                pretrained_model=some_model
+                num_layers=128,
+                pretrained_ckpt_path=NEW_PATH,
            )

            # predict
@@ -1578,9 +1538,6 @@ def __init__(self, hparams: dict):
        else:
            checkpoint = torch.load(checkpoint_path, map_location=lambda storage, loc: storage)

-        if 'module_arguments' in checkpoint:
-            hparams = checkpoint['module_arguments']
-
        # add the hparams from csv file to checkpoint
        if tags_csv is not None:
            hparams_file = tags_csv
@@ -1600,9 +1557,10 @@ def __init__(self, hparams: dict):
            # overwrite hparams by the given file
            checkpoint['module_arguments'] = hparams

-        # override the hparam keys that were passed in
-        if hparam_overrides is not None:
-            update_hparams(hparams, hparam_overrides)
+        # override the module_arguments with values that were passed in
+        for k, v in kwargs.items():
+            if k in checkpoint['module_arguments']:
+                checkpoint['module_arguments'][k] = v

        model = cls._load_model_state(checkpoint, *args, **kwargs)
        return model

From a99ffb73d1d1e04a6edf17681e9c7b5b5af6b0f1 Mon Sep 17 00:00:00 2001
From: William Falcon
Date: Tue, 19 May 2020 19:02:16 -0400
Subject: [PATCH 047/100] fixed

---
 pytorch_lightning/core/lightning.py |  2 ++
 tests/trainer/test_trainer.py       | 11 ++++++++---
 2 files changed, 10 insertions(+), 3 deletions(-)

diff --git a/pytorch_lightning/core/lightning.py b/pytorch_lightning/core/lightning.py
index 5923c58e847c9..e5b353586954e 100644
--- a/pytorch_lightning/core/lightning.py
+++ b/pytorch_lightning/core/lightning.py
@@ -1735,4 +1735,6 @@ def _auto_register_hparams(self):
        setattr(child, 'module_arguments', module_arguments)

    def _is_allowed_hparam_value(self, value):
+        if isinstance(value, Namespace):
+            return True
        return not hasattr(value, '__dict__')
diff --git a/tests/trainer/test_trainer.py b/tests/trainer/test_trainer.py
index 6fedc66d1823c..66d9cb03a6e78 100644
--- a/tests/trainer/test_trainer.py
+++ b/tests/trainer/test_trainer.py
@@ -21,7 +21,8 @@ def test_auto_hparams(tmpdir):
    class SubClassEvalModelTemplate(EvalModelTemplate):
-        pass
+        def __init__(self, subclass_arg=1200):
+            super().__init__()

    class SubSubClassEvalModelTemplate(SubClassEvalModelTemplate):
        pass
@@ -35,6 +36,9 @@ class SubSubClassEvalModelTemplate(SubClassEvalModelTemplate):
        model = CLASS(batch_size=179)
        assert model.batch_size == 179

+        if isinstance(model,
SubClassEvalModelTemplate): + assert model.subclass_arg == 1200 + # verify that the checkpoint saved the correct values trainer = Trainer(max_steps=20) trainer.fit(model) @@ -70,14 +74,14 @@ def test_dict_namespace_param_save_load(tmpdir): Returns: """ - dict_param = vars(EvalModelTemplate.get_default_hparams()) + dict_param = EvalModelTemplate.get_default_hparams() namespace = Namespace(**dict_param) class SubClass(EvalModelTemplate): def __init__(self, dict_param, namespace): super().__init__() - model = SubClass(dict_param) + model = SubClass(dict_param, namespace) trainer = Trainer( default_root_dir=tmpdir, @@ -93,6 +97,7 @@ def __init__(self, dict_param, namespace): module_class=SubClass ) assert pretrained_model + assert hasattr(pretrained_model, 'namespace') def test_no_val_module(tmpdir): From 5c3ea20d29e120a42d1e040de01cd65f9c4e9df6 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Tue, 19 May 2020 19:03:44 -0400 Subject: [PATCH 048/100] fixed --- tests/trainer/test_trainer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/trainer/test_trainer.py b/tests/trainer/test_trainer.py index 66d9cb03a6e78..b85227c7b97a3 100644 --- a/tests/trainer/test_trainer.py +++ b/tests/trainer/test_trainer.py @@ -27,7 +27,7 @@ def __init__(self, subclass_arg=1200): class SubSubClassEvalModelTemplate(SubClassEvalModelTemplate): pass - classes = [EvalModelTemplate, SubClassEvalModelTemplate, SubSubClassEvalModelTemplate] + classes = [SubClassEvalModelTemplate, EvalModelTemplate, SubSubClassEvalModelTemplate] for CLASS in classes: # test that the model automatically sets the args passed into init as attrs From 9d08be3afe51dd6f9cd4407a85239d269b4695b4 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Tue, 19 May 2020 19:24:12 -0400 Subject: [PATCH 049/100] fixed --- tests/trainer/test_trainer.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/trainer/test_trainer.py b/tests/trainer/test_trainer.py index b85227c7b97a3..f897eed3f07f8 100644 --- a/tests/trainer/test_trainer.py +++ b/tests/trainer/test_trainer.py @@ -124,7 +124,7 @@ def test_no_val_module(tmpdir): # assert ckpt has hparams ckpt = torch.load(new_weights_path) - assert 'hparams' in ckpt.keys(), 'hparams missing from checkpoints' + assert 'module_arguments' in ckpt.keys(), 'module_arguments missing from checkpoints' # load new model hparams_path = tutils.get_data_path(logger, path_dir=tmpdir) @@ -639,7 +639,7 @@ def validation_epoch_end(self, *args, **kwargs): '`validation_epoch_end` should not run when `val_percent_check=0`' # check that val_percent_check has no influence when fast_dev_run is turned on - model = CurrentModel(hparams) + model = CurrentModel(**hparams) trainer_options.update(fast_dev_run=True) trainer = Trainer(**trainer_options) result = trainer.fit(model) From 0452418560ce7015dde2bbda326f6204bb109165 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Tue, 19 May 2020 19:24:33 -0400 Subject: [PATCH 050/100] fixed --- .gitignore | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index cb8fd278c5c4f..aa9778a6c6738 100644 --- a/.gitignore +++ b/.gitignore @@ -133,4 +133,5 @@ mnist/ # pl tests ml-runs/ *.zip -pytorch\ lightning \ No newline at end of file +pytorch\ lightning +test-reports/ \ No newline at end of file From 0b5557f9b953f761cc1fb985d609e5c39718ac37 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Wed, 20 May 2020 09:01:21 -0400 Subject: [PATCH 051/100] finished moco --- pytorch_lightning/core/lightning.py | 3 +++ 
tests/trainer/test_trainer.py | 3 ++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/pytorch_lightning/core/lightning.py b/pytorch_lightning/core/lightning.py index e5b353586954e..8147355afcfa2 100644 --- a/pytorch_lightning/core/lightning.py +++ b/pytorch_lightning/core/lightning.py @@ -62,6 +62,9 @@ def __init__(self, *args, **kwargs): #: True if using ddp2 self.use_ddp2 = False + # True if on tpu + self.use_tpu = False + #: True if using amp self.use_amp = False diff --git a/tests/trainer/test_trainer.py b/tests/trainer/test_trainer.py index f897eed3f07f8..60c9b391b2dba 100644 --- a/tests/trainer/test_trainer.py +++ b/tests/trainer/test_trainer.py @@ -21,7 +21,8 @@ def test_auto_hparams(tmpdir): class SubClassEvalModelTemplate(EvalModelTemplate): - def __init__(self, subclass_arg=1200): + object_that_should_not_be_saved = torch.nn.CrossEntropyLoss() + def __init__(self, object_that_should_not_be_saved, subclass_arg=1200): super().__init__() class SubSubClassEvalModelTemplate(SubClassEvalModelTemplate): From 6cd5ea920c5886db059bfaaf8612bde053d41345 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Wed, 20 May 2020 14:24:55 -0400 Subject: [PATCH 052/100] basic --- tests/trainer/test_trainer.py | 37 +++++++++++++++++++++++++++++++++-- 1 file changed, 35 insertions(+), 2 deletions(-) diff --git a/tests/trainer/test_trainer.py b/tests/trainer/test_trainer.py index 60c9b391b2dba..9690ffd0965fa 100644 --- a/tests/trainer/test_trainer.py +++ b/tests/trainer/test_trainer.py @@ -21,8 +21,7 @@ def test_auto_hparams(tmpdir): class SubClassEvalModelTemplate(EvalModelTemplate): - object_that_should_not_be_saved = torch.nn.CrossEntropyLoss() - def __init__(self, object_that_should_not_be_saved, subclass_arg=1200): + def __init__(self, subclass_arg=1200): super().__init__() class SubSubClassEvalModelTemplate(SubClassEvalModelTemplate): @@ -101,6 +100,40 @@ def __init__(self, dict_param, namespace): assert hasattr(pretrained_model, 'namespace') +def test_invalid_param_save_load(tmpdir): + """ + Verifies that a dict and a Namespace can be passed in as args to a model + Args: + tmpdir: + + Returns: + """ + dict_param = EvalModelTemplate.get_default_hparams() + namespace = Namespace(**dict_param) + + class SubClass(EvalModelTemplate): + def __init__(self, dict_param, namespace): + super().__init__() + + model = SubClass(dict_param, namespace) + + trainer = Trainer( + default_root_dir=tmpdir, + max_epochs=1, + ) + # fit model + result = trainer.fit(model) + assert result == 1 + + # try to load the model now + pretrained_model = tutils.load_model_from_checkpoint( + trainer.checkpoint_callback.dirpath, + module_class=SubClass + ) + assert pretrained_model + assert hasattr(pretrained_model, 'namespace') + + def test_no_val_module(tmpdir): """Tests use case where trainer saves the model, and user loads it from tags independently.""" From ed1090c818a99be3de951ba8917e149380cc8c5f Mon Sep 17 00:00:00 2001 From: Jirka Date: Wed, 20 May 2020 20:52:51 +0200 Subject: [PATCH 053/100] testing --- tests/base/model_template.py | 3 ++- tests/models/test_hparams.py | 48 +++++++++++++++++++++++++++++++++++ tests/models/test_restore.py | 7 +++++ tests/trainer/test_trainer.py | 47 ---------------------------------- 4 files changed, 57 insertions(+), 48 deletions(-) create mode 100644 tests/models/test_hparams.py diff --git a/tests/base/model_template.py b/tests/base/model_template.py index 8f64b1efdf9cc..9730575820f05 100644 --- a/tests/base/model_template.py +++ b/tests/base/model_template.py @@ -39,6 +39,7 @@ 
class EvalModelTemplate( """ def __init__(self, + *args, drop_prob=0.2, batch_size=32, in_features=28 * 28, @@ -49,7 +50,7 @@ def __init__(self, hidden_dim=1000, b1=0.5, b2=0.999, - *args, **kwargs) -> object: + **kwargs) -> object: # init superclass super().__init__() diff --git a/tests/models/test_hparams.py b/tests/models/test_hparams.py new file mode 100644 index 0000000000000..a72c5fc264742 --- /dev/null +++ b/tests/models/test_hparams.py @@ -0,0 +1,48 @@ +import os + +import pytest +import torch + +from pytorch_lightning import Trainer +from tests.base import EvalModelTemplate + + +class SubClassEvalModelTemplate(EvalModelTemplate): + object_that_should_not_be_saved = torch.nn.CrossEntropyLoss() + + def __init__(self, *args, subclass_arg=1200, **kwargs): + super().__init__(*args, subclass_arg=subclass_arg, **kwargs) + + +class SubSubClassEvalModelTemplate(SubClassEvalModelTemplate): + pass + + +@pytest.mark.parametrize("cls", [EvalModelTemplate, SubClassEvalModelTemplate, SubSubClassEvalModelTemplate]) +def test_auto_hparams(tmpdir, cls): + # test that the model automatically sets the args passed into init as attrs + model = cls() + assert model.batch_size == 32 + model = cls(batch_size=179) + assert model.batch_size == 179 + + if isinstance(model, SubClassEvalModelTemplate): + assert model.subclass_arg == 1200 + + # verify that the checkpoint saved the correct values + trainer = Trainer(max_steps=5) + trainer.fit(model) + raw_checkpoint_path = os.listdir(trainer.checkpoint_callback.dirpath) + raw_checkpoint_path = [x for x in raw_checkpoint_path if '.ckpt' in x][0] + raw_checkpoint_path = os.path.join(trainer.checkpoint_callback.dirpath, raw_checkpoint_path) + raw_checkpoint = torch.load(raw_checkpoint_path) + assert 'module_arguments' in raw_checkpoint + assert raw_checkpoint['module_arguments']['batch_size'] == 179 + + # verify that model loads correctly + model = cls.load_from_checkpoint(raw_checkpoint_path) + assert model.batch_size == 179 + + # verify that we can overwrite whatever we want + model = cls.load_from_checkpoint(raw_checkpoint_path, batch_size=99) + assert model.batch_size == 99 diff --git a/tests/models/test_restore.py b/tests/models/test_restore.py index 62ae4d361a046..c9684e3db72fe 100644 --- a/tests/models/test_restore.py +++ b/tests/models/test_restore.py @@ -309,3 +309,10 @@ def __init__(self, hparams): # warn if user's model has hparams argument with pytest.warns(UserWarning, match=r".*Will pass in an empty Namespace instead."): CurrentModelUnusedHparams.load_from_checkpoint(last_checkpoint) + + +def test_model_pickle(tmpdir): + import pickle + + model = EvalModelTemplate() + pickle.dumps(model) diff --git a/tests/trainer/test_trainer.py b/tests/trainer/test_trainer.py index 9690ffd0965fa..3e93301a02c49 100644 --- a/tests/trainer/test_trainer.py +++ b/tests/trainer/test_trainer.py @@ -6,7 +6,6 @@ import pytest import torch -import yaml import tests.base.utils as tutils from pytorch_lightning import Callback, LightningModule @@ -19,52 +18,6 @@ from tests.base import EvalModelTemplate -def test_auto_hparams(tmpdir): - class SubClassEvalModelTemplate(EvalModelTemplate): - def __init__(self, subclass_arg=1200): - super().__init__() - - class SubSubClassEvalModelTemplate(SubClassEvalModelTemplate): - pass - - classes = [SubClassEvalModelTemplate, EvalModelTemplate, SubSubClassEvalModelTemplate] - - for CLASS in classes: - # test that the model automatically sets the args passed into init as attrs - model = CLASS() - assert model.batch_size == 32 - model = 
CLASS(batch_size=179) - assert model.batch_size == 179 - - if isinstance(model, SubClassEvalModelTemplate): - assert model.subclass_arg == 1200 - - # verify that the checkpoint saved the correct values - trainer = Trainer(max_steps=20) - trainer.fit(model) - raw_checkpoint_path = os.listdir(trainer.checkpoint_callback.dirpath) - raw_checkpoint_path = [x for x in raw_checkpoint_path if '.ckpt' in x][0] - raw_checkpoint_path = os.path.join(trainer.checkpoint_callback.dirpath, raw_checkpoint_path) - raw_checkpoint = torch.load(raw_checkpoint_path) - assert 'module_arguments' in raw_checkpoint - assert raw_checkpoint['module_arguments']['batch_size'] == 179 - - # verify that model loads correctly - model = CLASS.load_from_checkpoint(raw_checkpoint_path) - assert model.batch_size == 179 - - # verify that we can overwrite whatever we want - model = CLASS.load_from_checkpoint(raw_checkpoint_path, batch_size=99) - assert model.batch_size == 99 - - -def test_model_pickle(tmpdir): - import pickle - - model = EvalModelTemplate() - pickle.dumps(model) - - def test_dict_namespace_param_save_load(tmpdir): """ Verifies that a dict and a Namespace can be passed in as args to a model From 295654e68753a2058ef1b41bd6c7ec3bf75b81a0 Mon Sep 17 00:00:00 2001 From: Jirka Date: Wed, 20 May 2020 21:04:08 +0200 Subject: [PATCH 054/100] todo --- pytorch_lightning/core/lightning.py | 1 + tests/models/test_hparams.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/pytorch_lightning/core/lightning.py b/pytorch_lightning/core/lightning.py index 8147355afcfa2..1a405735c163d 100644 --- a/pytorch_lightning/core/lightning.py +++ b/pytorch_lightning/core/lightning.py @@ -1710,6 +1710,7 @@ def _auto_register_hparams(self): """ # two frames back is the init of the child module frame = inspect.currentframe() + # todo: this shall be flexible to find all init in the path, recursion? frame_args = frame.f_back.f_back.f_locals # we'll save hparams automatically (renamed to module_arguments) diff --git a/tests/models/test_hparams.py b/tests/models/test_hparams.py index a72c5fc264742..dadbfd9d8249d 100644 --- a/tests/models/test_hparams.py +++ b/tests/models/test_hparams.py @@ -11,7 +11,7 @@ class SubClassEvalModelTemplate(EvalModelTemplate): object_that_should_not_be_saved = torch.nn.CrossEntropyLoss() def __init__(self, *args, subclass_arg=1200, **kwargs): - super().__init__(*args, subclass_arg=subclass_arg, **kwargs) + super().__init__() class SubSubClassEvalModelTemplate(SubClassEvalModelTemplate): From 1465a03d6a5a9e690e65408816cb33b078879a2a Mon Sep 17 00:00:00 2001 From: Jirka Date: Wed, 20 May 2020 22:06:55 +0200 Subject: [PATCH 055/100] recurse --- pytorch_lightning/core/lightning.py | 51 ++++++++++++++++------------- 1 file changed, 29 insertions(+), 22 deletions(-) diff --git a/pytorch_lightning/core/lightning.py b/pytorch_lightning/core/lightning.py index 1a405735c163d..6c31fc2c04903 100644 --- a/pytorch_lightning/core/lightning.py +++ b/pytorch_lightning/core/lightning.py @@ -1713,32 +1713,39 @@ def _auto_register_hparams(self): # todo: this shall be flexible to find all init in the path, recursion? 
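The todo above points at the limitation the following hunks address: looking a fixed two frames back misses arguments consumed by intermediate `__init__` calls in a subclass chain. The idea can be sketched independently of Lightning by walking parent frames from the base-class `__init__` and merging the locals of every `__init__` found on the way up. A minimal illustration with hypothetical names (`collect_init_args`, `AutoArgs`, `Child`), plain Python only and not the code added by these patches:

    import inspect

    def collect_init_args(frame, path_args=None):
        # climb the call stack and gather the locals of every __init__ frame
        path_args = {} if path_args is None else path_args
        if frame is None or frame.f_code.co_name != '__init__':
            return path_args
        path_args.update({k: v for k, v in frame.f_locals.items()
                          if k not in ('self', '__class__', 'args', 'kwargs')})
        # a subclass __init__ normally calls super().__init__(), so keep climbing
        return collect_init_args(frame.f_back, path_args)

    class AutoArgs:
        def __init__(self):
            # runs last in the super() chain, so every child __init__ frame sits above
            self.module_arguments = collect_init_args(inspect.currentframe().f_back)

    class Child(AutoArgs):
        def __init__(self, batch_size=32, learning_rate=1e-3):
            super().__init__()

    print(Child(batch_size=179).module_arguments)
    # {'batch_size': 179, 'learning_rate': 0.001}

Default values the caller never passed are still collected because they appear in the frame locals, which matches how the tests can assert `model.batch_size == 32` on a bare `cls()`.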
frame_args = frame.f_back.f_back.f_locals - # we'll save hparams automatically (renamed to module_arguments) - module_arguments = {} - - # pull out the child itself to make sure we have no issues - child = frame_args['self'] + frame_args = _collect_init_args(frame) + init_args = {k: v for k, v in frame_args.items() + if k not in ('args', 'kwargs', 'self', '__class__', 'frame', 'frame_args')} + child = _get_latest_child(frame) - # auto set the attr which enables self.attr anywhere in the code - for name, value in frame_args.items(): + # we'll save hparams automatically (renamed to module_arguments) + for arg, val in init_args.items(): + # don't overwrite something already set + if hasattr(child, arg): + continue + setattr(child, arg, val) - # don't add self - if name not in ['self']: + # set module_arguments in child + setattr(child, 'module_arguments', init_args) - # only track some things - is_trackable = self._is_allowed_hparam_value(value) + # def _is_allowed_hparam_value(self, value): + # if isinstance(value, Namespace): + # return True + # return not hasattr(value, '__dict__') - # don't overwrite something already set - if not hasattr(child, name) and is_trackable: - setattr(child, name, value) - if is_trackable: - module_arguments[name] = value +def _collect_init_args(frame, args={}): + if any(k in frame.f_locals for k in ['self', '__class__']): + local_args = frame.f_locals # .get('frame_args') + local_args.update(local_args.get('kwargs', {})) + args.update(local_args) + return _collect_init_args(frame.f_back, args) + else: + return args - # set module_arguments in child - setattr(child, 'module_arguments', module_arguments) - def _is_allowed_hparam_value(self, value): - if isinstance(value, Namespace): - return True - return not hasattr(value, '__dict__') +def _get_latest_child(frame, child=None): + if 'self' in frame.f_locals: + return _get_latest_child(frame.f_back, frame.f_locals['self']) + else: + return child From 91ab93efdd2de5064c7a6b1d6541cec2ce0c820b Mon Sep 17 00:00:00 2001 From: Jirka Date: Wed, 20 May 2020 22:18:19 +0200 Subject: [PATCH 056/100] hparams --- pytorch_lightning/core/lightning.py | 11 +++++++++-- tests/models/test_hparams.py | 14 +++++++++++++- 2 files changed, 22 insertions(+), 3 deletions(-) diff --git a/pytorch_lightning/core/lightning.py b/pytorch_lightning/core/lightning.py index 6c31fc2c04903..45da4039285ec 100644 --- a/pytorch_lightning/core/lightning.py +++ b/pytorch_lightning/core/lightning.py @@ -1734,17 +1734,24 @@ def _auto_register_hparams(self): # return not hasattr(value, '__dict__') -def _collect_init_args(frame, args={}): +def _collect_init_args(frame, args: dict = {}) -> dict: + """Recursive search for all children.""" if any(k in frame.f_locals for k in ['self', '__class__']): local_args = frame.f_locals # .get('frame_args') local_args.update(local_args.get('kwargs', {})) + # back compatible hparsm as single argument + hparams = local_args.get('hparams') + if hparams: + local_args.update(vars(hparams) if isinstance(hparams, Namespace) else hparams) + # recursive update args.update(local_args) return _collect_init_args(frame.f_back, args) else: return args -def _get_latest_child(frame, child=None): +def _get_latest_child(frame, child: object = None) -> object: + """Recursive search for lowest child.""" if 'self' in frame.f_locals: return _get_latest_child(frame.f_back, frame.f_locals['self']) else: diff --git a/tests/models/test_hparams.py b/tests/models/test_hparams.py index dadbfd9d8249d..6777d17e25c19 100644 --- 
a/tests/models/test_hparams.py +++ b/tests/models/test_hparams.py @@ -14,11 +14,20 @@ def __init__(self, *args, subclass_arg=1200, **kwargs): super().__init__() +class HparamsClassEvalModelTemplate(EvalModelTemplate): + + def __init__(self, *args, hparams=dict(hparam_arg=123), **kwargs): + super().__init__() + + class SubSubClassEvalModelTemplate(SubClassEvalModelTemplate): pass -@pytest.mark.parametrize("cls", [EvalModelTemplate, SubClassEvalModelTemplate, SubSubClassEvalModelTemplate]) +@pytest.mark.parametrize("cls", [EvalModelTemplate, + SubClassEvalModelTemplate, + SubSubClassEvalModelTemplate, + HparamsClassEvalModelTemplate]) def test_auto_hparams(tmpdir, cls): # test that the model automatically sets the args passed into init as attrs model = cls() @@ -29,6 +38,9 @@ def test_auto_hparams(tmpdir, cls): if isinstance(model, SubClassEvalModelTemplate): assert model.subclass_arg == 1200 + if isinstance(model, HparamsClassEvalModelTemplate): + assert model.hparam_arg == 123 + # verify that the checkpoint saved the correct values trainer = Trainer(max_steps=5) trainer.fit(model) From 051972303c74033d62a28634098d8e4343ef9484 Mon Sep 17 00:00:00 2001 From: Jirka Date: Wed, 20 May 2020 22:22:43 +0200 Subject: [PATCH 057/100] persist --- tests/models/test_hparams.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/tests/models/test_hparams.py b/tests/models/test_hparams.py index 6777d17e25c19..5e8cbf0e8ea71 100644 --- a/tests/models/test_hparams.py +++ b/tests/models/test_hparams.py @@ -15,7 +15,6 @@ def __init__(self, *args, subclass_arg=1200, **kwargs): class HparamsClassEvalModelTemplate(EvalModelTemplate): - def __init__(self, *args, hparams=dict(hparam_arg=123), **kwargs): super().__init__() @@ -24,10 +23,17 @@ class SubSubClassEvalModelTemplate(SubClassEvalModelTemplate): pass +class PersistClassEvalModelTemplate(SubClassEvalModelTemplate): + def __init__(self, *args, skip_arg=450, **kwargs): + self.skip_arg = 15 + super().__init__() + + @pytest.mark.parametrize("cls", [EvalModelTemplate, SubClassEvalModelTemplate, SubSubClassEvalModelTemplate, - HparamsClassEvalModelTemplate]) + HparamsClassEvalModelTemplate, + PersistClassEvalModelTemplate]) def test_auto_hparams(tmpdir, cls): # test that the model automatically sets the args passed into init as attrs model = cls() @@ -41,6 +47,9 @@ def test_auto_hparams(tmpdir, cls): if isinstance(model, HparamsClassEvalModelTemplate): assert model.hparam_arg == 123 + if isinstance(model, PersistClassEvalModelTemplate): + assert model.skip_arg == 15 + # verify that the checkpoint saved the correct values trainer = Trainer(max_steps=5) trainer.fit(model) From a19df1d3ac1024083a3fd3bf80e55355616f1844 Mon Sep 17 00:00:00 2001 From: Jirka Date: Wed, 20 May 2020 22:28:32 +0200 Subject: [PATCH 058/100] hparams --- pytorch_lightning/core/lightning.py | 5 ----- tests/models/test_hparams.py | 29 ++++++++++++++++++----------- 2 files changed, 18 insertions(+), 16 deletions(-) diff --git a/pytorch_lightning/core/lightning.py b/pytorch_lightning/core/lightning.py index 45da4039285ec..d20494efd6b40 100644 --- a/pytorch_lightning/core/lightning.py +++ b/pytorch_lightning/core/lightning.py @@ -1728,11 +1728,6 @@ def _auto_register_hparams(self): # set module_arguments in child setattr(child, 'module_arguments', init_args) - # def _is_allowed_hparam_value(self, value): - # if isinstance(value, Namespace): - # return True - # return not hasattr(value, '__dict__') - def _collect_init_args(frame, args: dict = {}) -> dict: """Recursive 
search for all children.""" diff --git a/tests/models/test_hparams.py b/tests/models/test_hparams.py index 5e8cbf0e8ea71..3e3005c3bb8b3 100644 --- a/tests/models/test_hparams.py +++ b/tests/models/test_hparams.py @@ -1,4 +1,5 @@ import os +from argparse import Namespace import pytest import torch @@ -7,33 +8,39 @@ from tests.base import EvalModelTemplate -class SubClassEvalModelTemplate(EvalModelTemplate): +class SubClassEvalModel(EvalModelTemplate): object_that_should_not_be_saved = torch.nn.CrossEntropyLoss() def __init__(self, *args, subclass_arg=1200, **kwargs): super().__init__() -class HparamsClassEvalModelTemplate(EvalModelTemplate): +class HparamsNamespaceEvalModel(EvalModelTemplate): + def __init__(self, *args, hparams=Namespace(hparam_arg=123), **kwargs): + super().__init__() + + +class HparamsDictEvalModel(EvalModelTemplate): def __init__(self, *args, hparams=dict(hparam_arg=123), **kwargs): super().__init__() -class SubSubClassEvalModelTemplate(SubClassEvalModelTemplate): +class SubSubClassEvalModel(SubClassEvalModel): pass -class PersistClassEvalModelTemplate(SubClassEvalModelTemplate): +class PersistClassEvalModel(SubClassEvalModel): def __init__(self, *args, skip_arg=450, **kwargs): self.skip_arg = 15 super().__init__() @pytest.mark.parametrize("cls", [EvalModelTemplate, - SubClassEvalModelTemplate, - SubSubClassEvalModelTemplate, - HparamsClassEvalModelTemplate, - PersistClassEvalModelTemplate]) + SubClassEvalModel, + SubSubClassEvalModel, + HparamsNamespaceEvalModel, + HparamsDictEvalModel, + PersistClassEvalModel]) def test_auto_hparams(tmpdir, cls): # test that the model automatically sets the args passed into init as attrs model = cls() @@ -41,13 +48,13 @@ def test_auto_hparams(tmpdir, cls): model = cls(batch_size=179) assert model.batch_size == 179 - if isinstance(model, SubClassEvalModelTemplate): + if isinstance(model, SubClassEvalModel): assert model.subclass_arg == 1200 - if isinstance(model, HparamsClassEvalModelTemplate): + if isinstance(model, (HparamsNamespaceEvalModel, HparamsDictEvalModel)): assert model.hparam_arg == 123 - if isinstance(model, PersistClassEvalModelTemplate): + if isinstance(model, PersistClassEvalModel): assert model.skip_arg == 15 # verify that the checkpoint saved the correct values From 1f872632d8cd2c5dd96620a58b2ee0eb507521fa Mon Sep 17 00:00:00 2001 From: Jirka Date: Wed, 20 May 2020 22:34:39 +0200 Subject: [PATCH 059/100] chlog --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3f893a298b177..396222dc5964e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -20,6 +20,8 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). 
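For modules that still take a single `hparams` object, the collection step shown above flattens it into individual arguments, so a Namespace and a plain dict end up registered the same way. A minimal sketch of just that flattening rule, with a hypothetical helper name that is not part of the patch:

    from argparse import Namespace

    def flatten_hparams(init_args):
        # back-compat rule: a lone `hparams` Namespace or dict is expanded into
        # individual arguments so attribute-style access keeps working
        hparams = init_args.pop('hparams', None)
        if hparams is not None:
            init_args.update(vars(hparams) if isinstance(hparams, Namespace) else hparams)
        return init_args

    flatten_hparams({'hparams': Namespace(hparam_arg=123)})
    # -> {'hparam_arg': 123}

Either form yields the same attribute names, which is what the parametrized `HparamsNamespaceEvalModel` and `HparamsDictEvalModel` cases assert.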
- Removed non-finite values from loss in `LRFinder` ([#1862](https://github.com/PyTorchLightning/pytorch-lightning/pull/1862)) +- Allow passing Model hyper parameters as complete kwarg list ([#1896](https://github.com/PyTorchLightning/pytorch-lightning/pull/1896)) + ### Deprecated - Dropped official support/testing for older PyTorch versions <1.3 ([#1917](https://github.com/PyTorchLightning/pytorch-lightning/pull/1917)) From f35eab0895527a6aab526e871b3480c4b7ae0c0c Mon Sep 17 00:00:00 2001 From: Jirka Date: Wed, 20 May 2020 23:36:17 +0200 Subject: [PATCH 060/100] tests --- tests/models/test_restore.py | 70 +++++++++++++++++++++++++++++++++++ tests/trainer/test_trainer.py | 69 ---------------------------------- 2 files changed, 70 insertions(+), 69 deletions(-) diff --git a/tests/models/test_restore.py b/tests/models/test_restore.py index c9684e3db72fe..6c4d33374f109 100644 --- a/tests/models/test_restore.py +++ b/tests/models/test_restore.py @@ -1,6 +1,7 @@ import glob import logging as log import os +from argparse import Namespace import pytest import torch @@ -316,3 +317,72 @@ def test_model_pickle(tmpdir): model = EvalModelTemplate() pickle.dumps(model) + + +def test_dict_namespace_param_save_load(tmpdir): + """ + Verifies that a dict and a Namespace can be passed in as args to a model + Args: + tmpdir: + + Returns: + + """ + dict_param = EvalModelTemplate.get_default_hparams() + namespace = Namespace(**dict_param) + + class SubClass(EvalModelTemplate): + def __init__(self, dict_param, namespace): + super().__init__() + + model = SubClass(dict_param, namespace) + + trainer = Trainer( + default_root_dir=tmpdir, + max_epochs=1, + ) + # fit model + result = trainer.fit(model) + assert result == 1 + + # try to load the model now + pretrained_model = tutils.load_model_from_checkpoint( + trainer.checkpoint_callback.dirpath, + module_class=SubClass + ) + assert pretrained_model + assert hasattr(pretrained_model, 'namespace') + + +def test_invalid_param_save_load(tmpdir): + """ + Verifies that a dict and a Namespace can be passed in as args to a model + Args: + tmpdir: + + Returns: + """ + dict_param = EvalModelTemplate.get_default_hparams() + namespace = Namespace(**dict_param) + + class SubClass(EvalModelTemplate): + def __init__(self, dict_param, namespace): + super().__init__() + + model = SubClass(dict_param, namespace) + + trainer = Trainer( + default_root_dir=tmpdir, + max_epochs=1, + ) + # fit model + result = trainer.fit(model) + assert result == 1 + + # try to load the model now + pretrained_model = tutils.load_model_from_checkpoint( + trainer.checkpoint_callback.dirpath, + module_class=SubClass + ) + assert pretrained_model + assert hasattr(pretrained_model, 'namespace') diff --git a/tests/trainer/test_trainer.py b/tests/trainer/test_trainer.py index 3e93301a02c49..7fdc1d00d95b2 100644 --- a/tests/trainer/test_trainer.py +++ b/tests/trainer/test_trainer.py @@ -18,75 +18,6 @@ from tests.base import EvalModelTemplate -def test_dict_namespace_param_save_load(tmpdir): - """ - Verifies that a dict and a Namespace can be passed in as args to a model - Args: - tmpdir: - - Returns: - - """ - dict_param = EvalModelTemplate.get_default_hparams() - namespace = Namespace(**dict_param) - - class SubClass(EvalModelTemplate): - def __init__(self, dict_param, namespace): - super().__init__() - - model = SubClass(dict_param, namespace) - - trainer = Trainer( - default_root_dir=tmpdir, - max_epochs=1, - ) - # fit model - result = trainer.fit(model) - 
assert result == 1 - - # try to load the model now - pretrained_model = tutils.load_model_from_checkpoint( - trainer.checkpoint_callback.dirpath, - module_class=SubClass - ) - assert pretrained_model - assert hasattr(pretrained_model, 'namespace') - - -def test_invalid_param_save_load(tmpdir): - """ - Verifies that a dict and a Namespace can be passed in as args to a model - Args: - tmpdir: - - Returns: - """ - dict_param = EvalModelTemplate.get_default_hparams() - namespace = Namespace(**dict_param) - - class SubClass(EvalModelTemplate): - def __init__(self, dict_param, namespace): - super().__init__() - - model = SubClass(dict_param, namespace) - - trainer = Trainer( - default_root_dir=tmpdir, - max_epochs=1, - ) - # fit model - result = trainer.fit(model) - assert result == 1 - - # try to load the model now - pretrained_model = tutils.load_model_from_checkpoint( - trainer.checkpoint_callback.dirpath, - module_class=SubClass - ) - assert pretrained_model - assert hasattr(pretrained_model, 'namespace') - - def test_no_val_module(tmpdir): """Tests use case where trainer saves the model, and user loads it from tags independently.""" From 3555e834ea283102f6cbf1ffe205a612528f28e5 Mon Sep 17 00:00:00 2001 From: Jirka Date: Wed, 20 May 2020 23:44:10 +0200 Subject: [PATCH 061/100] tests --- tests/models/test_cpu.py | 14 +++++---- tests/models/test_hooks.py | 43 +++++++++++++++++++++++++++- tests/models/test_module_hooks.py | 47 ------------------------------- 3 files changed, 50 insertions(+), 54 deletions(-) delete mode 100644 tests/models/test_module_hooks.py diff --git a/tests/models/test_cpu.py b/tests/models/test_cpu.py index c693d73cbc18d..c3487289af38c 100644 --- a/tests/models/test_cpu.py +++ b/tests/models/test_cpu.py @@ -69,8 +69,8 @@ def test_lbfgs_cpu_model(tmpdir): ) hparams = EvalModelTemplate.get_default_hparams() - setattr(hparams, 'optimizer_name', 'lbfgs') - setattr(hparams, 'learning_rate', 0.002) + hparams.update(optimizer_name='lbfgs', + learning_rate=0.002) model = EvalModelTemplate(**hparams) model.configure_optimizers = model.configure_optimizers__lbfgs tutils.run_model_test_without_loggers(trainer_options, model, min_acc=0.5) @@ -303,10 +303,12 @@ def train_dataloader(self): ) hparams = EvalModelTemplate.get_default_hparams() - hparams.batch_size = batch_size - hparams.in_features = truncated_bptt_steps - hparams.hidden_dim = truncated_bptt_steps - hparams.out_features = truncated_bptt_steps + hparams.update( + batch_size=batch_size, + in_features=truncated_bptt_steps, + hidden_dim=truncated_bptt_steps, + out_features=truncated_bptt_steps + ) model = BpttTestModel(hparams) diff --git a/tests/models/test_hooks.py b/tests/models/test_hooks.py index 90a0468f576af..568a8eae437c2 100644 --- a/tests/models/test_hooks.py +++ b/tests/models/test_hooks.py @@ -1,6 +1,6 @@ import pytest +import torch -import tests.base.utils as tutils from pytorch_lightning import Trainer from tests.base import EvalModelTemplate @@ -27,3 +27,44 @@ def on_before_zero_grad(self, optimizer): model.on_before_zero_grad_called = 0 trainer.test(model) assert 0 == model.on_before_zero_grad_called + + +def test_training_epoch_end_metrics_collection(tmpdir): + """ Test that progress bar metrics also get collected at the end of an epoch. 
""" + num_epochs = 3 + + class CurrentModel(EvalModelTemplate): + + def training_step(self, *args, **kwargs): + output = super().training_step(*args, **kwargs) + output['progress_bar'].update({'step_metric': torch.tensor(-1)}) + output['progress_bar'].update({'shared_metric': 100}) + return output + + def training_epoch_end(self, outputs): + epoch = self.current_epoch + # both scalar tensors and Python numbers are accepted + return { + 'progress_bar': { + f'epoch_metric_{epoch}': torch.tensor(epoch), # add a new metric key every epoch + 'shared_metric': 111, + } + } + + model = CurrentModel() + trainer = Trainer( + max_epochs=num_epochs, + default_root_dir=tmpdir, + overfit_pct=0.1, + ) + result = trainer.fit(model) + assert result == 1 + metrics = trainer.progress_bar_dict + + # metrics added in training step should be unchanged by epoch end method + assert metrics['step_metric'] == -1 + # a metric shared in both methods gets overwritten by epoch_end + assert metrics['shared_metric'] == 111 + # metrics are kept after each epoch + for i in range(num_epochs): + assert metrics[f'epoch_metric_{i}'] == i diff --git a/tests/models/test_module_hooks.py b/tests/models/test_module_hooks.py deleted file mode 100644 index 8b855ba4a70d7..0000000000000 --- a/tests/models/test_module_hooks.py +++ /dev/null @@ -1,47 +0,0 @@ -import torch - -from pytorch_lightning import Trainer -from tests.base import EvalModelTemplate - -import tests.base.utils as tutils - - -def test_training_epoch_end_metrics_collection(tmpdir): - """ Test that progress bar metrics also get collected at the end of an epoch. """ - num_epochs = 3 - - class CurrentModel(EvalModelTemplate): - - def training_step(self, *args, **kwargs): - output = super().training_step(*args, **kwargs) - output['progress_bar'].update({'step_metric': torch.tensor(-1)}) - output['progress_bar'].update({'shared_metric': 100}) - return output - - def training_epoch_end(self, outputs): - epoch = self.current_epoch - # both scalar tensors and Python numbers are accepted - return { - 'progress_bar': { - f'epoch_metric_{epoch}': torch.tensor(epoch), # add a new metric key every epoch - 'shared_metric': 111, - } - } - - model = CurrentModel() - trainer = Trainer( - max_epochs=num_epochs, - default_root_dir=tmpdir, - overfit_pct=0.1, - ) - result = trainer.fit(model) - assert result == 1 - metrics = trainer.progress_bar_dict - - # metrics added in training step should be unchanged by epoch end method - assert metrics['step_metric'] == -1 - # a metric shared in both methods gets overwritten by epoch_end - assert metrics['shared_metric'] == 111 - # metrics are kept after each epoch - for i in range(num_epochs): - assert metrics[f'epoch_metric_{i}'] == i From 2a1b2dcaaa6ed8899adc37ea1247c861d5fb825c Mon Sep 17 00:00:00 2001 From: Jirka Date: Thu, 21 May 2020 01:52:05 +0200 Subject: [PATCH 062/100] tests --- tests/models/test_restore.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/tests/models/test_restore.py b/tests/models/test_restore.py index 6c4d33374f109..6b20061d920df 100644 --- a/tests/models/test_restore.py +++ b/tests/models/test_restore.py @@ -129,8 +129,8 @@ def test_load_model_from_checkpoint(tmpdir): pretrained_model = EvalModelTemplate.load_from_checkpoint(last_checkpoint) # test that hparams loaded correctly - for k, v in vars(hparams).items(): - assert getattr(pretrained_model.hparams, k) == v + for k, v in hparams.items(): + assert getattr(pretrained_model, k) == v # assert weights are the same for (old_name, old_p), 
(new_name, new_p) in zip(model.named_parameters(), pretrained_model.named_parameters()): @@ -272,7 +272,7 @@ def test_model_saving_loading(tmpdir): def test_load_model_with_missing_hparams(tmpdir): - trainer_options = dict( + trainer = Trainer( progress_bar_refresh_rate=0, max_epochs=1, checkpoint_callback=ModelCheckpoint(tmpdir, save_top_k=-1), @@ -280,9 +280,6 @@ def test_load_model_with_missing_hparams(tmpdir): default_root_dir=tmpdir, ) - # fit model - trainer = Trainer(**trainer_options) - class CurrentModelWithoutHparams(EvalModelTemplate): def __init__(self): super().__init__() From 3c79ae32328d15ee285b58d5353330a3bc2b4fec Mon Sep 17 00:00:00 2001 From: Jirka Date: Thu, 21 May 2020 14:01:29 +0200 Subject: [PATCH 063/100] tests --- tests/base/models.py | 8 ++--- tests/models/test_restore.py | 69 ------------------------------------ 2 files changed, 4 insertions(+), 73 deletions(-) diff --git a/tests/base/models.py b/tests/base/models.py index b4ae936334dcb..1e9cbccde3a09 100644 --- a/tests/base/models.py +++ b/tests/base/models.py @@ -18,7 +18,7 @@ class Generator(nn.Module): - def __init__(self, latent_dim, img_shape): + def __init__(self, latent_dim: tuple, img_shape: tuple): super().__init__() self.img_shape = img_shape @@ -45,7 +45,7 @@ def forward(self, z): class Discriminator(nn.Module): - def __init__(self, img_shape): + def __init__(self, img_shape: tuple): super().__init__() self.model = nn.Sequential( @@ -67,12 +67,12 @@ def forward(self, img): class TestGAN(LightningModule): """Implements a basic GAN for the purpose of illustrating multiple optimizers.""" - def __init__(self, hparams): + def __init__(self, hparams: dict): super().__init__() # networks mnist_shape = (1, 28, 28) - self.generator = Generator(latent_dim=hparams.hidden_dim, img_shape=mnist_shape) + self.generator = Generator(latent_dim=self.hidden_dim, img_shape=mnist_shape) self.discriminator = Discriminator(img_shape=mnist_shape) # cache for generated images diff --git a/tests/models/test_restore.py b/tests/models/test_restore.py index 6b20061d920df..a6b6eac073635 100644 --- a/tests/models/test_restore.py +++ b/tests/models/test_restore.py @@ -314,72 +314,3 @@ def test_model_pickle(tmpdir): model = EvalModelTemplate() pickle.dumps(model) - - -def test_dict_namespace_param_save_load(tmpdir): - """ - Verifies that a dict and a Namespace can be passed in as args to a model - Args: - tmpdir: - - Returns: - - """ - dict_param = EvalModelTemplate.get_default_hparams() - namespace = Namespace(**dict_param) - - class SubClass(EvalModelTemplate): - def __init__(self, dict_param, namespace): - super().__init__() - - model = SubClass(dict_param, namespace) - - trainer = Trainer( - default_root_dir=tmpdir, - max_epochs=1, - ) - # fit model - result = trainer.fit(model) - assert result == 1 - - # try to load the model now - pretrained_model = tutils.load_model_from_checkpoint( - trainer.checkpoint_callback.dirpath, - module_class=SubClass - ) - assert pretrained_model - assert hasattr(pretrained_model, 'namespace') - - -def test_invalid_param_save_load(tmpdir): - """ - Verifies that a dict and a Namespace can be passed in as args to a model - Args: - tmpdir: - - Returns: - """ - dict_param = EvalModelTemplate.get_default_hparams() - namespace = Namespace(**dict_param) - - class SubClass(EvalModelTemplate): - def __init__(self, dict_param, namespace): - super().__init__() - - model = SubClass(dict_param, namespace) - - trainer = Trainer( - default_root_dir=tmpdir, - max_epochs=1, - ) - # fit model - result = 
trainer.fit(model) - assert result == 1 - - # try to load the model now - pretrained_model = tutils.load_model_from_checkpoint( - trainer.checkpoint_callback.dirpath, - module_class=SubClass - ) - assert pretrained_model - assert hasattr(pretrained_model, 'namespace') From 57671881ec6bff4bc08a6b55c2a121db44e97105 Mon Sep 17 00:00:00 2001 From: Jirka Date: Thu, 21 May 2020 14:03:33 +0200 Subject: [PATCH 064/100] tests --- tests/trainer/test_lr_finder.py | 6 +++--- tests/trainer/test_trainer_tricks.py | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/trainer/test_lr_finder.py b/tests/trainer/test_lr_finder.py index 6902f69d04291..fadc629e3a6e8 100755 --- a/tests/trainer/test_lr_finder.py +++ b/tests/trainer/test_lr_finder.py @@ -89,7 +89,7 @@ def test_trainer_arg_bool(tmpdir): ) trainer.fit(model) - after_lr = model.hparams.learning_rate + after_lr = model.learning_rate assert before_lr != after_lr, \ 'Learning rate was not altered after running learning rate finder' @@ -109,7 +109,7 @@ def test_trainer_arg_str(tmpdir): ) trainer.fit(model) - after_lr = model.hparams.my_fancy_lr + after_lr = model.my_fancy_lr assert before_lr != after_lr, \ 'Learning rate was not altered after running learning rate finder' @@ -129,7 +129,7 @@ def test_call_to_trainer_method(tmpdir): lrfinder = trainer.lr_find(model, mode='linear') after_lr = lrfinder.suggestion() - model.hparams.learning_rate = after_lr + model.learning_rate = after_lr trainer.fit(model) assert before_lr != after_lr, \ diff --git a/tests/trainer/test_trainer_tricks.py b/tests/trainer/test_trainer_tricks.py index fc9d0a8fc0429..6e848899d9765 100755 --- a/tests/trainer/test_trainer_tricks.py +++ b/tests/trainer/test_trainer_tricks.py @@ -83,7 +83,7 @@ def test_trainer_arg(tmpdir, scale_arg): ) trainer.fit(model) - after_batch_size = model.hparams.batch_size + after_batch_size = model.batch_size assert before_batch_size != after_batch_size, \ 'Batch size was not altered after running auto scaling of batch size' @@ -104,7 +104,7 @@ def test_call_to_trainer_method(tmpdir, scale_method): ) after_batch_size = trainer.scale_batch_size(model, mode=scale_method, max_trials=5) - model.hparams.batch_size = after_batch_size + model.batch_size = after_batch_size trainer.fit(model) assert before_batch_size != after_batch_size, \ From cbb00b52bbca4eb486c873cfaa5a981ff03aa724 Mon Sep 17 00:00:00 2001 From: Jirka Date: Thu, 21 May 2020 14:08:30 +0200 Subject: [PATCH 065/100] tests --- tests/base/model_template.py | 2 +- tests/trainer/test_dataloaders.py | 2 +- tests/trainer/test_lr_finder.py | 8 ++++---- tests/trainer/test_optimizers.py | 6 +++--- tests/trainer/test_trainer_tricks.py | 4 ++-- 5 files changed, 11 insertions(+), 11 deletions(-) diff --git a/tests/base/model_template.py b/tests/base/model_template.py index 9730575820f05..927158d5e7919 100644 --- a/tests/base/model_template.py +++ b/tests/base/model_template.py @@ -96,7 +96,7 @@ def prepare_data(self): _ = TrialMNIST(root=self.data_root, train=True, download=True) @staticmethod - def get_default_hparams(continue_training: bool = False, hpc_exp_number: int = 0) -> Namespace: + def get_default_hparams(continue_training: bool = False, hpc_exp_number: int = 0) -> dict: args = dict( drop_prob=0.2, batch_size=32, diff --git a/tests/trainer/test_dataloaders.py b/tests/trainer/test_dataloaders.py index ba78d31fcf5dd..e310648c115f9 100644 --- a/tests/trainer/test_dataloaders.py +++ b/tests/trainer/test_dataloaders.py @@ -429,7 +429,7 @@ def train_dataloader(self): return 
dataloader hparams = EvalModelTemplate.get_default_hparams() - hparams.batch_size = batch_size + hparams['batch_size'] = batch_size model = CurrentTestModel(hparams) trainer = Trainer( diff --git a/tests/trainer/test_lr_finder.py b/tests/trainer/test_lr_finder.py index fadc629e3a6e8..0c5b92db6cb51 100755 --- a/tests/trainer/test_lr_finder.py +++ b/tests/trainer/test_lr_finder.py @@ -97,10 +97,10 @@ def test_trainer_arg_bool(tmpdir): def test_trainer_arg_str(tmpdir): """ Test that setting trainer arg to string works """ hparams = EvalModelTemplate.get_default_hparams() - hparams.__dict__['my_fancy_lr'] = 1.0 # update with non-standard field + hparams['my_fancy_lr'] = 1.0 # update with non-standard field model = EvalModelTemplate(**hparams) - before_lr = hparams.my_fancy_lr + before_lr = hparams.get('my_fancy_lr') # logger file to get meta trainer = Trainer( default_save_path=tmpdir, @@ -120,7 +120,7 @@ def test_call_to_trainer_method(tmpdir): hparams = EvalModelTemplate.get_default_hparams() model = EvalModelTemplate(**hparams) - before_lr = hparams.learning_rate + before_lr = hparams.get('learning_rate') # logger file to get meta trainer = Trainer( default_save_path=tmpdir, @@ -143,7 +143,7 @@ def test_accumulation_and_early_stopping(tmpdir): hparams = EvalModelTemplate.get_default_hparams() model = EvalModelTemplate(**hparams) - before_lr = hparams.learning_rate + before_lr = hparams.get('learning_rate') # logger file to get meta trainer = Trainer( default_save_path=tmpdir, diff --git a/tests/trainer/test_optimizers.py b/tests/trainer/test_optimizers.py index 85d70bc953da1..fcd07fbb77b19 100644 --- a/tests/trainer/test_optimizers.py +++ b/tests/trainer/test_optimizers.py @@ -23,7 +23,7 @@ def test_optimizer_with_scheduling(tmpdir): results = trainer.fit(model) assert results == 1 - init_lr = hparams.learning_rate + init_lr = hparams.get('learning_rate') adjusted_lr = [pg['lr'] for pg in trainer.optimizers[0].param_groups] assert len(trainer.lr_schedulers) == 1, \ @@ -54,7 +54,7 @@ def test_multi_optimizer_with_scheduling(tmpdir): results = trainer.fit(model) assert results == 1 - init_lr = hparams.learning_rate + init_lr = hparams.get('learning_rate') adjusted_lr1 = [pg['lr'] for pg in trainer.optimizers[0].param_groups] adjusted_lr2 = [pg['lr'] for pg in trainer.optimizers[1].param_groups] @@ -89,7 +89,7 @@ def test_multi_optimizer_with_scheduling_stepping(tmpdir): results = trainer.fit(model) assert results == 1 - init_lr = hparams.learning_rate + init_lr = hparams.get('learning_rate') adjusted_lr1 = [pg['lr'] for pg in trainer.optimizers[0].param_groups] adjusted_lr2 = [pg['lr'] for pg in trainer.optimizers[1].param_groups] diff --git a/tests/trainer/test_trainer_tricks.py b/tests/trainer/test_trainer_tricks.py index 6e848899d9765..a66e8bbde8b7f 100755 --- a/tests/trainer/test_trainer_tricks.py +++ b/tests/trainer/test_trainer_tricks.py @@ -74,7 +74,7 @@ def test_trainer_arg(tmpdir, scale_arg): hparams = EvalModelTemplate.get_default_hparams() model = EvalModelTemplate(**hparams) - before_batch_size = hparams.batch_size + before_batch_size = hparams.get('batch_size') # logger file to get meta trainer = Trainer( default_save_path=tmpdir, @@ -96,7 +96,7 @@ def test_call_to_trainer_method(tmpdir, scale_method): hparams = EvalModelTemplate.get_default_hparams() model = EvalModelTemplate(**hparams) - before_batch_size = hparams.batch_size + before_batch_size = hparams.get('batch_size') # logger file to get meta trainer = Trainer( default_save_path=tmpdir, From 
acc020fd98716b0941f3ec9d5e90551c10918d59 Mon Sep 17 00:00:00 2001
From: Jirka
Date: Thu, 21 May 2020 22:47:55 +0200
Subject: [PATCH 066/100] review

---
 pytorch_lightning/core/lightning.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/pytorch_lightning/core/lightning.py b/pytorch_lightning/core/lightning.py
index d20494efd6b40..41f507c8c55b1 100644
--- a/pytorch_lightning/core/lightning.py
+++ b/pytorch_lightning/core/lightning.py
@@ -1713,7 +1713,7 @@ def _auto_register_hparams(self):
        # todo: this shall be flexible to find all init in the path, recursion?
        frame_args = frame.f_back.f_back.f_locals

-        frame_args = _collect_init_args(frame)
+        frame_args = _collect_init_args(frame, {})
        init_args = {k: v for k, v in frame_args.items()
                     if k not in ('args', 'kwargs', 'self', '__class__', 'frame', 'frame_args')}
        child = _get_latest_child(frame)
@@ -1722,6 +1722,8 @@ def _auto_register_hparams(self):
        for arg, val in init_args.items():
            # don't overwrite something already set
            if hasattr(child, arg):
+                log.warning(f'init argument `{arg}` was skipped during automatic `hparams` registration'
+                            ' because it matches an already existing attribute of this class.')
                continue
            setattr(child, arg, val)
@@ -1729,7 +1731,7 @@ def _auto_register_hparams(self):
        setattr(child, 'module_arguments', init_args)

-def _collect_init_args(frame, args: dict = {}) -> dict:
+def _collect_init_args(frame, args: dict) -> dict:
    """Recursive search for all children."""
    if any(k in frame.f_locals for k in ['self', '__class__']):
        local_args = frame.f_locals  # .get('frame_args')

From b3b623611d1fb0bdd613f6705b8aec9926018b38 Mon Sep 17 00:00:00 2001
From: Jirka
Date: Fri, 22 May 2020 00:53:14 +0200
Subject: [PATCH 067/100] saving

---
 pytorch_lightning/core/lightning.py      |  2 +-
 pytorch_lightning/trainer/training_io.py | 15 ++++++++-------
 tests/models/test_hparams.py             |  2 +-
 3 files changed, 10 insertions(+), 9 deletions(-)

diff --git a/pytorch_lightning/core/lightning.py b/pytorch_lightning/core/lightning.py
index 41f507c8c55b1..0c70cea11cfee 100644
--- a/pytorch_lightning/core/lightning.py
+++ b/pytorch_lightning/core/lightning.py
@@ -1728,7 +1728,7 @@ def _auto_register_hparams(self):
            setattr(child, arg, val)

        # set module_arguments in child
-        setattr(child, 'module_arguments', init_args)
+        setattr(child, 'module_arguments', [k for k in init_args])

def _collect_init_args(frame, args: dict) -> dict:
diff --git a/pytorch_lightning/trainer/training_io.py b/pytorch_lightning/trainer/training_io.py
index bedc9c5689368..0be86d8d01c1d 100644
--- a/pytorch_lightning/trainer/training_io.py
+++ b/pytorch_lightning/trainer/training_io.py
@@ -263,12 +263,11 @@ def save_checkpoint(self, filepath, weights_only: bool = False):
        # do the actual save
        try:
            self._atomic_save(checkpoint, filepath)
-        except AttributeError as e:
+        except AttributeError as err:
            if 'module_arguments' in checkpoint:
                del checkpoint['module_arguments']
-            rank_zero_warn('warning, `module_arguments` dropped from checkpoint.'
-                           f' An attribute is not picklable {e}')
-
+            rank_zero_warn('Warning, `module_arguments` dropped from checkpoint.'
+ f' An attribute is not picklable {err}') self._atomic_save(checkpoint, filepath) def restore(self, checkpoint_path: str, on_gpu: bool): @@ -343,8 +342,11 @@ def dump_checkpoint(self, weights_only: bool = False): checkpoint['state_dict'] = model.state_dict() - if hasattr(model, 'module_arguments') and model.module_arguments is not None: - checkpoint['module_arguments'] = model.module_arguments + if hasattr(model, 'module_arguments') and model.module_arguments: + # copy the actual values from model according the list + module_args = {k: getattr(model, k) for k in model.module_arguments} + # add arguments to the checkpoint + checkpoint['module_arguments'] = module_args # give the model a chance to add a few things model.on_save_checkpoint(checkpoint) @@ -453,7 +455,6 @@ def hpc_save(self, folderpath: str, logger): del checkpoint['module_arguments'] rank_zero_warn('warning, `module_arguments` dropped from checkpoint.' f' An attribute is not picklable {e}') - self._atomic_save(checkpoint, filepath) return filepath diff --git a/tests/models/test_hparams.py b/tests/models/test_hparams.py index 3e3005c3bb8b3..b9a9b6ef91d91 100644 --- a/tests/models/test_hparams.py +++ b/tests/models/test_hparams.py @@ -58,7 +58,7 @@ def test_auto_hparams(tmpdir, cls): assert model.skip_arg == 15 # verify that the checkpoint saved the correct values - trainer = Trainer(max_steps=5) + trainer = Trainer(max_steps=5, default_root_dir=tmpdir) trainer.fit(model) raw_checkpoint_path = os.listdir(trainer.checkpoint_callback.dirpath) raw_checkpoint_path = [x for x in raw_checkpoint_path if '.ckpt' in x][0] From b97e0b15d6e732f19b624ceb9c9b49e0c0fb61f1 Mon Sep 17 00:00:00 2001 From: Jirka Date: Fri, 22 May 2020 12:44:00 +0200 Subject: [PATCH 068/100] tests --- pytorch_lightning/trainer/lr_finder.py | 22 +++++++++++++------- pytorch_lightning/trainer/training_tricks.py | 12 ++++++----- tests/trainer/test_lr_finder.py | 2 +- 3 files changed, 22 insertions(+), 14 deletions(-) diff --git a/pytorch_lightning/trainer/lr_finder.py b/pytorch_lightning/trainer/lr_finder.py index 6a76523f17fb4..abc71ece2ac3d 100755 --- a/pytorch_lightning/trainer/lr_finder.py +++ b/pytorch_lightning/trainer/lr_finder.py @@ -2,7 +2,7 @@ Trainer Learning Rate Finder """ from abc import ABC, abstractmethod -from typing import Optional +from typing import Optional, Sequence import numpy as np import torch @@ -20,6 +20,8 @@ class TrainerLRFinderMixin(ABC): + default_root_dir: str + @abstractmethod def save_checkpoint(self, *args): """Warning: this is just empty shell for code implemented in other class.""" @@ -35,17 +37,17 @@ def _run_lr_finder_internally(self, model: LightningModule): # TODO: log lr.results to self.logger if isinstance(self.auto_lr_find, str): # Try to find requested field, may be nested - if _nested_hasattr(model.hparams, self.auto_lr_find): - _nested_setattr(model.hparams, self.auto_lr_find, lr) + if _nested_hasattr(model, self.auto_lr_find): + _nested_setattr(model, self.auto_lr_find, lr) else: raise MisconfigurationException( f'`auto_lr_find` was set to {self.auto_lr_find}, however' ' could not find this as a field in `model.hparams`.') else: - if hasattr(model.hparams, 'lr'): - model.hparams.lr = lr - elif hasattr(model.hparams, 'learning_rate'): - model.hparams.learning_rate = lr + if hasattr(model, 'lr'): + model.lr = lr + elif hasattr(model, 'learning_rate'): + model.learning_rate = lr else: raise MisconfigurationException( 'When auto_lr_find is set to True, expects that hparams' @@ -350,7 +352,7 @@ class _LRCallback(Callback): 
""" def __init__(self, num_training: int, early_stop_threshold: float = 4.0, - progress_bar_refresh_rate: bool = False, + progress_bar_refresh_rate: int = 0, beta: float = 0.98): self.num_training = num_training self.early_stop_threshold = early_stop_threshold @@ -414,6 +416,8 @@ class _LinearLR(_LRScheduler): last_epoch: the index of last epoch. Default: -1. """ + last_epoch: int + base_lrs: Sequence def __init__(self, optimizer: torch.optim.Optimizer, @@ -454,6 +458,8 @@ class _ExponentialLR(_LRScheduler): last_epoch: the index of last epoch. Default: -1. """ + last_epoch: int + base_lrs: Sequence def __init__(self, optimizer: torch.optim.Optimizer, diff --git a/pytorch_lightning/trainer/training_tricks.py b/pytorch_lightning/trainer/training_tricks.py index 2a9adaf568f90..d4a3b3864eb08 100644 --- a/pytorch_lightning/trainer/training_tricks.py +++ b/pytorch_lightning/trainer/training_tricks.py @@ -25,7 +25,9 @@ class TrainerTrainingTricksMixin(ABC): # this is just a summary on variables used in this abstract class, # the proper values/initialisation should be done in child class gradient_clip_val: ... - precision: ... + precision: int + default_root_dir: str + progress_bar_callback: ... on_gpu: bool @abstractmethod @@ -133,7 +135,7 @@ def scale_batch_size(self, algorithm is terminated """ - if not hasattr(model.hparams, batch_arg_name): + if not hasattr(model, batch_arg_name): raise MisconfigurationException(f'Field {batch_arg_name} not found in `model.hparams`') if hasattr(model.train_dataloader, 'patch_loader_code'): @@ -243,9 +245,9 @@ def _adjust_batch_size(trainer, """ model = trainer.get_model() - batch_size = getattr(model.hparams, batch_arg_name) + batch_size = getattr(model, batch_arg_name) if value: - setattr(model.hparams, batch_arg_name, value) + setattr(model, batch_arg_name, value) new_size = value if desc: log.info(f'Batch size {batch_size} {desc}, trying batch size {new_size}') @@ -253,7 +255,7 @@ def _adjust_batch_size(trainer, new_size = int(batch_size * factor) if desc: log.info(f'Batch size {batch_size} {desc}, trying batch size {new_size}') - setattr(model.hparams, batch_arg_name, new_size) + setattr(model, batch_arg_name, new_size) return new_size diff --git a/tests/trainer/test_lr_finder.py b/tests/trainer/test_lr_finder.py index 0c5b92db6cb51..95e839d0b23eb 100755 --- a/tests/trainer/test_lr_finder.py +++ b/tests/trainer/test_lr_finder.py @@ -79,7 +79,7 @@ def test_trainer_arg_bool(tmpdir): """ Test that setting trainer arg to bool works """ hparams = EvalModelTemplate.get_default_hparams() model = EvalModelTemplate(**hparams) - before_lr = hparams.learning_rate + before_lr = hparams.get('learning_rate') # logger file to get meta trainer = Trainer( From 5a4740a5c18e63b668d52685641b4b4f71e9158f Mon Sep 17 00:00:00 2001 From: Jirka Date: Fri, 22 May 2020 13:12:07 +0200 Subject: [PATCH 069/100] tests --- pytorch_lightning/core/lightning.py | 1 + pytorch_lightning/trainer/trainer.py | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/pytorch_lightning/core/lightning.py b/pytorch_lightning/core/lightning.py index 0c70cea11cfee..5fa086966593b 100644 --- a/pytorch_lightning/core/lightning.py +++ b/pytorch_lightning/core/lightning.py @@ -74,6 +74,7 @@ def __init__(self, *args, **kwargs): #: device reference self._device = torch.device('cpu') + self.module_arguments = [] # register all params passed into the child module in __init__ self._auto_register_hparams() diff --git a/pytorch_lightning/trainer/trainer.py b/pytorch_lightning/trainer/trainer.py 
index 75361a1ac1e35..5fd5f9e59165b 100644 --- a/pytorch_lightning/trainer/trainer.py +++ b/pytorch_lightning/trainer/trainer.py @@ -952,8 +952,8 @@ def run_pretrain_routine(self, model: LightningModule): # log hyper-parameters if self.logger is not None: # save exp to get started - if hasattr(ref_model, "hparams"): - self.logger.log_hyperparams(ref_model.hparams) + if hasattr(ref_model, "module_arguments"): + self.logger.log_hyperparams({k: getattr(ref_model, k) for k in ref_model.module_arguments}) self.logger.save() From 2a6be208fb8518152eb460596f20ce41e05318c2 Mon Sep 17 00:00:00 2001 From: Jirka Date: Fri, 22 May 2020 14:06:27 +0200 Subject: [PATCH 070/100] tests --- .../computer_vision_fine_tuning.py | 6 +- pytorch_lightning/trainer/training_io.py | 34 ++++++++- tests/models/test_restore.py | 71 +++++++++---------- 3 files changed, 69 insertions(+), 42 deletions(-) diff --git a/pl_examples/domain_templates/computer_vision_fine_tuning.py b/pl_examples/domain_templates/computer_vision_fine_tuning.py index eb1e000cd0ed4..8b6026b31bb85 100644 --- a/pl_examples/domain_templates/computer_vision_fine_tuning.py +++ b/pl_examples/domain_templates/computer_vision_fine_tuning.py @@ -148,14 +148,14 @@ class TransferLearningModel(pl.LightningModule): dl_path: Path where the data will be downloaded """ def __init__(self, + dl_path: Union[str, Path], backbone='resnet50', train_bn=True, milestones=(5, 10), batch_size=8, lr=1e-2, lr_scheduler_gamma=1e-1, - num_workers=6, - dl_path: Union[str, Path]) -> None: + num_workers=6) -> None: super().__init__() self.dl_path = dl_path @@ -416,7 +416,7 @@ def main(args: argparse.Namespace) -> None: with TemporaryDirectory(dir=args.root_data_path) as tmp_dir: - model = TransferLearningModel(**args, dl_path=tmp_dir) + model = TransferLearningModel(dl_path=tmp_dir, **vars(args)) trainer = pl.Trainer( weights_summary=None, diff --git a/pytorch_lightning/trainer/training_io.py b/pytorch_lightning/trainer/training_io.py index 0be86d8d01c1d..8996a6bdd5b82 100644 --- a/pytorch_lightning/trainer/training_io.py +++ b/pytorch_lightning/trainer/training_io.py @@ -85,9 +85,9 @@ import os import re +import pickle import signal from abc import ABC -from argparse import Namespace from subprocess import call from typing import Union @@ -305,7 +305,16 @@ def restore(self, checkpoint_path: str, on_gpu: bool): # load training state (affects trainer only) self.restore_training_state(checkpoint) - def dump_checkpoint(self, weights_only: bool = False): + def dump_checkpoint(self, weights_only: bool = False, save_mode: bool = True) -> dict: + """Creating model checkpoint. 
+ + Args: + weights_only: saving model weights only + save_mode: drop all init argument which are not primitives + + Return: + structured dictionary + """ checkpoint = { 'epoch': self.current_epoch + 1, 'global_step': self.global_step + 1, @@ -344,7 +353,12 @@ def dump_checkpoint(self, weights_only: bool = False): if hasattr(model, 'module_arguments') and model.module_arguments: # copy the actual values from model according the list - module_args = {k: getattr(model, k) for k in model.module_arguments} + module_args = {} + for k in model.module_arguments: + val = getattr(model, k) + if save_mode and not _is_serializable(val): + continue + module_args[k] = val # add arguments to the checkpoint checkpoint['module_arguments'] = module_args @@ -499,3 +513,17 @@ def max_ckpt_in_folder(self, path, name_key='ckpt_'): ckpt_vs.append(int(name)) return max(ckpt_vs) + + +def _is_serializable(obj) -> bool: + """Try if the object is serializable + + >>> _is_serializable(5) + True + """ + try: + pickle.dumps(obj) + except Exception: + return False + else: + return True diff --git a/tests/models/test_restore.py b/tests/models/test_restore.py index a6b6eac073635..970b2f7d78209 100644 --- a/tests/models/test_restore.py +++ b/tests/models/test_restore.py @@ -271,42 +271,41 @@ def test_model_saving_loading(tmpdir): assert torch.all(torch.eq(pred_before_saving, new_pred)).item() == 1 -def test_load_model_with_missing_hparams(tmpdir): - trainer = Trainer( - progress_bar_refresh_rate=0, - max_epochs=1, - checkpoint_callback=ModelCheckpoint(tmpdir, save_top_k=-1), - logger=False, - default_root_dir=tmpdir, - ) - - class CurrentModelWithoutHparams(EvalModelTemplate): - def __init__(self): - super().__init__() - - class CurrentModelUnusedHparams(EvalModelTemplate): - def __init__(self, hparams): - super().__init__() - - model = CurrentModelWithoutHparams() - trainer.fit(model) - last_checkpoint = sorted(glob.glob(os.path.join(trainer.checkpoint_callback.dirpath, "*.ckpt")))[-1] - - # try to load a checkpoint that has hparams but model is missing hparams arg - with pytest.raises(MisconfigurationException, match=r".*__init__ is missing the argument 'hparams'.*"): - CurrentModelWithoutHparams.load_from_checkpoint(last_checkpoint) - - # create a checkpoint without hyperparameters - # if the model does not take a hparams argument, it should not throw an error - ckpt = torch.load(last_checkpoint) - del(ckpt['hparams']) - torch.save(ckpt, last_checkpoint) - CurrentModelWithoutHparams.load_from_checkpoint(last_checkpoint) - - # load checkpoint without hparams again - # warn if user's model has hparams argument - with pytest.warns(UserWarning, match=r".*Will pass in an empty Namespace instead."): - CurrentModelUnusedHparams.load_from_checkpoint(last_checkpoint) +# def test_load_model_with_missing_hparams(tmpdir): +# trainer = Trainer( +# progress_bar_refresh_rate=0, +# max_epochs=1, +# checkpoint_callback=ModelCheckpoint(tmpdir, save_top_k=-1), +# logger=False, +# default_root_dir=tmpdir, +# ) +# +# class CurrentModelWithoutHparams(EvalModelTemplate): +# def __init__(self, *args, **kwargs): +# super().__init__() +# +# class CurrentModelUnusedHparams(EvalModelTemplate): +# def __init__(self, hparams={}, *args, **kwargs): +# super().__init__() +# +# model = CurrentModelWithoutHparams() +# trainer.fit(model) +# last_checkpoint = sorted(glob.glob(os.path.join(trainer.checkpoint_callback.dirpath, "*.ckpt")))[-1] +# +# # try to load a checkpoint that has hparams but model is missing hparams arg +# with 
pytest.raises(MisconfigurationException, match=r".*__init__ is missing the argument 'hparams'.*"): +# CurrentModelWithoutHparams.load_from_checkpoint(last_checkpoint) +# +# # create a checkpoint without hyperparameters +# # if the model does not take a hparams argument, it should not throw an error +# ckpt = torch.load(last_checkpoint) +# # del(ckpt['hparams']) +# torch.save(ckpt, last_checkpoint) +# CurrentModelWithoutHparams.load_from_checkpoint(last_checkpoint) +# +# # load checkpoint without hparams again warn if user's model has hparams argument +# with pytest.warns(UserWarning, match=r".*Will pass in an empty Namespace instead."): +# CurrentModelUnusedHparams.load_from_checkpoint(last_checkpoint) def test_model_pickle(tmpdir): From e80b0060038ab600bac76418825c6f4c811817bc Mon Sep 17 00:00:00 2001 From: Jirka Date: Fri, 22 May 2020 14:44:39 +0200 Subject: [PATCH 071/100] docs --- docs/source/weights_loading.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/weights_loading.rst b/docs/source/weights_loading.rst index 1451c9cddae95..b1d2dee39922a 100644 --- a/docs/source/weights_loading.rst +++ b/docs/source/weights_loading.rst @@ -64,7 +64,7 @@ under the `module_arguments` key in the checkpoint. .. testcode:: - class MyLightningModule(LightningModule): + class MyLightningModule(LightningModule): def __init__(self, learning_rate, *args, **kwargs): super().__init__() From e50b78fabf1695dcd65f403b3d8825c3d953893a Mon Sep 17 00:00:00 2001 From: William Falcon Date: Fri, 22 May 2020 11:01:01 -0400 Subject: [PATCH 072/100] finished moco --- docs/source/weights_loading.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/weights_loading.rst b/docs/source/weights_loading.rst index b1d2dee39922a..a45c8735c40f3 100644 --- a/docs/source/weights_loading.rst +++ b/docs/source/weights_loading.rst @@ -62,7 +62,7 @@ Or disable it by passing The Lightning checkpoint also saves the arguments passed into the LightningModule init under the `module_arguments` key in the checkpoint. -.. testcode:: +.. code-block:: python class MyLightningModule(LightningModule): From fd7be0dac4c200eac11e7c98cd6ccb3b32c749f6 Mon Sep 17 00:00:00 2001 From: Jirka Date: Fri, 22 May 2020 17:38:10 +0200 Subject: [PATCH 073/100] hparams --- docs/source/hyperparameters.rst | 8 +++---- pytorch_lightning/core/lightning.py | 26 ++++++++++++++++++++++- pytorch_lightning/core/saving.py | 16 ++++++++++++++ pytorch_lightning/trainer/training_io.py | 27 ++++-------------------- 4 files changed, 49 insertions(+), 28 deletions(-) diff --git a/docs/source/hyperparameters.rst b/docs/source/hyperparameters.rst index d2d390348a3df..4357166d4ca5b 100644 --- a/docs/source/hyperparameters.rst +++ b/docs/source/hyperparameters.rst @@ -88,8 +88,8 @@ Finally, make sure to start the training like so: .. code-block:: python # YES - model = LitModel(hparams) - trainer = Trainer.from_argparse_args(hparams, early_stopping_callback=...) + model = LitModel(**hparams) + trainer = Trainer.from_argparse_args(**hparams, early_stopping_callback=...) # NO # model = LitModel(learning_rate=hparams.learning_rate, ...) 
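A concrete sketch of that argparse flow, using `vars()` to turn the parsed `Namespace` into keyword arguments; the module, its arguments, and the defaults are made up for the example:

    from argparse import ArgumentParser
    import torch
    import pytorch_lightning as pl

    class LitModel(pl.LightningModule):
        # toy module: it only needs to accept the parsed values as kwargs
        def __init__(self, learning_rate=0.001, hidden_dim=64, **kwargs):
            super().__init__()
            self.learning_rate = learning_rate
            self.hidden_dim = hidden_dim
            self.layer = torch.nn.Linear(28 * 28, hidden_dim)

    parser = ArgumentParser()
    parser.add_argument('--learning_rate', type=float, default=0.001)
    parser.add_argument('--hidden_dim', type=int, default=64)
    parser = pl.Trainer.add_argparse_args(parser)
    args = parser.parse_args()

    # unpack the Namespace for the module; hand it to the Trainer helper as-is
    model = LitModel(**vars(args))
    trainer = pl.Trainer.from_argparse_args(args)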
@@ -169,10 +169,10 @@ To recap, add ALL possible trainer flags to the argparser and init the Trainer t parser = Trainer.add_argparse_args(parser) hparams = parser.parse_args() - trainer = Trainer.from_argparse_args(hparams) + trainer = Trainer.from_argparse_args(**hparams) # or if you need to pass in callbacks - trainer = Trainer.from_argparse_args(hparams, checkpoint_callback=..., callbacks=[...]) + trainer = Trainer.from_argparse_args(**hparams, checkpoint_callback=..., callbacks=[...]) Multiple Lightning Modules diff --git a/pytorch_lightning/core/lightning.py b/pytorch_lightning/core/lightning.py index 5fa086966593b..074cbd456fbd3 100644 --- a/pytorch_lightning/core/lightning.py +++ b/pytorch_lightning/core/lightning.py @@ -17,7 +17,8 @@ from pytorch_lightning.core.grads import GradInformation from pytorch_lightning.core.hooks import ModelHooks from pytorch_lightning.core.memory import ModelSummary -from pytorch_lightning.core.saving import ModelIO, load_hparams_from_tags_csv, load_hparams_from_yaml, update_hparams +from pytorch_lightning.core.saving import ModelIO, load_hparams_from_tags_csv, load_hparams_from_yaml, update_hparams, \ + is_picklable from pytorch_lightning.utilities.device_dtype_mixin import DeviceDtypeModuleMixin from pytorch_lightning.overrides.data_parallel import LightningDistributedDataParallel from pytorch_lightning.utilities.exceptions import MisconfigurationException @@ -1731,6 +1732,29 @@ def _auto_register_hparams(self): # set module_arguments in child setattr(child, 'module_arguments', [k for k in init_args]) + def get_hyper_params(self, save_mode: True) -> dict: + """Copy the actual values from model according the list + + Args: + weights_only: saving model weights only + save_mode: drop all init argument which are not primitives + + Return: + structured dictionary + """ + module_args = {} + for k in self.module_arguments: + val = getattr(self, k) + if save_mode and not is_picklable(val): + continue + module_args[k] = val + return module_args + + @property + def hparams(self) -> dict: + """Imitate rhe pas `hparams` attribute.""" + return self.get_hyper_params(save_mode=True) + def _collect_init_args(frame, args: dict) -> dict: """Recursive search for all children.""" diff --git a/pytorch_lightning/core/saving.py b/pytorch_lightning/core/saving.py index adf782e6d4f21..9f37cc4b65ed2 100644 --- a/pytorch_lightning/core/saving.py +++ b/pytorch_lightning/core/saving.py @@ -1,6 +1,8 @@ import ast import csv import os +import pickle + import yaml from argparse import Namespace from typing import Union, Dict, Any @@ -157,3 +159,17 @@ def convert(val: str) -> Union[int, float, bool, str]: except (ValueError, SyntaxError) as e: log.debug(e) return val + + +def is_picklable(obj) -> bool: + """Try if the object is serializable + + >>> is_picklable(5) + True + """ + try: + pickle.dumps(obj) + except Exception: + return False + else: + return True diff --git a/pytorch_lightning/trainer/training_io.py b/pytorch_lightning/trainer/training_io.py index 8996a6bdd5b82..1daa426463100 100644 --- a/pytorch_lightning/trainer/training_io.py +++ b/pytorch_lightning/trainer/training_io.py @@ -85,7 +85,6 @@ import os import re -import pickle import signal from abc import ABC from subprocess import call @@ -141,6 +140,9 @@ class TrainerIOMixin(ABC): on_tpu: bool num_training_batches: int accumulate_grad_batches: int + use_amp: bool + use_native_amp: bool + scaler: ... 
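Only picklable values make it into the checkpoint: `get_hyper_params(save_mode=True)` above filters `module_arguments` through the `is_picklable` probe added to `pytorch_lightning/core/saving.py`. The same kind of filtering shown standalone, with made-up values:

    import pickle

    def is_picklable(obj) -> bool:
        # same idea as the probe in the patch: attempt a dump, report success
        try:
            pickle.dumps(obj)
            return True
        except Exception:
            return False

    module_arguments = {
        'learning_rate': 0.02,         # plain value -> kept
        'layers': [128, 64],           # picklable container -> kept
        'on_error': lambda exc: None,  # lambdas cannot be pickled -> dropped
    }
    kept = {k: v for k, v in module_arguments.items() if is_picklable(v)}
    print(sorted(kept))  # ['layers', 'learning_rate']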
def get_model(self): is_dp_module = isinstance(self.model, (LightningDistributedDataParallel, @@ -352,15 +354,8 @@ def dump_checkpoint(self, weights_only: bool = False, save_mode: bool = True) -> checkpoint['state_dict'] = model.state_dict() if hasattr(model, 'module_arguments') and model.module_arguments: - # copy the actual values from model according the list - module_args = {} - for k in model.module_arguments: - val = getattr(model, k) - if save_mode and not _is_serializable(val): - continue - module_args[k] = val # add arguments to the checkpoint - checkpoint['module_arguments'] = module_args + checkpoint['module_arguments'] = model.get_hyper_params(save_mode=save_mode) # give the model a chance to add a few things model.on_save_checkpoint(checkpoint) @@ -513,17 +508,3 @@ def max_ckpt_in_folder(self, path, name_key='ckpt_'): ckpt_vs.append(int(name)) return max(ckpt_vs) - - -def _is_serializable(obj) -> bool: - """Try if the object is serializable - - >>> _is_serializable(5) - True - """ - try: - pickle.dumps(obj) - except Exception: - return False - else: - return True From c31952871cd72c309edc330730f1273c804b79db Mon Sep 17 00:00:00 2001 From: Jirka Date: Fri, 22 May 2020 17:58:43 +0200 Subject: [PATCH 074/100] review --- pytorch_lightning/core/lightning.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/pytorch_lightning/core/lightning.py b/pytorch_lightning/core/lightning.py index 074cbd456fbd3..975cdfbb56635 100644 --- a/pytorch_lightning/core/lightning.py +++ b/pytorch_lightning/core/lightning.py @@ -1710,10 +1710,7 @@ def _auto_register_hparams(self): 1. we don't overwrite the property if it already exists 2. we also store a module_arguments property for model loading and saving """ - # two frames back is the init of the child module frame = inspect.currentframe() - # todo: this shall be flexible to find all init in the path, recursion? - frame_args = frame.f_back.f_back.f_locals frame_args = _collect_init_args(frame, {}) init_args = {k: v for k, v in frame_args.items() @@ -1752,7 +1749,7 @@ def get_hyper_params(self, save_mode: True) -> dict: @property def hparams(self) -> dict: - """Imitate rhe pas `hparams` attribute.""" + """Imitate the past `hparams` attribute.""" return self.get_hyper_params(save_mode=True) From b313477e3ac11840a2021939c368d4a9f9c75f33 Mon Sep 17 00:00:00 2001 From: Jirka Borovec Date: Fri, 22 May 2020 19:01:01 +0200 Subject: [PATCH 075/100] Apply suggestions from code review MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Adrian Wälchli --- CHANGELOG.md | 2 +- docs/source/hyperparameters.rst | 12 ++++++------ docs/source/lr_finder.rst | 4 ++-- docs/source/slurm.rst | 2 +- docs/source/weights_loading.rst | 2 +- pl_examples/__init__.py | 2 +- pl_examples/domain_templates/reinforce_learn_Qnet.py | 2 +- .../domain_templates/semantic_segmentation.py | 2 +- pytorch_lightning/core/lightning.py | 2 +- pytorch_lightning/trainer/trainer.py | 2 +- 10 files changed, 16 insertions(+), 16 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 396222dc5964e..5d56f4a9287c9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -20,7 +20,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). 
- Removed non-finite values from loss in `LRFinder` ([#1862](https://github.com/PyTorchLightning/pytorch-lightning/pull/1862)) -- Allow passing Model hyper parameters as complete kwarg list ([#1896](https://github.com/PyTorchLightning/pytorch-lightning/pull/1896)) +- Allow passing model hyperparameters as complete kwarg list ([#1896](https://github.com/PyTorchLightning/pytorch-lightning/pull/1896)) ### Deprecated diff --git a/docs/source/hyperparameters.rst b/docs/source/hyperparameters.rst index 4357166d4ca5b..7ac7ceacff3ed 100644 --- a/docs/source/hyperparameters.rst +++ b/docs/source/hyperparameters.rst @@ -88,8 +88,8 @@ Finally, make sure to start the training like so: .. code-block:: python # YES - model = LitModel(**hparams) - trainer = Trainer.from_argparse_args(**hparams, early_stopping_callback=...) + model = LitModel(hparams) + trainer = Trainer.from_argparse_args(hparams, early_stopping_callback=...) # NO # model = LitModel(learning_rate=hparams.learning_rate, ...) @@ -134,11 +134,11 @@ Now pass in the params when you init your model parser = ArgumentParser() parser = LitMNIST.add_model_specific_args(parser) args = parser.parse_args() - model = LitMNIST(**args) + model = LitMNIST(args) Within any LightningModule all the arguments you pass into your `__init__` will be available simply with `self.arg`. However, we won't overwrite any other arguments you have already defined. -We will also add all of those values to the tensorboard hparams tab (unless it's an object which +We will also add all of those values to the TensorBoard hparams tab (unless it's an object which we won't). We also will store those values into checkpoints for you which you can use to init your models. @@ -169,10 +169,10 @@ To recap, add ALL possible trainer flags to the argparser and init the Trainer t parser = Trainer.add_argparse_args(parser) hparams = parser.parse_args() - trainer = Trainer.from_argparse_args(**hparams) + trainer = Trainer.from_argparse_args(hparams) # or if you need to pass in callbacks - trainer = Trainer.from_argparse_args(**hparams, checkpoint_callback=..., callbacks=[...]) + trainer = Trainer.from_argparse_args(hparams, checkpoint_callback=..., callbacks=[...]) Multiple Lightning Modules diff --git a/docs/source/lr_finder.rst b/docs/source/lr_finder.rst index 63c632e05cf89..04ebc083c5a59 100755 --- a/docs/source/lr_finder.rst +++ b/docs/source/lr_finder.rst @@ -36,14 +36,14 @@ hyperparameters of the model. # default: no automatic learning rate finder trainer = Trainer(auto_lr_find=False) -This flag sets your learning_rate which can be accessed via `self.lr|self.learning_rate`. +This flag sets your learning rate which can be accessed via ``self.lr`` or ``self.learning_rate``. .. 
testcode:: class LitModel(LightningModule): def configure_optimizers(self): - return Adam(self.parameters(), lr=self.lr|self.learning_rate) + return Adam(self.parameters(), lr=(self.lr or self.learning_rate)) # finds learning rate automatically # sets hparams.lr or hparams.learning_rate to that learning rate diff --git a/docs/source/slurm.rst b/docs/source/slurm.rst index b800e356f0a69..ed09e7509b571 100644 --- a/docs/source/slurm.rst +++ b/docs/source/slurm.rst @@ -28,7 +28,7 @@ To train a model using multiple-nodes do the following: # train.py def main(hparams): - model = LightningTemplateModel(**hparams) + model = LightningTemplateModel(hparams) trainer = pl.Trainer( gpus=8, diff --git a/docs/source/weights_loading.rst b/docs/source/weights_loading.rst index a45c8735c40f3..1d8c1f2e2eaa6 100644 --- a/docs/source/weights_loading.rst +++ b/docs/source/weights_loading.rst @@ -88,7 +88,7 @@ You can manually save checkpoints and restore your model from the checkpointed s Checkpoint Loading ------------------ -To load a model along with its weights, biases and module_arguments use following method. +To load a model along with its weights, biases and `module_arguments` use following method. .. code-block:: python diff --git a/pl_examples/__init__.py b/pl_examples/__init__.py index 267f797548dd7..1c5908539cfdc 100644 --- a/pl_examples/__init__.py +++ b/pl_examples/__init__.py @@ -52,7 +52,7 @@ def main(hparams, cluster, results_dict): # build model - model = MyLightningModule(**hparams) + model = MyLightningModule(hparams) # configure trainer trainer = Trainer() diff --git a/pl_examples/domain_templates/reinforce_learn_Qnet.py b/pl_examples/domain_templates/reinforce_learn_Qnet.py index b29913c12f0b8..1f4ab1671b340 100644 --- a/pl_examples/domain_templates/reinforce_learn_Qnet.py +++ b/pl_examples/domain_templates/reinforce_learn_Qnet.py @@ -323,7 +323,7 @@ def get_device(self, batch) -> str: def main(args) -> None: - model = DQNLightning(**args) + model = DQNLightning(args) trainer = pl.Trainer( gpus=1, diff --git a/pl_examples/domain_templates/semantic_segmentation.py b/pl_examples/domain_templates/semantic_segmentation.py index df031dfb47ad0..b1ac9cbe63727 100644 --- a/pl_examples/domain_templates/semantic_segmentation.py +++ b/pl_examples/domain_templates/semantic_segmentation.py @@ -182,7 +182,7 @@ def main(hparams): # ------------------------ # 1 INIT LIGHTNING MODEL # ------------------------ - model = SegModel(**hparams) + model = SegModel(hparams) # ------------------------ # 2 SET LOGGER diff --git a/pytorch_lightning/core/lightning.py b/pytorch_lightning/core/lightning.py index 975cdfbb56635..e2ef334e6ccfc 100644 --- a/pytorch_lightning/core/lightning.py +++ b/pytorch_lightning/core/lightning.py @@ -1458,7 +1458,7 @@ def load_from_checkpoint( ) -> 'LightningModule': r""" Primary way of loading a model from a checkpoint. When Lightning saves a checkpoint - it stores the items in `__init__` in the checkpoint under `module_arguments` + it stores the arguments passed to `__init__` in the checkpoint under `module_arguments` Any arguments specified through \*args and \*\*kwargs will override args stored in `module_arguments`. 
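In practice the override described in that docstring is applied at load time. A usage fragment rather than a full script; `MyLightningModule` and the checkpoint path stand in for whatever was actually trained and saved:

    # values stored under `module_arguments` at save time are used to
    # re-instantiate the module
    model = MyLightningModule.load_from_checkpoint('path/to/last.ckpt')

    # keyword arguments passed here take precedence over the stored values
    model = MyLightningModule.load_from_checkpoint('path/to/last.ckpt', batch_size=128)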
diff --git a/pytorch_lightning/trainer/trainer.py b/pytorch_lightning/trainer/trainer.py index 5fd5f9e59165b..e7abe330dba8b 100644 --- a/pytorch_lightning/trainer/trainer.py +++ b/pytorch_lightning/trainer/trainer.py @@ -289,7 +289,7 @@ def __init__( auto_lr_find: If set to True, will `initially` run a learning rate finder, trying to optimize initial learning for faster convergence. Sets learning - rate in self.lr | self.learning_rate in the lightning module. + rate in self.lr or self.learning_rate in the LightningModule. To use a different key, set a string instead of True with the key name. replace_sampler_ddp: Explicitly enables or disables sampler replacement. From 488d18a09dce93d5665b0e12e30700f25e900652 Mon Sep 17 00:00:00 2001 From: Jirka Date: Fri, 22 May 2020 19:30:36 +0200 Subject: [PATCH 076/100] hparams --- pytorch_lightning/core/lightning.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pytorch_lightning/core/lightning.py b/pytorch_lightning/core/lightning.py index e2ef334e6ccfc..b0a44a37bc99b 100644 --- a/pytorch_lightning/core/lightning.py +++ b/pytorch_lightning/core/lightning.py @@ -1727,7 +1727,8 @@ def _auto_register_hparams(self): setattr(child, arg, val) # set module_arguments in child - setattr(child, 'module_arguments', [k for k in init_args]) + # skip `hparams` to uncycle with property + setattr(child, 'module_arguments', [k for k in init_args if k != 'hparams']) def get_hyper_params(self, save_mode: True) -> dict: """Copy the actual values from model according the list From d24b78edc855881fa7a52b9cfbb1e474d709e978 Mon Sep 17 00:00:00 2001 From: Jirka Date: Fri, 22 May 2020 21:21:36 +0200 Subject: [PATCH 077/100] overwrite --- pytorch_lightning/core/lightning.py | 4 +--- pytorch_lightning/core/saving.py | 4 ++-- pytorch_lightning/loggers/comet.py | 2 +- pytorch_lightning/overrides/data_parallel.py | 4 ++-- pytorch_lightning/trainer/distrib_data_parallel.py | 4 ++-- pytorch_lightning/trainer/distrib_parts.py | 4 ++-- pytorch_lightning/trainer/training_io.py | 6 +++--- 7 files changed, 13 insertions(+), 15 deletions(-) diff --git a/pytorch_lightning/core/lightning.py b/pytorch_lightning/core/lightning.py index b0a44a37bc99b..69dbbd9293c43 100644 --- a/pytorch_lightning/core/lightning.py +++ b/pytorch_lightning/core/lightning.py @@ -1563,9 +1563,7 @@ def load_from_checkpoint( checkpoint['module_arguments'] = hparams # override the module_arguments with values that were passed in - for k, v in kwargs.items(): - if k in checkpoint['module_arguments']: - checkpoint['module_arguments'][k] = v + checkpoint['module_arguments'].update(kwargs) model = cls._load_model_state(checkpoint, *args, **kwargs) return model diff --git a/pytorch_lightning/core/saving.py b/pytorch_lightning/core/saving.py index 9f37cc4b65ed2..0c59bde9b12f6 100644 --- a/pytorch_lightning/core/saving.py +++ b/pytorch_lightning/core/saving.py @@ -156,8 +156,8 @@ def save_hparams_to_yaml(config_yaml, hparams: Union[dict, Namespace]) -> None: def convert(val: str) -> Union[int, float, bool, str]: try: return ast.literal_eval(val) - except (ValueError, SyntaxError) as e: - log.debug(e) + except (ValueError, SyntaxError) as err: + log.debug(err) return val diff --git a/pytorch_lightning/loggers/comet.py b/pytorch_lightning/loggers/comet.py index 1d24676d8018a..fc81c6b5b6dfb 100644 --- a/pytorch_lightning/loggers/comet.py +++ b/pytorch_lightning/loggers/comet.py @@ -125,7 +125,7 @@ def __init__(self, if experiment_name: try: self.name = experiment_name - except TypeError as e: + except 
TypeError: log.exception("Failed to set experiment name for comet.ml logger") self._kwargs = kwargs diff --git a/pytorch_lightning/overrides/data_parallel.py b/pytorch_lightning/overrides/data_parallel.py index b2f7816ec0ac2..f2a23b188e068 100644 --- a/pytorch_lightning/overrides/data_parallel.py +++ b/pytorch_lightning/overrides/data_parallel.py @@ -177,9 +177,9 @@ def _worker(i, module, input, kwargs, device=None): with lock: results[i] = output - except Exception as e: + except Exception as ex: with lock: - results[i] = e + results[i] = ex # TODO: fix hack (maybe not a hack) # make sure each module knows what training state it's in... diff --git a/pytorch_lightning/trainer/distrib_data_parallel.py b/pytorch_lightning/trainer/distrib_data_parallel.py index 3844896ba2e90..28b6c7dbbade1 100644 --- a/pytorch_lightning/trainer/distrib_data_parallel.py +++ b/pytorch_lightning/trainer/distrib_data_parallel.py @@ -277,12 +277,12 @@ def configure_slurm_ddp(self, num_gpu_nodes): should_fake = int(os.environ['FAKE_SLURM_MANAGING_TASKS']) if should_fake: self.is_slurm_managing_tasks = True - except Exception as e: + except Exception: pass # notify user the that slurm is managing tasks if self.is_slurm_managing_tasks: - log.info('Multi-processing is handled by Slurm.') + log.info('Multi-processing is handled by SLURM.') def determine_ddp_node_rank(self): if self.is_slurm_managing_tasks: diff --git a/pytorch_lightning/trainer/distrib_parts.py b/pytorch_lightning/trainer/distrib_parts.py index dabe72f27d823..b20ac09918085 100644 --- a/pytorch_lightning/trainer/distrib_parts.py +++ b/pytorch_lightning/trainer/distrib_parts.py @@ -758,9 +758,9 @@ def retry_jittered_backoff(f, num_retries=5): for i in range(num_retries): try: return f() - except RuntimeError as e: + except RuntimeError as err: if i == num_retries - 1: - raise e + raise err else: continue time.sleep(sleep) diff --git a/pytorch_lightning/trainer/training_io.py b/pytorch_lightning/trainer/training_io.py index 1daa426463100..c3b2bdf73d2b2 100644 --- a/pytorch_lightning/trainer/training_io.py +++ b/pytorch_lightning/trainer/training_io.py @@ -203,7 +203,7 @@ def register_slurm_signal_handlers(self): job_name = os.environ['SLURM_JOB_NAME'] if job_name != 'bash': on_slurm = True - except Exception as e: + except Exception: pass if on_slurm: @@ -459,11 +459,11 @@ def hpc_save(self, folderpath: str, logger): # TODO: fix for anything with multiprocess DP, DDP, DDP2 try: self._atomic_save(checkpoint, filepath) - except AttributeError as e: + except AttributeError as err: if 'module_arguments' in checkpoint: del checkpoint['module_arguments'] rank_zero_warn('warning, `module_arguments` dropped from checkpoint.' - f' An attribute is not picklable {e}') + f' An attribute is not picklable {err}') self._atomic_save(checkpoint, filepath) return filepath From 0d7ee371a4635aff13445c484b684a82a35ee2c8 Mon Sep 17 00:00:00 2001 From: Jirka Date: Sat, 23 May 2020 00:43:03 +0200 Subject: [PATCH 078/100] transform --- pytorch_lightning/core/lightning.py | 66 +++++++----------------- pytorch_lightning/core/saving.py | 15 ------ pytorch_lightning/trainer/training_io.py | 22 ++++++-- 3 files changed, 39 insertions(+), 64 deletions(-) diff --git a/pytorch_lightning/core/lightning.py b/pytorch_lightning/core/lightning.py index 69dbbd9293c43..844830bf896ef 100644 --- a/pytorch_lightning/core/lightning.py +++ b/pytorch_lightning/core/lightning.py @@ -1705,67 +1705,41 @@ def _auto_register_hparams(self): """ Removes the need to pass in hparams. 
Instead, we register every argument in init to the module with some caveats: - 1. we don't overwrite the property if it already exists - 2. we also store a module_arguments property for model loading and saving """ frame = inspect.currentframe() - frame_args = _collect_init_args(frame, {}) - init_args = {k: v for k, v in frame_args.items() - if k not in ('args', 'kwargs', 'self', '__class__', 'frame', 'frame_args')} + frame_args = _collect_init_args(frame, []) child = _get_latest_child(frame) - # we'll save hparams automatically (renamed to module_arguments) - for arg, val in init_args.items(): - # don't overwrite something already set - if hasattr(child, arg): - log.warning(f'init argument `{arg}` was skipped while auto `hparams` registering,' - ' because ut match already existing attribute of this class.') - continue - setattr(child, arg, val) - # set module_arguments in child - # skip `hparams` to uncycle with property - setattr(child, 'module_arguments', [k for k in init_args if k != 'hparams']) - - def get_hyper_params(self, save_mode: True) -> dict: - """Copy the actual values from model according the list - - Args: - weights_only: saving model weights only - save_mode: drop all init argument which are not primitives - - Return: - structured dictionary - """ - module_args = {} - for k in self.module_arguments: - val = getattr(self, k) - if save_mode and not is_picklable(val): - continue - module_args[k] = val - return module_args + child.module_self_arguments = frame_args[-1] + child.module_parents_arguments = {} + for args in frame_args[-1]: + child.module_parents_arguments.update(args) @property - def hparams(self) -> dict: - """Imitate the past `hparams` attribute.""" - return self.get_hyper_params(save_mode=True) + def module_arguments(self) -> dict: + """Aggregate this module and ll parents arguments.""" + args = dict(self.module_self_arguments) + args.update(self.module_parents_arguments) + return args -def _collect_init_args(frame, args: dict) -> dict: +def _collect_init_args(frame, path_args: list) -> list: """Recursive search for all children.""" - if any(k in frame.f_locals for k in ['self', '__class__']): - local_args = frame.f_locals # .get('frame_args') + if 'self' in frame.f_locals: + local_args = {k: v for k, v in frame.f_locals.items() + if k not in ('args', 'kwargs', 'self', '__class__', 'frame', 'frame_args')} local_args.update(local_args.get('kwargs', {})) - # back compatible hparsm as single argument - hparams = local_args.get('hparams') - if hparams: + if 'hparams' in local_args: + # back compatible hparams as single argument + hparams = local_args.get('hparams') local_args.update(vars(hparams) if isinstance(hparams, Namespace) else hparams) # recursive update - args.update(local_args) - return _collect_init_args(frame.f_back, args) + path_args.append(local_args) + return _collect_init_args(frame.f_back, path_args) else: - return args + return path_args def _get_latest_child(frame, child: object = None) -> object: diff --git a/pytorch_lightning/core/saving.py b/pytorch_lightning/core/saving.py index 0c59bde9b12f6..d8bca1db1338f 100644 --- a/pytorch_lightning/core/saving.py +++ b/pytorch_lightning/core/saving.py @@ -1,7 +1,6 @@ import ast import csv import os -import pickle import yaml from argparse import Namespace @@ -159,17 +158,3 @@ def convert(val: str) -> Union[int, float, bool, str]: except (ValueError, SyntaxError) as err: log.debug(err) return val - - -def is_picklable(obj) -> bool: - """Try if the object is serializable - - >>> is_picklable(5) - True - 
""" - try: - pickle.dumps(obj) - except Exception: - return False - else: - return True diff --git a/pytorch_lightning/trainer/training_io.py b/pytorch_lightning/trainer/training_io.py index c3b2bdf73d2b2..f8ea530c69fba 100644 --- a/pytorch_lightning/trainer/training_io.py +++ b/pytorch_lightning/trainer/training_io.py @@ -84,6 +84,7 @@ """ import os +import pickle import re import signal from abc import ABC @@ -95,6 +96,7 @@ from pytorch_lightning import _logger as log from pytorch_lightning.core.lightning import LightningModule +from pytorch_lightning.core.saving import is_picklable from pytorch_lightning.loggers import LightningLoggerBase from pytorch_lightning.overrides.data_parallel import ( LightningDistributedDataParallel, @@ -307,12 +309,11 @@ def restore(self, checkpoint_path: str, on_gpu: bool): # load training state (affects trainer only) self.restore_training_state(checkpoint) - def dump_checkpoint(self, weights_only: bool = False, save_mode: bool = True) -> dict: + def dump_checkpoint(self, weights_only: bool = False) -> dict: """Creating model checkpoint. Args: weights_only: saving model weights only - save_mode: drop all init argument which are not primitives Return: structured dictionary @@ -355,7 +356,8 @@ def dump_checkpoint(self, weights_only: bool = False, save_mode: bool = True) -> if hasattr(model, 'module_arguments') and model.module_arguments: # add arguments to the checkpoint - checkpoint['module_arguments'] = model.get_hyper_params(save_mode=save_mode) + checkpoint['module_arguments'] = {k: v for k, v in model.module_arguments.items() + if is_picklable(v)} # give the model a chance to add a few things model.on_save_checkpoint(checkpoint) @@ -508,3 +510,17 @@ def max_ckpt_in_folder(self, path, name_key='ckpt_'): ckpt_vs.append(int(name)) return max(ckpt_vs) + + +def is_picklable(obj) -> bool: + """Try if the object is serializable + + >>> is_picklable(5) + True + """ + try: + pickle.dumps(obj) + except Exception: + return False + else: + return True From fb7898a5785eb44842fbcad60a017c12b2abd7d8 Mon Sep 17 00:00:00 2001 From: Jirka Date: Sat, 23 May 2020 01:07:28 +0200 Subject: [PATCH 079/100] transform --- pytorch_lightning/core/lightning.py | 22 +++++++++++-------- pytorch_lightning/trainer/trainer.py | 2 +- pytorch_lightning/trainer/training_io.py | 1 - tests/base/model_optimizers.py | 28 +++++++++++++----------- tests/base/model_template.py | 14 ++++++------ tests/base/model_utilities.py | 4 ++-- tests/models/test_hparams.py | 14 ++++++------ 7 files changed, 45 insertions(+), 40 deletions(-) diff --git a/pytorch_lightning/core/lightning.py b/pytorch_lightning/core/lightning.py index 844830bf896ef..02b824db6fad4 100644 --- a/pytorch_lightning/core/lightning.py +++ b/pytorch_lightning/core/lightning.py @@ -17,8 +17,7 @@ from pytorch_lightning.core.grads import GradInformation from pytorch_lightning.core.hooks import ModelHooks from pytorch_lightning.core.memory import ModelSummary -from pytorch_lightning.core.saving import ModelIO, load_hparams_from_tags_csv, load_hparams_from_yaml, update_hparams, \ - is_picklable +from pytorch_lightning.core.saving import ModelIO, load_hparams_from_tags_csv, load_hparams_from_yaml from pytorch_lightning.utilities.device_dtype_mixin import DeviceDtypeModuleMixin from pytorch_lightning.overrides.data_parallel import LightningDistributedDataParallel from pytorch_lightning.utilities.exceptions import MisconfigurationException @@ -75,7 +74,6 @@ def __init__(self, *args, **kwargs): #: device reference self._device = 
torch.device('cpu') - self.module_arguments = [] # register all params passed into the child module in __init__ self._auto_register_hparams() @@ -1712,18 +1710,24 @@ def _auto_register_hparams(self): child = _get_latest_child(frame) # set module_arguments in child - child.module_self_arguments = frame_args[-1] - child.module_parents_arguments = {} - for args in frame_args[-1]: + if not hasattr(child, 'module_self_arguments') or not child.module_self_arguments: + child.module_self_arguments = frame_args[-1] + if not hasattr(child, 'module_parents_arguments') or not child.module_parents_arguments: + child.module_parents_arguments = {} + for args in frame_args[:-1]: child.module_parents_arguments.update(args) @property def module_arguments(self) -> dict: - """Aggregate this module and ll parents arguments.""" - args = dict(self.module_self_arguments) - args.update(self.module_parents_arguments) + """Aggregate this module and all parents arguments.""" + args = dict(self.module_parents_arguments) + args.update(self.module_self_arguments) return args + @property + def module_hparams(self) -> Namespace: + return Namespace(**self.module_arguments) + def _collect_init_args(frame, path_args: list) -> list: """Recursive search for all children.""" diff --git a/pytorch_lightning/trainer/trainer.py b/pytorch_lightning/trainer/trainer.py index e7abe330dba8b..90e303206b0c2 100644 --- a/pytorch_lightning/trainer/trainer.py +++ b/pytorch_lightning/trainer/trainer.py @@ -953,7 +953,7 @@ def run_pretrain_routine(self, model: LightningModule): if self.logger is not None: # save exp to get started if hasattr(ref_model, "module_arguments"): - self.logger.log_hyperparams({k: getattr(ref_model, k) for k in ref_model.module_arguments}) + self.logger.log_hyperparams(ref_model.module_arguments) self.logger.save() diff --git a/pytorch_lightning/trainer/training_io.py b/pytorch_lightning/trainer/training_io.py index f8ea530c69fba..9d343233d8b3c 100644 --- a/pytorch_lightning/trainer/training_io.py +++ b/pytorch_lightning/trainer/training_io.py @@ -96,7 +96,6 @@ from pytorch_lightning import _logger as log from pytorch_lightning.core.lightning import LightningModule -from pytorch_lightning.core.saving import is_picklable from pytorch_lightning.loggers import LightningLoggerBase from pytorch_lightning.overrides.data_parallel import ( LightningDistributedDataParallel, diff --git a/tests/base/model_optimizers.py b/tests/base/model_optimizers.py index aebe0550a67ac..d6e3e0c98fa8d 100644 --- a/tests/base/model_optimizers.py +++ b/tests/base/model_optimizers.py @@ -1,15 +1,17 @@ -from abc import ABC +from abc import ABC, abstractmethod +from argparse import Namespace from torch import optim class ConfigureOptimizersPool(ABC): + def configure_optimizers(self): """ return whatever optimizers we want here. :return: list of optimizers """ - optimizer = optim.Adam(self.parameters(), lr=self.learning_rate) + optimizer = optim.Adam(self.parameters(), lr=self.module_hparams.learning_rate) return optimizer def configure_optimizers__empty(self): @@ -20,7 +22,7 @@ def configure_optimizers__lbfgs(self): return whatever optimizers we want here. 
:return: list of optimizers """ - optimizer = optim.LBFGS(self.parameters(), lr=self.learning_rate) + optimizer = optim.LBFGS(self.parameters(), lr=self.module_hparams.learning_rate) return optimizer def configure_optimizers__multiple_optimizers(self): @@ -29,26 +31,26 @@ def configure_optimizers__multiple_optimizers(self): :return: list of optimizers """ # try no scheduler for this model (testing purposes) - optimizer1 = optim.Adam(self.parameters(), lr=self.learning_rate) - optimizer2 = optim.Adam(self.parameters(), lr=self.learning_rate) + optimizer1 = optim.Adam(self.parameters(), lr=self.module_hparams.learning_rate) + optimizer2 = optim.Adam(self.parameters(), lr=self.module_hparams.learning_rate) return optimizer1, optimizer2 def configure_optimizers__single_scheduler(self): - optimizer = optim.Adam(self.parameters(), lr=self.learning_rate) + optimizer = optim.Adam(self.parameters(), lr=self.module_hparams.learning_rate) lr_scheduler = optim.lr_scheduler.StepLR(optimizer, 1, gamma=0.1) return [optimizer], [lr_scheduler] def configure_optimizers__multiple_schedulers(self): - optimizer1 = optim.Adam(self.parameters(), lr=self.learning_rate) - optimizer2 = optim.Adam(self.parameters(), lr=self.learning_rate) + optimizer1 = optim.Adam(self.parameters(), lr=self.module_hparams.learning_rate) + optimizer2 = optim.Adam(self.parameters(), lr=self.module_hparams.learning_rate) lr_scheduler1 = optim.lr_scheduler.StepLR(optimizer1, 1, gamma=0.1) lr_scheduler2 = optim.lr_scheduler.StepLR(optimizer2, 1, gamma=0.1) return [optimizer1, optimizer2], [lr_scheduler1, lr_scheduler2] def configure_optimizers__mixed_scheduling(self): - optimizer1 = optim.Adam(self.parameters(), lr=self.learning_rate) - optimizer2 = optim.Adam(self.parameters(), lr=self.learning_rate) + optimizer1 = optim.Adam(self.parameters(), lr=self.module_hparams.learning_rate) + optimizer2 = optim.Adam(self.parameters(), lr=self.module_hparams.learning_rate) lr_scheduler1 = optim.lr_scheduler.StepLR(optimizer1, 4, gamma=0.1) lr_scheduler2 = optim.lr_scheduler.StepLR(optimizer2, 1, gamma=0.1) @@ -56,14 +58,14 @@ def configure_optimizers__mixed_scheduling(self): [{'scheduler': lr_scheduler1, 'interval': 'step'}, lr_scheduler2] def configure_optimizers__reduce_lr_on_plateau(self): - optimizer = optim.Adam(self.parameters(), lr=self.learning_rate) + optimizer = optim.Adam(self.parameters(), lr=self.module_hparams.learning_rate) lr_scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer) return [optimizer], [lr_scheduler] def configure_optimizers__param_groups(self): param_groups = [ - {'params': list(self.parameters())[:2], 'lr': self.learning_rate * 0.1}, - {'params': list(self.parameters())[2:], 'lr': self.learning_rate} + {'params': list(self.parameters())[:2], 'lr': self.module_hparams.learning_rate * 0.1}, + {'params': list(self.parameters())[2:], 'lr': self.module_hparams.learning_rate} ] optimizer = optim.Adam(param_groups) diff --git a/tests/base/model_template.py b/tests/base/model_template.py index 927158d5e7919..85663758962d1 100644 --- a/tests/base/model_template.py +++ b/tests/base/model_template.py @@ -66,15 +66,15 @@ def __build_model(self): :return: """ self.c_d1 = nn.Linear( - in_features=self.in_features, - out_features=self.hidden_dim + in_features=self.module_hparams.in_features, + out_features=self.module_hparams.hidden_dim ) - self.c_d1_bn = nn.BatchNorm1d(self.hidden_dim) - self.c_d1_drop = nn.Dropout(self.drop_prob) + self.c_d1_bn = nn.BatchNorm1d(self.module_hparams.hidden_dim) + self.c_d1_drop = 
nn.Dropout(self.module_hparams.drop_prob) self.c_d2 = nn.Linear( - in_features=self.hidden_dim, - out_features=self.out_features + in_features=self.module_hparams.hidden_dim, + out_features=self.module_hparams.out_features ) def forward(self, x): @@ -93,7 +93,7 @@ def loss(self, labels, logits): return nll def prepare_data(self): - _ = TrialMNIST(root=self.data_root, train=True, download=True) + _ = TrialMNIST(root=self.module_hparams.data_root, train=True, download=True) @staticmethod def get_default_hparams(continue_training: bool = False, hpc_exp_number: int = 0) -> dict: diff --git a/tests/base/model_utilities.py b/tests/base/model_utilities.py index ce34b39b162f8..a30fda3e1ef2d 100644 --- a/tests/base/model_utilities.py +++ b/tests/base/model_utilities.py @@ -7,11 +7,11 @@ class ModelTemplateData: hparams: ... def dataloader(self, train): - dataset = TrialMNIST(root=self.data_root, train=train, download=True) + dataset = TrialMNIST(root=self.module_hparams.data_root, train=train, download=True) loader = DataLoader( dataset=dataset, - batch_size=self.batch_size, + batch_size=self.module_hparams.batch_size, # test and valid shall not be shuffled shuffle=train, ) diff --git a/tests/models/test_hparams.py b/tests/models/test_hparams.py index b9a9b6ef91d91..8b8e9aa299b2d 100644 --- a/tests/models/test_hparams.py +++ b/tests/models/test_hparams.py @@ -44,18 +44,18 @@ def __init__(self, *args, skip_arg=450, **kwargs): def test_auto_hparams(tmpdir, cls): # test that the model automatically sets the args passed into init as attrs model = cls() - assert model.batch_size == 32 + assert model.module_hparams.batch_size == 32 model = cls(batch_size=179) - assert model.batch_size == 179 + assert model.module_hparams.batch_size == 179 if isinstance(model, SubClassEvalModel): - assert model.subclass_arg == 1200 + assert model.module_hparams.subclass_arg == 1200 if isinstance(model, (HparamsNamespaceEvalModel, HparamsDictEvalModel)): - assert model.hparam_arg == 123 + assert model.module_hparams.hparam_arg == 123 if isinstance(model, PersistClassEvalModel): - assert model.skip_arg == 15 + assert model.module_hparams.skip_arg == 15 # verify that the checkpoint saved the correct values trainer = Trainer(max_steps=5, default_root_dir=tmpdir) @@ -69,8 +69,8 @@ def test_auto_hparams(tmpdir, cls): # verify that model loads correctly model = cls.load_from_checkpoint(raw_checkpoint_path) - assert model.batch_size == 179 + assert model.module_hparams.batch_size == 179 # verify that we can overwrite whatever we want model = cls.load_from_checkpoint(raw_checkpoint_path, batch_size=99) - assert model.batch_size == 99 + assert model.module_hparams.batch_size == 99 From 3d8a3db8f0716da6d3e38256a2945422a1dd9734 Mon Sep 17 00:00:00 2001 From: Jirka Date: Sat, 23 May 2020 01:16:54 +0200 Subject: [PATCH 080/100] transform --- pytorch_lightning/core/lightning.py | 5 +++-- tests/models/test_hparams.py | 9 +-------- 2 files changed, 4 insertions(+), 10 deletions(-) diff --git a/pytorch_lightning/core/lightning.py b/pytorch_lightning/core/lightning.py index 02b824db6fad4..ff44ab21da125 100644 --- a/pytorch_lightning/core/lightning.py +++ b/pytorch_lightning/core/lightning.py @@ -1732,9 +1732,10 @@ def module_hparams(self) -> Namespace: def _collect_init_args(frame, path_args: list) -> list: """Recursive search for all children.""" if 'self' in frame.f_locals: - local_args = {k: v for k, v in frame.f_locals.items() - if k not in ('args', 'kwargs', 'self', '__class__', 'frame', 'frame_args')} + local_args = 
dict(frame.f_locals) local_args.update(local_args.get('kwargs', {})) + local_args = {k: v for k, v in local_args.items() + if k not in ('args', 'kwargs', 'self', '__class__', 'frame', 'frame_args')} if 'hparams' in local_args: # back compatible hparams as single argument hparams = local_args.get('hparams') diff --git a/tests/models/test_hparams.py b/tests/models/test_hparams.py index 8b8e9aa299b2d..0dd3112722a45 100644 --- a/tests/models/test_hparams.py +++ b/tests/models/test_hparams.py @@ -29,18 +29,11 @@ class SubSubClassEvalModel(SubClassEvalModel): pass -class PersistClassEvalModel(SubClassEvalModel): - def __init__(self, *args, skip_arg=450, **kwargs): - self.skip_arg = 15 - super().__init__() - - @pytest.mark.parametrize("cls", [EvalModelTemplate, SubClassEvalModel, SubSubClassEvalModel, HparamsNamespaceEvalModel, - HparamsDictEvalModel, - PersistClassEvalModel]) + HparamsDictEvalModel]) def test_auto_hparams(tmpdir, cls): # test that the model automatically sets the args passed into init as attrs model = cls() From db6f9436f765d31e473a852e1da807a85ae2f1f5 Mon Sep 17 00:00:00 2001 From: Jirka Date: Sat, 23 May 2020 13:56:46 +0200 Subject: [PATCH 081/100] transform --- pytorch_lightning/core/lightning.py | 16 +++++++--------- tests/models/test_hparams.py | 3 --- 2 files changed, 7 insertions(+), 12 deletions(-) diff --git a/pytorch_lightning/core/lightning.py b/pytorch_lightning/core/lightning.py index ff44ab21da125..9b607ae5cf189 100644 --- a/pytorch_lightning/core/lightning.py +++ b/pytorch_lightning/core/lightning.py @@ -1710,12 +1710,10 @@ def _auto_register_hparams(self): child = _get_latest_child(frame) # set module_arguments in child - if not hasattr(child, 'module_self_arguments') or not child.module_self_arguments: - child.module_self_arguments = frame_args[-1] - if not hasattr(child, 'module_parents_arguments') or not child.module_parents_arguments: - child.module_parents_arguments = {} + child._module_self_arguments = frame_args[-1] + child._module_parents_arguments = {} for args in frame_args[:-1]: - child.module_parents_arguments.update(args) + child._module_parents_arguments.update(args) @property def module_arguments(self) -> dict: @@ -1736,10 +1734,10 @@ def _collect_init_args(frame, path_args: list) -> list: local_args.update(local_args.get('kwargs', {})) local_args = {k: v for k, v in local_args.items() if k not in ('args', 'kwargs', 'self', '__class__', 'frame', 'frame_args')} - if 'hparams' in local_args: - # back compatible hparams as single argument - hparams = local_args.get('hparams') - local_args.update(vars(hparams) if isinstance(hparams, Namespace) else hparams) + # if 'hparams' in local_args: + # # back compatible hparams as single argument + # hparams = local_args.get('hparams') + # local_args.update(vars(hparams) if isinstance(hparams, Namespace) else hparams) # recursive update path_args.append(local_args) return _collect_init_args(frame.f_back, path_args) diff --git a/tests/models/test_hparams.py b/tests/models/test_hparams.py index 0dd3112722a45..89b3789d8dde0 100644 --- a/tests/models/test_hparams.py +++ b/tests/models/test_hparams.py @@ -47,9 +47,6 @@ def test_auto_hparams(tmpdir, cls): if isinstance(model, (HparamsNamespaceEvalModel, HparamsDictEvalModel)): assert model.module_hparams.hparam_arg == 123 - if isinstance(model, PersistClassEvalModel): - assert model.module_hparams.skip_arg == 15 - # verify that the checkpoint saved the correct values trainer = Trainer(max_steps=5, default_root_dir=tmpdir) trainer.fit(model) From 
66717dacc4b82552460dd17c23f90a2ad32ad9bb Mon Sep 17 00:00:00 2001 From: Jirka Date: Sat, 23 May 2020 14:51:08 +0200 Subject: [PATCH 082/100] cleaning --- docs/source/hyperparameters.rst | 11 ++++-- docs/source/lr_finder.rst | 12 ++++--- docs/source/optimizers.rst | 2 +- docs/source/weights_loading.rst | 1 + .../computer_vision_fine_tuning.py | 1 + .../generative_adversarial_net.py | 3 +- pl_examples/domain_templates/imagenet.py | 1 + .../domain_templates/reinforce_learn_Qnet.py | 21 ++++++----- .../domain_templates/semantic_segmentation.py | 15 +++++--- pl_examples/models/lightning_template.py | 17 ++++----- pytorch_lightning/core/lightning.py | 27 +++++++------- pytorch_lightning/core/saving.py | 5 ++- pytorch_lightning/loggers/comet.py | 2 +- pytorch_lightning/overrides/data_parallel.py | 4 +-- .../trainer/distrib_data_parallel.py | 4 +-- pytorch_lightning/trainer/distrib_parts.py | 4 +-- pytorch_lightning/trainer/trainer.py | 3 +- pytorch_lightning/trainer/training_io.py | 2 +- tests/base/model_optimizers.py | 27 +++++++------- tests/base/model_template.py | 35 ++++++++++--------- tests/base/model_utilities.py | 4 +-- tests/models/test_restore.py | 2 -- tests/trainer/test_dataloaders.py | 2 +- 23 files changed, 114 insertions(+), 91 deletions(-) diff --git a/docs/source/hyperparameters.rst b/docs/source/hyperparameters.rst index 7ac7ceacff3ed..5e5ea3e217325 100644 --- a/docs/source/hyperparameters.rst +++ b/docs/source/hyperparameters.rst @@ -105,8 +105,9 @@ modify the network and read those values in the LightningModule class LitMNIST(LightningModule): - def __init__(self, layer_1_dim): + def __init__(self, layer_1_dim, layer_2_dim, learning_rate, batch_size): super().__init__() + self._auto_reguster_arguments() self.layer_1 = torch.nn.Linear(28 * 28, self.layer_1_dim) self.layer_2 = torch.nn.Linear(self.layer_1_dim, self.layer_2_dim) @@ -127,6 +128,8 @@ modify the network and read those values in the LightningModule parser.add_argument('--learning_rate', type=float, default=0.002) return parser + model = LitMNIST(10, 20, 0.0001, 5) + Now pass in the params when you init your model .. code-block:: python @@ -137,7 +140,7 @@ Now pass in the params when you init your model model = LitMNIST(args) Within any LightningModule all the arguments you pass into your `__init__` will be available -simply with `self.arg`. However, we won't overwrite any other arguments you have already defined. +simply with `self._module_arguments`. However, we won't overwrite any other arguments you have already defined. We will also add all of those values to the TensorBoard hparams tab (unless it's an object which we won't). We also will store those values into checkpoints for you which you can use to init your models. @@ -148,6 +151,8 @@ models. def __init__(self, layer_1_dim, some_other_param): super().__init__() + self.layer_1_dim = layer_1_dim + self.some_other_param = some_other_param self.layer_1 = torch.nn.Linear(28 * 28, self.layer_1_dim) @@ -158,6 +163,8 @@ models. 
self.some_other_param = 12 # but you can override it as normal + model = LitMNIST(10, 20) + Trainer args ^^^^^^^^^^^^ diff --git a/docs/source/lr_finder.rst b/docs/source/lr_finder.rst index 04ebc083c5a59..926a52c5cf1b9 100755 --- a/docs/source/lr_finder.rst +++ b/docs/source/lr_finder.rst @@ -42,8 +42,11 @@ This flag sets your learning rate which can be accessed via ``self.lr`` or ``sel class LitModel(LightningModule): + def __init__(self, hparams): + self._auto_register_arguments() + def configure_optimizers(self): - return Adam(self.parameters(), lr=(self.lr or self.learning_rate)) + return Adam(self.parameters(), lr=(self.hparams.lr or self.hparams.learning_rate)) # finds learning rate automatically # sets hparams.lr or hparams.learning_rate to that learning rate @@ -53,7 +56,7 @@ To use an arbitrary value set it in the parameter. .. testcode:: - # to set to your own `self.my_value` + # to set to your own hparams.my_value trainer = Trainer(auto_lr_find='my_value') Under the hood, when you call fit, this is what happens. @@ -75,7 +78,7 @@ of this would look like .. code-block:: python - model = MyModelClass() + model = MyModelClass(hparams) trainer = Trainer() # Run learning rate finder @@ -92,8 +95,7 @@ of this would look like new_lr = lr_finder.suggestion() # update hparams of the model - model.lr = new_lr - model.learning_rate = new_lr + model.hparams.lr = new_lr # Fit model trainer.fit(model) diff --git a/docs/source/optimizers.rst b/docs/source/optimizers.rst index 7f5a56f1a5994..8f8715a09e7b3 100644 --- a/docs/source/optimizers.rst +++ b/docs/source/optimizers.rst @@ -112,7 +112,7 @@ Here we add a learning-rate warm up if self.trainer.global_step < 500: lr_scale = min(1., float(self.trainer.global_step + 1) / 500.) for pg in optimizer.param_groups: - pg['lr'] = lr_scale * self.learning_rate + pg['lr'] = lr_scale * self.hparams.learning_rate # update params optimizer.step() diff --git a/docs/source/weights_loading.rst b/docs/source/weights_loading.rst index 1d8c1f2e2eaa6..01ebe47f68295 100644 --- a/docs/source/weights_loading.rst +++ b/docs/source/weights_loading.rst @@ -108,6 +108,7 @@ But if you don't want to use the values saved in the checkpoint, pass in your ow def __init__(self, in_dim, out_dim): super().__init__() + self._auto_register_arguments() self.l1 = nn.Linear(self.in_dim, self.out_dim) you can restore the model like this diff --git a/pl_examples/domain_templates/computer_vision_fine_tuning.py b/pl_examples/domain_templates/computer_vision_fine_tuning.py index 8b6026b31bb85..5370feb72c4c5 100644 --- a/pl_examples/domain_templates/computer_vision_fine_tuning.py +++ b/pl_examples/domain_templates/computer_vision_fine_tuning.py @@ -157,6 +157,7 @@ def __init__(self, lr_scheduler_gamma=1e-1, num_workers=6) -> None: super().__init__() + self._auto_register_arguments() self.dl_path = dl_path self.__build_model() diff --git a/pl_examples/domain_templates/generative_adversarial_net.py b/pl_examples/domain_templates/generative_adversarial_net.py index 59f3fdf5bd3b5..c009f689bb4d1 100644 --- a/pl_examples/domain_templates/generative_adversarial_net.py +++ b/pl_examples/domain_templates/generative_adversarial_net.py @@ -74,10 +74,11 @@ class GAN(LightningModule): def __init__(self, latent_dim=100, lr=0.0002, b1=0.5, b2=0.999, batch_size=64): super().__init__() + self._auto_register_arguments() # networks mnist_shape = (1, 28, 28) - self.generator = Generator(latent_dim=latent_dim, img_shape=mnist_shape) + self.generator = Generator(latent_dim=self.latent_dim, 
img_shape=mnist_shape) self.discriminator = Discriminator(img_shape=mnist_shape) # cache for generated images diff --git a/pl_examples/domain_templates/imagenet.py b/pl_examples/domain_templates/imagenet.py index 231e0a8834570..68a5bc0f0627d 100644 --- a/pl_examples/domain_templates/imagenet.py +++ b/pl_examples/domain_templates/imagenet.py @@ -34,6 +34,7 @@ def __init__(self, arch, pretrained, lr, momentum, weight_decay, data_path, batc TODO: add docstring here """ super().__init__() + self._auto_register_arguments() self.model = models.__dict__[self.arch](pretrained=self.pretrained) def forward(self, x): diff --git a/pl_examples/domain_templates/reinforce_learn_Qnet.py b/pl_examples/domain_templates/reinforce_learn_Qnet.py index 1f4ab1671b340..5ab9ea8433dcd 100644 --- a/pl_examples/domain_templates/reinforce_learn_Qnet.py +++ b/pl_examples/domain_templates/reinforce_learn_Qnet.py @@ -192,15 +192,17 @@ class DQNLightning(pl.LightningModule): def __init__(self, replay_size, - warm_start_steps, - gamma, eps_start, - eps_end, - eps_last_frame, + warm_start_steps: int, + gamma: float, + eps_start: int, + eps_end: int, + eps_last_frame: int, sync_rate, - lr, + lr: float, episode_length, batch_size) -> None: super().__init__() + self._auto_register_arguments() self.env = gym.make(self.env) obs_size = self.env.observation_space.shape[0] @@ -307,10 +309,11 @@ def configure_optimizers(self) -> List[Optimizer]: def __dataloader(self) -> DataLoader: """Initialize the Replay Buffer dataset used for retrieving experiences""" dataset = RLDataset(self.buffer, self.episode_length) - dataloader = DataLoader(dataset=dataset, - batch_size=self.batch_size, - sampler=None - ) + dataloader = DataLoader( + dataset=dataset, + batch_size=self.batch_size, + sampler=None, + ) return dataloader def train_dataloader(self) -> DataLoader: diff --git a/pl_examples/domain_templates/semantic_segmentation.py b/pl_examples/domain_templates/semantic_segmentation.py index b1ac9cbe63727..ac6ecc409d4ac 100644 --- a/pl_examples/domain_templates/semantic_segmentation.py +++ b/pl_examples/domain_templates/semantic_segmentation.py @@ -1,5 +1,5 @@ import os -from argparse import ArgumentParser +from argparse import ArgumentParser, Namespace import numpy as np import torch @@ -128,8 +128,15 @@ class SegModel(pl.LightningModule): Adam optimizer is used along with Cosine Annealing learning rate scheduler. 
""" - def __init__(self, data_path, batch_size, lr, num_layers, features_start, bilinear): + def __init__(self, + data_path: str, + batch_size: int, + lr: float, + num_layers: int, + features_start: int, + bilinear: bool): super().__init__() + self._auto_register_arguments() self.net = UNet(num_classes=19, num_layers=self.num_layers, features_start=self.features_start, bilinear=self.bilinear) @@ -178,11 +185,11 @@ def val_dataloader(self): return DataLoader(self.validset, batch_size=self.batch_size, shuffle=False) -def main(hparams): +def main(hparams: Namespace): # ------------------------ # 1 INIT LIGHTNING MODEL # ------------------------ - model = SegModel(hparams) + model = SegModel(**vars(hparams)) # ------------------------ # 2 SET LOGGER diff --git a/pl_examples/models/lightning_template.py b/pl_examples/models/lightning_template.py index e5023c20443b1..25f6e9a109fc4 100644 --- a/pl_examples/models/lightning_template.py +++ b/pl_examples/models/lightning_template.py @@ -37,20 +37,21 @@ class LightningTemplateModel(LightningModule): """ def __init__(self, - drop_prob=0.2, - batch_size=2, - in_features=28 * 28, - learning_rate=0.001 * 8, - optimizer_name='adam', - data_root='./datasets', - out_features=10, - hidden_dim=1000, + drop_prob: float = 0.2, + batch_size: int = 2, + in_features: int = 28 * 28, + learning_rate: float = 0.001 * 8, + optimizer_name: str = 'adam', + data_root: str = './datasets', + out_features: int = 10, + hidden_dim: int = 1000, ): """ Pass in hyperparameters as a `argparse.Namespace` or a `dict` to the model. """ # init superclass super().__init__() + self._auto_register_arguments() self.c_d1 = nn.Linear(in_features=self.in_features, out_features=self.hidden_dim) diff --git a/pytorch_lightning/core/lightning.py b/pytorch_lightning/core/lightning.py index 9b607ae5cf189..073ad6c656d4a 100644 --- a/pytorch_lightning/core/lightning.py +++ b/pytorch_lightning/core/lightning.py @@ -75,7 +75,7 @@ def __init__(self, *args, **kwargs): self._device = torch.device('cpu') # register all params passed into the child module in __init__ - self._auto_register_hparams() + self._auto_collect_arguments() @property def on_gpu(self): @@ -1699,11 +1699,18 @@ def get_tqdm_dict(self) -> Dict[str, Union[int, str]]: " and this method will be removed in v1.0.0", DeprecationWarning) return self.get_progress_bar_dict() - def _auto_register_hparams(self): - """ - Removes the need to pass in hparams. 
Instead, we register every argument in init - to the module with some caveats: - """ + def _auto_register_arguments(self, include_parents=False): + """Automatically register all init arguments to `self`.""" + if not hasattr(self, '_module_self_arguments'): + self._auto_collect_arguments() + + args = dict(self._module_parents_arguments) if include_parents else {} + args.update(self._module_self_arguments) + for k, v in ((k, v ) for k, v in args.items() if not hasattr(self, k)): + setattr(self, 'k', v) + + def _auto_collect_arguments(self): + """Collect all arguments module arguments.""" frame = inspect.currentframe() frame_args = _collect_init_args(frame, []) @@ -1718,14 +1725,10 @@ def _auto_register_hparams(self): @property def module_arguments(self) -> dict: """Aggregate this module and all parents arguments.""" - args = dict(self.module_parents_arguments) - args.update(self.module_self_arguments) + args = dict(self._module_parents_arguments) + args.update(self._module_self_arguments) return args - @property - def module_hparams(self) -> Namespace: - return Namespace(**self.module_arguments) - def _collect_init_args(frame, path_args: list) -> list: """Recursive search for all children.""" diff --git a/pytorch_lightning/core/saving.py b/pytorch_lightning/core/saving.py index d8bca1db1338f..adf782e6d4f21 100644 --- a/pytorch_lightning/core/saving.py +++ b/pytorch_lightning/core/saving.py @@ -1,7 +1,6 @@ import ast import csv import os - import yaml from argparse import Namespace from typing import Union, Dict, Any @@ -155,6 +154,6 @@ def save_hparams_to_yaml(config_yaml, hparams: Union[dict, Namespace]) -> None: def convert(val: str) -> Union[int, float, bool, str]: try: return ast.literal_eval(val) - except (ValueError, SyntaxError) as err: - log.debug(err) + except (ValueError, SyntaxError) as e: + log.debug(e) return val diff --git a/pytorch_lightning/loggers/comet.py b/pytorch_lightning/loggers/comet.py index fc81c6b5b6dfb..1d24676d8018a 100644 --- a/pytorch_lightning/loggers/comet.py +++ b/pytorch_lightning/loggers/comet.py @@ -125,7 +125,7 @@ def __init__(self, if experiment_name: try: self.name = experiment_name - except TypeError: + except TypeError as e: log.exception("Failed to set experiment name for comet.ml logger") self._kwargs = kwargs diff --git a/pytorch_lightning/overrides/data_parallel.py b/pytorch_lightning/overrides/data_parallel.py index f2a23b188e068..b2f7816ec0ac2 100644 --- a/pytorch_lightning/overrides/data_parallel.py +++ b/pytorch_lightning/overrides/data_parallel.py @@ -177,9 +177,9 @@ def _worker(i, module, input, kwargs, device=None): with lock: results[i] = output - except Exception as ex: + except Exception as e: with lock: - results[i] = ex + results[i] = e # TODO: fix hack (maybe not a hack) # make sure each module knows what training state it's in... 
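The `_auto_collect_arguments` and `_collect_init_args` helpers introduced in the `pytorch_lightning/core/lightning.py` hunk above work by walking up the call stack from `LightningModule.__init__` and harvesting the locals of every `__init__` frame they find; such frames are recognised by the implicit `__class__` cell that zero-argument `super()` creates. The following standalone sketch only illustrates that idea — `AutoArgsBase`, `DemoModel` and `collect_init_args` are illustrative names, not the Lightning implementation:

import inspect


def collect_init_args(frame, path_args):
    """Walk up the call stack and gather the locals of every __init__ frame."""
    if '__class__' in frame.f_locals:
        # keep everything that was passed to this __init__, drop bookkeeping names
        local_args = {k: v for k, v in frame.f_locals.items()
                      if k not in ('self', '__class__')}
        path_args.append(local_args)
        return collect_init_args(frame.f_back, path_args)
    return path_args


class AutoArgsBase:
    def __init__(self):
        frame = inspect.currentframe()
        # frame.f_back is the __init__ of the subclass that called super().__init__()
        init_args = collect_init_args(frame.f_back, [])
        # the last entry belongs to the most-derived (outer-most) __init__
        self.module_arguments = init_args[-1] if init_args else {}


class DemoModel(AutoArgsBase):
    def __init__(self, batch_size=32, learning_rate=1e-3):
        super().__init__()


print(DemoModel(batch_size=64).module_arguments)
# -> {'batch_size': 64, 'learning_rate': 0.001}

Lightning keeps the resulting dictionary as `module_arguments`, which is what later gets written into checkpoints and logged as hyperparameters.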
diff --git a/pytorch_lightning/trainer/distrib_data_parallel.py b/pytorch_lightning/trainer/distrib_data_parallel.py index 28b6c7dbbade1..3844896ba2e90 100644 --- a/pytorch_lightning/trainer/distrib_data_parallel.py +++ b/pytorch_lightning/trainer/distrib_data_parallel.py @@ -277,12 +277,12 @@ def configure_slurm_ddp(self, num_gpu_nodes): should_fake = int(os.environ['FAKE_SLURM_MANAGING_TASKS']) if should_fake: self.is_slurm_managing_tasks = True - except Exception: + except Exception as e: pass # notify user the that slurm is managing tasks if self.is_slurm_managing_tasks: - log.info('Multi-processing is handled by SLURM.') + log.info('Multi-processing is handled by Slurm.') def determine_ddp_node_rank(self): if self.is_slurm_managing_tasks: diff --git a/pytorch_lightning/trainer/distrib_parts.py b/pytorch_lightning/trainer/distrib_parts.py index b20ac09918085..dabe72f27d823 100644 --- a/pytorch_lightning/trainer/distrib_parts.py +++ b/pytorch_lightning/trainer/distrib_parts.py @@ -758,9 +758,9 @@ def retry_jittered_backoff(f, num_retries=5): for i in range(num_retries): try: return f() - except RuntimeError as err: + except RuntimeError as e: if i == num_retries - 1: - raise err + raise e else: continue time.sleep(sleep) diff --git a/pytorch_lightning/trainer/trainer.py b/pytorch_lightning/trainer/trainer.py index 90e303206b0c2..3f51a168add75 100644 --- a/pytorch_lightning/trainer/trainer.py +++ b/pytorch_lightning/trainer/trainer.py @@ -952,8 +952,7 @@ def run_pretrain_routine(self, model: LightningModule): # log hyper-parameters if self.logger is not None: # save exp to get started - if hasattr(ref_model, "module_arguments"): - self.logger.log_hyperparams(ref_model.module_arguments) + self.logger.log_hyperparams(ref_model.module_arguments) self.logger.save() diff --git a/pytorch_lightning/trainer/training_io.py b/pytorch_lightning/trainer/training_io.py index 9d343233d8b3c..e1baca64f1132 100644 --- a/pytorch_lightning/trainer/training_io.py +++ b/pytorch_lightning/trainer/training_io.py @@ -204,7 +204,7 @@ def register_slurm_signal_handlers(self): job_name = os.environ['SLURM_JOB_NAME'] if job_name != 'bash': on_slurm = True - except Exception: + except Exception as e: pass if on_slurm: diff --git a/tests/base/model_optimizers.py b/tests/base/model_optimizers.py index d6e3e0c98fa8d..6386d925bdb13 100644 --- a/tests/base/model_optimizers.py +++ b/tests/base/model_optimizers.py @@ -1,5 +1,4 @@ -from abc import ABC, abstractmethod -from argparse import Namespace +from abc import ABC from torch import optim @@ -11,7 +10,7 @@ def configure_optimizers(self): return whatever optimizers we want here. :return: list of optimizers """ - optimizer = optim.Adam(self.parameters(), lr=self.module_hparams.learning_rate) + optimizer = optim.Adam(self.parameters(), lr=self.learning_rate) return optimizer def configure_optimizers__empty(self): @@ -22,7 +21,7 @@ def configure_optimizers__lbfgs(self): return whatever optimizers we want here. 
:return: list of optimizers """ - optimizer = optim.LBFGS(self.parameters(), lr=self.module_hparams.learning_rate) + optimizer = optim.LBFGS(self.parameters(), lr=self.learning_rate) return optimizer def configure_optimizers__multiple_optimizers(self): @@ -31,26 +30,26 @@ def configure_optimizers__multiple_optimizers(self): :return: list of optimizers """ # try no scheduler for this model (testing purposes) - optimizer1 = optim.Adam(self.parameters(), lr=self.module_hparams.learning_rate) - optimizer2 = optim.Adam(self.parameters(), lr=self.module_hparams.learning_rate) + optimizer1 = optim.Adam(self.parameters(), lr=self.learning_rate) + optimizer2 = optim.Adam(self.parameters(), lr=self.learning_rate) return optimizer1, optimizer2 def configure_optimizers__single_scheduler(self): - optimizer = optim.Adam(self.parameters(), lr=self.module_hparams.learning_rate) + optimizer = optim.Adam(self.parameters(), lr=self.learning_rate) lr_scheduler = optim.lr_scheduler.StepLR(optimizer, 1, gamma=0.1) return [optimizer], [lr_scheduler] def configure_optimizers__multiple_schedulers(self): - optimizer1 = optim.Adam(self.parameters(), lr=self.module_hparams.learning_rate) - optimizer2 = optim.Adam(self.parameters(), lr=self.module_hparams.learning_rate) + optimizer1 = optim.Adam(self.parameters(), lr=self.learning_rate) + optimizer2 = optim.Adam(self.parameters(), lr=self.learning_rate) lr_scheduler1 = optim.lr_scheduler.StepLR(optimizer1, 1, gamma=0.1) lr_scheduler2 = optim.lr_scheduler.StepLR(optimizer2, 1, gamma=0.1) return [optimizer1, optimizer2], [lr_scheduler1, lr_scheduler2] def configure_optimizers__mixed_scheduling(self): - optimizer1 = optim.Adam(self.parameters(), lr=self.module_hparams.learning_rate) - optimizer2 = optim.Adam(self.parameters(), lr=self.module_hparams.learning_rate) + optimizer1 = optim.Adam(self.parameters(), lr=self.learning_rate) + optimizer2 = optim.Adam(self.parameters(), lr=self.learning_rate) lr_scheduler1 = optim.lr_scheduler.StepLR(optimizer1, 4, gamma=0.1) lr_scheduler2 = optim.lr_scheduler.StepLR(optimizer2, 1, gamma=0.1) @@ -58,14 +57,14 @@ def configure_optimizers__mixed_scheduling(self): [{'scheduler': lr_scheduler1, 'interval': 'step'}, lr_scheduler2] def configure_optimizers__reduce_lr_on_plateau(self): - optimizer = optim.Adam(self.parameters(), lr=self.module_hparams.learning_rate) + optimizer = optim.Adam(self.parameters(), lr=self.learning_rate) lr_scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer) return [optimizer], [lr_scheduler] def configure_optimizers__param_groups(self): param_groups = [ - {'params': list(self.parameters())[:2], 'lr': self.module_hparams.learning_rate * 0.1}, - {'params': list(self.parameters())[2:], 'lr': self.module_hparams.learning_rate} + {'params': list(self.parameters())[:2], 'lr': self.learning_rate * 0.1}, + {'params': list(self.parameters())[2:], 'lr': self.learning_rate} ] optimizer = optim.Adam(param_groups) diff --git a/tests/base/model_template.py b/tests/base/model_template.py index 85663758962d1..a117f63a0d616 100644 --- a/tests/base/model_template.py +++ b/tests/base/model_template.py @@ -40,19 +40,20 @@ class EvalModelTemplate( def __init__(self, *args, - drop_prob=0.2, - batch_size=32, - in_features=28 * 28, - learning_rate=0.001 * 8, - optimizer_name='adam', - data_root=PATH_DATASETS, - out_features=10, - hidden_dim=1000, - b1=0.5, - b2=0.999, + drop_prob: float = 0.2, + batch_size: int = 32, + in_features: int = 28 * 28, + learning_rate: float = 0.001 * 8, + optimizer_name: str = 'adam', + data_root: 
str = PATH_DATASETS, + out_features: int = 10, + hidden_dim: int = 1000, + b1: float = 0.5, + b2: float = 0.999, **kwargs) -> object: # init superclass super().__init__() + self._auto_register_arguments() # if you specify an example input, the summary will show input/output for each layer self.example_input_array = torch.rand(5, 28 * 28) @@ -66,15 +67,15 @@ def __build_model(self): :return: """ self.c_d1 = nn.Linear( - in_features=self.module_hparams.in_features, - out_features=self.module_hparams.hidden_dim + in_features=self.in_features, + out_features=self.hidden_dim ) - self.c_d1_bn = nn.BatchNorm1d(self.module_hparams.hidden_dim) - self.c_d1_drop = nn.Dropout(self.module_hparams.drop_prob) + self.c_d1_bn = nn.BatchNorm1d(self.hidden_dim) + self.c_d1_drop = nn.Dropout(self.drop_prob) self.c_d2 = nn.Linear( - in_features=self.module_hparams.hidden_dim, - out_features=self.module_hparams.out_features + in_features=self.hidden_dim, + out_features=self.out_features ) def forward(self, x): @@ -93,7 +94,7 @@ def loss(self, labels, logits): return nll def prepare_data(self): - _ = TrialMNIST(root=self.module_hparams.data_root, train=True, download=True) + _ = TrialMNIST(root=self.data_root, train=True, download=True) @staticmethod def get_default_hparams(continue_training: bool = False, hpc_exp_number: int = 0) -> dict: diff --git a/tests/base/model_utilities.py b/tests/base/model_utilities.py index a30fda3e1ef2d..ce34b39b162f8 100644 --- a/tests/base/model_utilities.py +++ b/tests/base/model_utilities.py @@ -7,11 +7,11 @@ class ModelTemplateData: hparams: ... def dataloader(self, train): - dataset = TrialMNIST(root=self.module_hparams.data_root, train=train, download=True) + dataset = TrialMNIST(root=self.data_root, train=train, download=True) loader = DataLoader( dataset=dataset, - batch_size=self.module_hparams.batch_size, + batch_size=self.batch_size, # test and valid shall not be shuffled shuffle=train, ) diff --git a/tests/models/test_restore.py b/tests/models/test_restore.py index 970b2f7d78209..870ba61889e18 100644 --- a/tests/models/test_restore.py +++ b/tests/models/test_restore.py @@ -1,7 +1,6 @@ import glob import logging as log import os -from argparse import Namespace import pytest import torch @@ -9,7 +8,6 @@ import tests.base.utils as tutils from pytorch_lightning import Trainer from pytorch_lightning.callbacks import ModelCheckpoint -from pytorch_lightning.utilities.exceptions import MisconfigurationException from tests.base import EvalModelTemplate diff --git a/tests/trainer/test_dataloaders.py b/tests/trainer/test_dataloaders.py index e310648c115f9..7c0cf0bf95b79 100644 --- a/tests/trainer/test_dataloaders.py +++ b/tests/trainer/test_dataloaders.py @@ -430,7 +430,7 @@ def train_dataloader(self): hparams = EvalModelTemplate.get_default_hparams() hparams['batch_size'] = batch_size - model = CurrentTestModel(hparams) + model = CurrentTestModel(**hparams) trainer = Trainer( max_epochs=1, From 088c3bd71d90614783438f402f59908a52aceb35 Mon Sep 17 00:00:00 2001 From: Jirka Date: Sat, 23 May 2020 19:59:07 +0200 Subject: [PATCH 083/100] cleaning --- docs/source/lr_finder.rst | 2 +- docs/source/weights_loading.rst | 2 +- .../computer_vision_fine_tuning.py | 2 +- .../generative_adversarial_net.py | 2 +- pl_examples/domain_templates/imagenet.py | 2 +- .../domain_templates/reinforce_learn_Qnet.py | 2 +- .../domain_templates/semantic_segmentation.py | 2 +- pl_examples/models/lightning_template.py | 2 +- pytorch_lightning/core/lightning.py | 18 +++++------ tests/base/model_template.py | 11 
++++++- tests/models/test_hparams.py | 31 +++++-------------- 11 files changed, 35 insertions(+), 41 deletions(-) diff --git a/docs/source/lr_finder.rst b/docs/source/lr_finder.rst index 926a52c5cf1b9..0ea8551000f6c 100755 --- a/docs/source/lr_finder.rst +++ b/docs/source/lr_finder.rst @@ -43,7 +43,7 @@ This flag sets your learning rate which can be accessed via ``self.lr`` or ``sel class LitModel(LightningModule): def __init__(self, hparams): - self._auto_register_arguments() + self.auto_register_init_arguments() def configure_optimizers(self): return Adam(self.parameters(), lr=(self.hparams.lr or self.hparams.learning_rate)) diff --git a/docs/source/weights_loading.rst b/docs/source/weights_loading.rst index 01ebe47f68295..97f5e6150201a 100644 --- a/docs/source/weights_loading.rst +++ b/docs/source/weights_loading.rst @@ -108,7 +108,7 @@ But if you don't want to use the values saved in the checkpoint, pass in your ow def __init__(self, in_dim, out_dim): super().__init__() - self._auto_register_arguments() + self.auto_register_init_arguments() self.l1 = nn.Linear(self.in_dim, self.out_dim) you can restore the model like this diff --git a/pl_examples/domain_templates/computer_vision_fine_tuning.py b/pl_examples/domain_templates/computer_vision_fine_tuning.py index 5370feb72c4c5..eb72410535625 100644 --- a/pl_examples/domain_templates/computer_vision_fine_tuning.py +++ b/pl_examples/domain_templates/computer_vision_fine_tuning.py @@ -157,7 +157,7 @@ def __init__(self, lr_scheduler_gamma=1e-1, num_workers=6) -> None: super().__init__() - self._auto_register_arguments() + self.auto_register_init_arguments() self.dl_path = dl_path self.__build_model() diff --git a/pl_examples/domain_templates/generative_adversarial_net.py b/pl_examples/domain_templates/generative_adversarial_net.py index c009f689bb4d1..65d3bc2a14355 100644 --- a/pl_examples/domain_templates/generative_adversarial_net.py +++ b/pl_examples/domain_templates/generative_adversarial_net.py @@ -74,7 +74,7 @@ class GAN(LightningModule): def __init__(self, latent_dim=100, lr=0.0002, b1=0.5, b2=0.999, batch_size=64): super().__init__() - self._auto_register_arguments() + self.auto_register_init_arguments() # networks mnist_shape = (1, 28, 28) diff --git a/pl_examples/domain_templates/imagenet.py b/pl_examples/domain_templates/imagenet.py index 68a5bc0f0627d..126b62d4dee5f 100644 --- a/pl_examples/domain_templates/imagenet.py +++ b/pl_examples/domain_templates/imagenet.py @@ -34,7 +34,7 @@ def __init__(self, arch, pretrained, lr, momentum, weight_decay, data_path, batc TODO: add docstring here """ super().__init__() - self._auto_register_arguments() + self.auto_register_init_arguments() self.model = models.__dict__[self.arch](pretrained=self.pretrained) def forward(self, x): diff --git a/pl_examples/domain_templates/reinforce_learn_Qnet.py b/pl_examples/domain_templates/reinforce_learn_Qnet.py index 5ab9ea8433dcd..f5f1f595268b0 100644 --- a/pl_examples/domain_templates/reinforce_learn_Qnet.py +++ b/pl_examples/domain_templates/reinforce_learn_Qnet.py @@ -202,7 +202,7 @@ def __init__(self, episode_length, batch_size) -> None: super().__init__() - self._auto_register_arguments() + self.auto_register_init_arguments() self.env = gym.make(self.env) obs_size = self.env.observation_space.shape[0] diff --git a/pl_examples/domain_templates/semantic_segmentation.py b/pl_examples/domain_templates/semantic_segmentation.py index ac6ecc409d4ac..3c6fbf274b03d 100644 --- a/pl_examples/domain_templates/semantic_segmentation.py +++ 
b/pl_examples/domain_templates/semantic_segmentation.py @@ -136,7 +136,7 @@ def __init__(self, features_start: int, bilinear: bool): super().__init__() - self._auto_register_arguments() + self.auto_register_init_arguments() self.net = UNet(num_classes=19, num_layers=self.num_layers, features_start=self.features_start, bilinear=self.bilinear) diff --git a/pl_examples/models/lightning_template.py b/pl_examples/models/lightning_template.py index 25f6e9a109fc4..f8d916d16cc76 100644 --- a/pl_examples/models/lightning_template.py +++ b/pl_examples/models/lightning_template.py @@ -51,7 +51,7 @@ def __init__(self, """ # init superclass super().__init__() - self._auto_register_arguments() + self.auto_register_init_arguments() self.c_d1 = nn.Linear(in_features=self.in_features, out_features=self.hidden_dim) diff --git a/pytorch_lightning/core/lightning.py b/pytorch_lightning/core/lightning.py index 073ad6c656d4a..06e7a3f36a9ad 100644 --- a/pytorch_lightning/core/lightning.py +++ b/pytorch_lightning/core/lightning.py @@ -1699,15 +1699,15 @@ def get_tqdm_dict(self) -> Dict[str, Union[int, str]]: " and this method will be removed in v1.0.0", DeprecationWarning) return self.get_progress_bar_dict() - def _auto_register_arguments(self, include_parents=False): - """Automatically register all init arguments to `self`.""" - if not hasattr(self, '_module_self_arguments'): - self._auto_collect_arguments() - - args = dict(self._module_parents_arguments) if include_parents else {} - args.update(self._module_self_arguments) - for k, v in ((k, v ) for k, v in args.items() if not hasattr(self, k)): - setattr(self, 'k', v) + # def auto_register_init_arguments(self, include_parents=False): + # """Automatically register all init arguments to `self`.""" + # if not hasattr(self, '_module_self_arguments'): + # self._auto_collect_arguments() + # + # args = dict(self._module_parents_arguments) if include_parents else {} + # args.update(self._module_self_arguments) + # for k, v in ((k, v ) for k, v in args.items() if not hasattr(self, k)): + # setattr(self, k, v) def _auto_collect_arguments(self): """Collect all arguments module arguments.""" diff --git a/tests/base/model_template.py b/tests/base/model_template.py index a117f63a0d616..0971dc6793688 100644 --- a/tests/base/model_template.py +++ b/tests/base/model_template.py @@ -53,7 +53,16 @@ def __init__(self, **kwargs) -> object: # init superclass super().__init__() - self._auto_register_arguments() + self.drop_prob = drop_prob + self.batch_size = batch_size + self.in_features = in_features + self.learning_rate = learning_rate + self.optimizer_name = optimizer_name + self.data_root = data_root + self.out_features = out_features + self.hidden_dim = hidden_dim + self.b1 = b1 + self.b2 = b2 # if you specify an example input, the summary will show input/output for each layer self.example_input_array = torch.rand(5, 28 * 28) diff --git a/tests/models/test_hparams.py b/tests/models/test_hparams.py index 89b3789d8dde0..e3c4e981ea5c9 100644 --- a/tests/models/test_hparams.py +++ b/tests/models/test_hparams.py @@ -1,5 +1,4 @@ import os -from argparse import Namespace import pytest import torch @@ -12,17 +11,8 @@ class SubClassEvalModel(EvalModelTemplate): object_that_should_not_be_saved = torch.nn.CrossEntropyLoss() def __init__(self, *args, subclass_arg=1200, **kwargs): - super().__init__() - - -class HparamsNamespaceEvalModel(EvalModelTemplate): - def __init__(self, *args, hparams=Namespace(hparam_arg=123), **kwargs): - super().__init__() - - -class 
HparamsDictEvalModel(EvalModelTemplate): - def __init__(self, *args, hparams=dict(hparam_arg=123), **kwargs): - super().__init__() + super().__init__(*args, **kwargs) + self.subclass_arg = subclass_arg class SubSubClassEvalModel(SubClassEvalModel): @@ -31,21 +21,16 @@ class SubSubClassEvalModel(SubClassEvalModel): @pytest.mark.parametrize("cls", [EvalModelTemplate, SubClassEvalModel, - SubSubClassEvalModel, - HparamsNamespaceEvalModel, - HparamsDictEvalModel]) + SubSubClassEvalModel]) def test_auto_hparams(tmpdir, cls): # test that the model automatically sets the args passed into init as attrs model = cls() - assert model.module_hparams.batch_size == 32 + assert model.batch_size == 32 model = cls(batch_size=179) - assert model.module_hparams.batch_size == 179 + assert model.batch_size == 179 if isinstance(model, SubClassEvalModel): - assert model.module_hparams.subclass_arg == 1200 - - if isinstance(model, (HparamsNamespaceEvalModel, HparamsDictEvalModel)): - assert model.module_hparams.hparam_arg == 123 + assert model.subclass_arg == 1200 # verify that the checkpoint saved the correct values trainer = Trainer(max_steps=5, default_root_dir=tmpdir) @@ -59,8 +44,8 @@ def test_auto_hparams(tmpdir, cls): # verify that model loads correctly model = cls.load_from_checkpoint(raw_checkpoint_path) - assert model.module_hparams.batch_size == 179 + assert model.batch_size == 179 # verify that we can overwrite whatever we want model = cls.load_from_checkpoint(raw_checkpoint_path, batch_size=99) - assert model.module_hparams.batch_size == 99 + assert model.batch_size == 99 From dfd3a26da24a0522136c200e86692dcdcad0daa1 Mon Sep 17 00:00:00 2001 From: Jirka Date: Sat, 23 May 2020 22:13:33 +0200 Subject: [PATCH 084/100] tests --- tests/base/models.py | 3 ++- tests/models/test_cpu.py | 6 +++--- tests/models/test_horovod.py | 3 +-- tests/trainer/test_lr_finder.py | 7 +++---- 4 files changed, 9 insertions(+), 10 deletions(-) diff --git a/tests/base/models.py b/tests/base/models.py index 1e9cbccde3a09..8651a2f8bb91a 100644 --- a/tests/base/models.py +++ b/tests/base/models.py @@ -67,8 +67,9 @@ def forward(self, img): class TestGAN(LightningModule): """Implements a basic GAN for the purpose of illustrating multiple optimizers.""" - def __init__(self, hparams: dict): + def __init__(self, hidden_dim): super().__init__() + self.hidden_dim = hidden_dim # networks mnist_shape = (1, 28, 28) diff --git a/tests/models/test_cpu.py b/tests/models/test_cpu.py index c3487289af38c..d4195d28dbb7e 100644 --- a/tests/models/test_cpu.py +++ b/tests/models/test_cpu.py @@ -272,8 +272,8 @@ def __len__(self): return 1 class BpttTestModel(EvalModelTemplate): - def __init__(self, hparams): - super().__init__(hparams) + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) self.test_hidden = None def training_step(self, batch, batch_idx, hiddens): @@ -310,7 +310,7 @@ def train_dataloader(self): out_features=truncated_bptt_steps ) - model = BpttTestModel(hparams) + model = BpttTestModel(**hparams) # fit model trainer = Trainer( diff --git a/tests/models/test_horovod.py b/tests/models/test_horovod.py index 6117bc8a0e264..69a4ff65ea18f 100644 --- a/tests/models/test_horovod.py +++ b/tests/models/test_horovod.py @@ -143,8 +143,7 @@ def validation_step(self, batch, *args, **kwargs): @pytest.mark.skipif(sys.version_info >= (3, 8), reason="Horovod not yet supported in Python 3.8") @pytest.mark.skipif(platform.system() == "Windows", reason="Horovod is not supported on Windows") def test_horovod_multi_optimizer(tmpdir): - 
hparams = EvalModelTemplate.get_default_hparams() - model = TestGAN(hparams) + model = TestGAN(hidden_dim=1000) trainer_options = dict( default_root_dir=str(tmpdir), diff --git a/tests/trainer/test_lr_finder.py b/tests/trainer/test_lr_finder.py index 95e839d0b23eb..4bbda35235eeb 100755 --- a/tests/trainer/test_lr_finder.py +++ b/tests/trainer/test_lr_finder.py @@ -96,11 +96,10 @@ def test_trainer_arg_bool(tmpdir): def test_trainer_arg_str(tmpdir): """ Test that setting trainer arg to string works """ - hparams = EvalModelTemplate.get_default_hparams() - hparams['my_fancy_lr'] = 1.0 # update with non-standard field - model = EvalModelTemplate(**hparams) + model = EvalModelTemplate() + model.my_fancy_lr = 1.0 # update with non-standard field - before_lr = hparams.get('my_fancy_lr') + before_lr = model.my_fancy_lr # logger file to get meta trainer = Trainer( default_save_path=tmpdir, From 72f4cd038e8528038d39cf52d25bb725c1f417e2 Mon Sep 17 00:00:00 2001 From: Jirka Date: Sat, 23 May 2020 22:25:58 +0200 Subject: [PATCH 085/100] examples --- docs/source/lr_finder.rst | 4 ++-- docs/source/weights_loading.rst | 3 ++- .../computer_vision_fine_tuning.py | 23 ++++++++++++------- .../generative_adversarial_net.py | 13 +++++++++-- pl_examples/domain_templates/imagenet.py | 17 ++++++++++++-- .../domain_templates/reinforce_learn_Qnet.py | 11 ++++++++- .../domain_templates/semantic_segmentation.py | 7 +++++- pl_examples/models/lightning_template.py | 11 +++++++-- 8 files changed, 70 insertions(+), 19 deletions(-) diff --git a/docs/source/lr_finder.rst b/docs/source/lr_finder.rst index 0ea8551000f6c..cf13b004e3bb3 100755 --- a/docs/source/lr_finder.rst +++ b/docs/source/lr_finder.rst @@ -42,8 +42,8 @@ This flag sets your learning rate which can be accessed via ``self.lr`` or ``sel class LitModel(LightningModule): - def __init__(self, hparams): - self.auto_register_init_arguments() + def __init__(self, learning_rate): + self.learning_rate = learning_rate def configure_optimizers(self): return Adam(self.parameters(), lr=(self.hparams.lr or self.hparams.learning_rate)) diff --git a/docs/source/weights_loading.rst b/docs/source/weights_loading.rst index 97f5e6150201a..11844678397a9 100644 --- a/docs/source/weights_loading.rst +++ b/docs/source/weights_loading.rst @@ -108,7 +108,8 @@ But if you don't want to use the values saved in the checkpoint, pass in your ow def __init__(self, in_dim, out_dim): super().__init__() - self.auto_register_init_arguments() + self.in_dim = in_dim + self.out_dim = out_dim self.l1 = nn.Linear(self.in_dim, self.out_dim) you can restore the model like this diff --git a/pl_examples/domain_templates/computer_vision_fine_tuning.py b/pl_examples/domain_templates/computer_vision_fine_tuning.py index eb72410535625..b2818eb1d6aa4 100644 --- a/pl_examples/domain_templates/computer_vision_fine_tuning.py +++ b/pl_examples/domain_templates/computer_vision_fine_tuning.py @@ -149,15 +149,22 @@ class TransferLearningModel(pl.LightningModule): """ def __init__(self, dl_path: Union[str, Path], - backbone='resnet50', - train_bn=True, - milestones=(5, 10), - batch_size=8, - lr=1e-2, - lr_scheduler_gamma=1e-1, - num_workers=6) -> None: + backbone: str = 'resnet50', + train_bn: bool = True, + milestones: tuple = (5, 10), + batch_size: int = 8, + lr: float = 1e-2, + lr_scheduler_gamma: float = 1e-1, + num_workers: int = 6) -> None: super().__init__() - self.auto_register_init_arguments() + self.dl_path = dl_path + self.backbone = backbone + self.train_bn = train_bn + self.milestones = milestones + 
self.batch_size = batch_size + self.lr = lr + self.lr_scheduler_gamma = lr_scheduler_gamma + self.num_workers = num_workers self.dl_path = dl_path self.__build_model() diff --git a/pl_examples/domain_templates/generative_adversarial_net.py b/pl_examples/domain_templates/generative_adversarial_net.py index 65d3bc2a14355..001c8389ad1e6 100644 --- a/pl_examples/domain_templates/generative_adversarial_net.py +++ b/pl_examples/domain_templates/generative_adversarial_net.py @@ -72,9 +72,18 @@ def forward(self, img): class GAN(LightningModule): - def __init__(self, latent_dim=100, lr=0.0002, b1=0.5, b2=0.999, batch_size=64): + def __init__(self, + latent_dim: int = 100, + lr: float = 0.0002, + b1: float = 0.5, + b2: float = 0.999, + batch_size: int = 64): super().__init__() - self.auto_register_init_arguments() + self.latent_dim = latent_dim + self.lr = lr + self.b1 = b1 + self.b2 = b2 + self.batch_size = batch_size # networks mnist_shape = (1, 28, 28) diff --git a/pl_examples/domain_templates/imagenet.py b/pl_examples/domain_templates/imagenet.py index 126b62d4dee5f..df22e5e704956 100644 --- a/pl_examples/domain_templates/imagenet.py +++ b/pl_examples/domain_templates/imagenet.py @@ -29,12 +29,25 @@ class ImageNetLightningModel(LightningModule): - def __init__(self, arch, pretrained, lr, momentum, weight_decay, data_path, batch_size): + def __init__(self, + arch, + pretrained, + lr: float, + momentum: float, + weight_decay: int, + data_path: str, + batch_size: int): """ TODO: add docstring here """ super().__init__() - self.auto_register_init_arguments() + self.arch = arch + self.pretrained = pretrained + self.lr = lr + self.momentum = momentum + self.weight_decay = weight_decay + self.data_path = data_path + self.batch_size = batch_size self.model = models.__dict__[self.arch](pretrained=self.pretrained) def forward(self, x): diff --git a/pl_examples/domain_templates/reinforce_learn_Qnet.py b/pl_examples/domain_templates/reinforce_learn_Qnet.py index f5f1f595268b0..88babcb94e43b 100644 --- a/pl_examples/domain_templates/reinforce_learn_Qnet.py +++ b/pl_examples/domain_templates/reinforce_learn_Qnet.py @@ -202,7 +202,16 @@ def __init__(self, episode_length, batch_size) -> None: super().__init__() - self.auto_register_init_arguments() + self.replay_size = replay_size + self.warm_start_steps = warm_start_steps + self.gamma = gamma + self.eps_start = eps_start + self.eps_end = eps_end + self.eps_last_frame = eps_last_frame + self.sync_rate = sync_rate + self.lr = lr + self.episode_length = episode_length + self.batch_size = batch_size self.env = gym.make(self.env) obs_size = self.env.observation_space.shape[0] diff --git a/pl_examples/domain_templates/semantic_segmentation.py b/pl_examples/domain_templates/semantic_segmentation.py index 3c6fbf274b03d..5580eb4c307cb 100644 --- a/pl_examples/domain_templates/semantic_segmentation.py +++ b/pl_examples/domain_templates/semantic_segmentation.py @@ -136,7 +136,12 @@ def __init__(self, features_start: int, bilinear: bool): super().__init__() - self.auto_register_init_arguments() + self.data_path = data_path + self.batch_size = batch_size + self.lr = lr + self.num_layers = num_layers + self.features_start = features_start + self.bilinear = bilinear self.net = UNet(num_classes=19, num_layers=self.num_layers, features_start=self.features_start, bilinear=self.bilinear) diff --git a/pl_examples/models/lightning_template.py b/pl_examples/models/lightning_template.py index f8d916d16cc76..d9ceb7ba8a2f4 100644 --- a/pl_examples/models/lightning_template.py +++ 
b/pl_examples/models/lightning_template.py @@ -45,13 +45,20 @@ def __init__(self, data_root: str = './datasets', out_features: int = 10, hidden_dim: int = 1000, - ): + ) -> 'LightningTemplateModel': """ Pass in hyperparameters as a `argparse.Namespace` or a `dict` to the model. """ # init superclass super().__init__() - self.auto_register_init_arguments() + self.drop_prob = drop_prob + self.batch_size = batch_size + self.in_features = in_features + self.learning_rate = learning_rate + self.optimizer_name = optimizer_name + self.data_root = data_root + self.out_features = out_features + self.hidden_dim = hidden_dim self.c_d1 = nn.Linear(in_features=self.in_features, out_features=self.hidden_dim) From 2a8872bb5e630a5bc46884f7c80fc8f5d7453f25 Mon Sep 17 00:00:00 2001 From: Jirka Date: Sat, 23 May 2020 22:41:02 +0200 Subject: [PATCH 086/100] examples --- docs/source/hyperparameters.rst | 2 -- tests/base/models.py | 8 +++++--- tests/models/test_horovod.py | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/docs/source/hyperparameters.rst b/docs/source/hyperparameters.rst index 5e5ea3e217325..f53f94bb46fd1 100644 --- a/docs/source/hyperparameters.rst +++ b/docs/source/hyperparameters.rst @@ -128,8 +128,6 @@ modify the network and read those values in the LightningModule parser.add_argument('--learning_rate', type=float, default=0.002) return parser - model = LitMNIST(10, 20, 0.0001, 5) - Now pass in the params when you init your model .. code-block:: python diff --git a/tests/base/models.py b/tests/base/models.py index 8651a2f8bb91a..77deb0766b9b6 100644 --- a/tests/base/models.py +++ b/tests/base/models.py @@ -67,9 +67,12 @@ def forward(self, img): class TestGAN(LightningModule): """Implements a basic GAN for the purpose of illustrating multiple optimizers.""" - def __init__(self, hidden_dim): + def __init__(self, hidden_dim, learning_rate, b1, b2, **kwargs): super().__init__() self.hidden_dim = hidden_dim + self.learning_rate = learning_rate + self.b1 = b1 + self.b2 = b2 # networks mnist_shape = (1, 28, 28) @@ -128,8 +131,7 @@ def training_step(self, batch, batch_idx, optimizer_idx=None): fake = torch.zeros(imgs.size(0), 1) fake = fake.type_as(fake) - fake_loss = self.adversarial_loss( - self.discriminator(self.generated_imgs.detach()), fake) + fake_loss = self.adversarial_loss(self.discriminator(self.generated_imgs.detach()), fake) # discriminator loss is the average of these d_loss = (real_loss + fake_loss) / 2 diff --git a/tests/models/test_horovod.py b/tests/models/test_horovod.py index 69a4ff65ea18f..4e5fe0ef81552 100644 --- a/tests/models/test_horovod.py +++ b/tests/models/test_horovod.py @@ -143,7 +143,7 @@ def validation_step(self, batch, *args, **kwargs): @pytest.mark.skipif(sys.version_info >= (3, 8), reason="Horovod not yet supported in Python 3.8") @pytest.mark.skipif(platform.system() == "Windows", reason="Horovod is not supported on Windows") def test_horovod_multi_optimizer(tmpdir): - model = TestGAN(hidden_dim=1000) + model = TestGAN(**EvalModelTemplate.get_default_hparams()) trainer_options = dict( default_root_dir=str(tmpdir), From 5fe6f024af2f9dcc1c7b96a9b817d857ca01dc10 Mon Sep 17 00:00:00 2001 From: Jirka Date: Sat, 23 May 2020 23:59:25 +0200 Subject: [PATCH 087/100] examples --- docs/source/hyperparameters.rst | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/docs/source/hyperparameters.rst b/docs/source/hyperparameters.rst index f53f94bb46fd1..6eb5f6e6cc751 100644 --- a/docs/source/hyperparameters.rst +++ 
b/docs/source/hyperparameters.rst @@ -107,7 +107,10 @@ modify the network and read those values in the LightningModule def __init__(self, layer_1_dim, layer_2_dim, learning_rate, batch_size): super().__init__() - self._auto_reguster_arguments() + self.layer_1_dim = layer_1_dim + self.layer_2_dim = layer_2_dim + self.learning_rate = learning_rate + self.batch_size = batch_size self.layer_1 = torch.nn.Linear(28 * 28, self.layer_1_dim) self.layer_2 = torch.nn.Linear(self.layer_1_dim, self.layer_2_dim) From bad8d11614c0352705142e7b00938a5a8ae53432 Mon Sep 17 00:00:00 2001 From: Jirka Borovec Date: Sun, 24 May 2020 10:26:56 +0200 Subject: [PATCH 088/100] Apply suggestions from code review MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Adrian Wälchli --- docs/source/hyperparameters.rst | 5 +---- docs/source/lr_finder.rst | 2 +- tests/models/test_hparams.py | 2 +- 3 files changed, 3 insertions(+), 6 deletions(-) diff --git a/docs/source/hyperparameters.rst b/docs/source/hyperparameters.rst index 6eb5f6e6cc751..2c4c4e9ed02f8 100644 --- a/docs/source/hyperparameters.rst +++ b/docs/source/hyperparameters.rst @@ -141,7 +141,7 @@ Now pass in the params when you init your model model = LitMNIST(args) Within any LightningModule all the arguments you pass into your `__init__` will be available -simply with `self._module_arguments`. However, we won't overwrite any other arguments you have already defined. +simply with `self.module_arguments`. We will also add all of those values to the TensorBoard hparams tab (unless it's an object which we won't). We also will store those values into checkpoints for you which you can use to init your models. @@ -157,12 +157,9 @@ models. self.layer_1 = torch.nn.Linear(28 * 28, self.layer_1_dim) - # self.some_other_param is automatically available self.layer_2 = torch.nn.Linear(self.layer_1_dim, self.some_other_param) self.layer_3 = torch.nn.Linear(self.some_other_param, 10) - self.some_other_param = 12 - # but you can override it as normal model = LitMNIST(10, 20) diff --git a/docs/source/lr_finder.rst b/docs/source/lr_finder.rst index cf13b004e3bb3..308cc216e1343 100755 --- a/docs/source/lr_finder.rst +++ b/docs/source/lr_finder.rst @@ -46,7 +46,7 @@ This flag sets your learning rate which can be accessed via ``self.lr`` or ``sel self.learning_rate = learning_rate def configure_optimizers(self): - return Adam(self.parameters(), lr=(self.hparams.lr or self.hparams.learning_rate)) + return Adam(self.parameters(), lr=(self.lr or self.learning_rate)) # finds learning rate automatically # sets hparams.lr or hparams.learning_rate to that learning rate diff --git a/tests/models/test_hparams.py b/tests/models/test_hparams.py index e3c4e981ea5c9..d0f2fa93783dc 100644 --- a/tests/models/test_hparams.py +++ b/tests/models/test_hparams.py @@ -23,7 +23,7 @@ class SubSubClassEvalModel(SubClassEvalModel): SubClassEvalModel, SubSubClassEvalModel]) def test_auto_hparams(tmpdir, cls): - # test that the model automatically sets the args passed into init as attrs + """ Test that the model automatically saves the arguments passed into the constructor """ model = cls() assert model.batch_size == 32 model = cls(batch_size=179) From 55b58f7405765c1e4b7f0532de69913076b0a1fb Mon Sep 17 00:00:00 2001 From: Jirka Date: Sun, 24 May 2020 11:13:08 +0200 Subject: [PATCH 089/100] chp key --- docs/source/weights_loading.rst | 4 ++-- pytorch_lightning/core/lightning.py | 20 ++++++-------------- pytorch_lightning/trainer/training_io.py | 16 
++++++++-------- tests/models/test_hparams.py | 5 +++-- tests/trainer/test_trainer.py | 3 ++- 5 files changed, 21 insertions(+), 27 deletions(-) diff --git a/docs/source/weights_loading.rst b/docs/source/weights_loading.rst index 11844678397a9..7876c32bc4fc6 100644 --- a/docs/source/weights_loading.rst +++ b/docs/source/weights_loading.rst @@ -2,7 +2,7 @@ import os from pytorch_lightning.trainer.trainer import Trainer - from pytorch_lightning.core.lightning import LightningModule + from pytorch_lightning.core.lightning import LightningModule, CHECKPOINT_KEY_MODULE_ARGS Saving and loading weights @@ -71,7 +71,7 @@ under the `module_arguments` key in the checkpoint. # all init args were saved to the checkpoint checkpoint = torch.load(CKPT_PATH) - print(checkpoint['module_arguments']) + print(checkpoint[CHECKPOINT_KEY_MODULE_ARGS]) # {'learning_rate': the_value} Manual saving diff --git a/pytorch_lightning/core/lightning.py b/pytorch_lightning/core/lightning.py index 06e7a3f36a9ad..94b06d8a1e7bc 100644 --- a/pytorch_lightning/core/lightning.py +++ b/pytorch_lightning/core/lightning.py @@ -30,6 +30,8 @@ else: XLA_AVAILABLE = True +CHECKPOINT_KEY_MODULE_ARGS = 'module_arguments' + class LightningModule(ABC, DeviceDtypeModuleMixin, GradInformation, ModelIO, ModelHooks): @@ -1558,10 +1560,10 @@ def load_from_checkpoint( hparams['on_gpu'] = False # overwrite hparams by the given file - checkpoint['module_arguments'] = hparams + checkpoint[CHECKPOINT_KEY_MODULE_ARGS] = hparams # override the module_arguments with values that were passed in - checkpoint['module_arguments'].update(kwargs) + checkpoint[CHECKPOINT_KEY_MODULE_ARGS].update(kwargs) model = cls._load_model_state(checkpoint, *args, **kwargs) return model @@ -1570,8 +1572,8 @@ def load_from_checkpoint( def _load_model_state(cls, checkpoint: Dict[str, Any], *args, **kwargs) -> 'LightningModule': # pass in the values we saved automatically - if 'module_arguments' in checkpoint: - model_args = checkpoint['module_arguments'] + if CHECKPOINT_KEY_MODULE_ARGS in checkpoint: + model_args = checkpoint[CHECKPOINT_KEY_MODULE_ARGS] kwargs.update(**model_args) # load the state_dict on the model automatically @@ -1699,16 +1701,6 @@ def get_tqdm_dict(self) -> Dict[str, Union[int, str]]: " and this method will be removed in v1.0.0", DeprecationWarning) return self.get_progress_bar_dict() - # def auto_register_init_arguments(self, include_parents=False): - # """Automatically register all init arguments to `self`.""" - # if not hasattr(self, '_module_self_arguments'): - # self._auto_collect_arguments() - # - # args = dict(self._module_parents_arguments) if include_parents else {} - # args.update(self._module_self_arguments) - # for k, v in ((k, v ) for k, v in args.items() if not hasattr(self, k)): - # setattr(self, k, v) - def _auto_collect_arguments(self): """Collect all arguments module arguments.""" frame = inspect.currentframe() diff --git a/pytorch_lightning/trainer/training_io.py b/pytorch_lightning/trainer/training_io.py index e1baca64f1132..ec37344a00561 100644 --- a/pytorch_lightning/trainer/training_io.py +++ b/pytorch_lightning/trainer/training_io.py @@ -95,7 +95,7 @@ import torch.distributed as torch_distrib from pytorch_lightning import _logger as log -from pytorch_lightning.core.lightning import LightningModule +from pytorch_lightning.core.lightning import LightningModule, CHECKPOINT_KEY_MODULE_ARGS from pytorch_lightning.loggers import LightningLoggerBase from pytorch_lightning.overrides.data_parallel import ( LightningDistributedDataParallel, 
@@ -267,8 +267,8 @@ def save_checkpoint(self, filepath, weights_only: bool = False): try: self._atomic_save(checkpoint, filepath) except AttributeError as err: - if 'module_arguments' in checkpoint: - del checkpoint['module_arguments'] + if CHECKPOINT_KEY_MODULE_ARGS in checkpoint: + del checkpoint[CHECKPOINT_KEY_MODULE_ARGS] rank_zero_warn('Warning, `module_arguments` dropped from checkpoint.' f' An attribute is not picklable {err}') self._atomic_save(checkpoint, filepath) @@ -353,10 +353,10 @@ def dump_checkpoint(self, weights_only: bool = False) -> dict: checkpoint['state_dict'] = model.state_dict() - if hasattr(model, 'module_arguments') and model.module_arguments: + if hasattr(model, CHECKPOINT_KEY_MODULE_ARGS) and model.module_arguments: # add arguments to the checkpoint - checkpoint['module_arguments'] = {k: v for k, v in model.module_arguments.items() - if is_picklable(v)} + checkpoint[CHECKPOINT_KEY_MODULE_ARGS] = {k: v for k, v in model.module_arguments.items() + if is_picklable(v)} # give the model a chance to add a few things model.on_save_checkpoint(checkpoint) @@ -461,8 +461,8 @@ def hpc_save(self, folderpath: str, logger): try: self._atomic_save(checkpoint, filepath) except AttributeError as err: - if 'module_arguments' in checkpoint: - del checkpoint['module_arguments'] + if CHECKPOINT_KEY_MODULE_ARGS in checkpoint: + del checkpoint[CHECKPOINT_KEY_MODULE_ARGS] rank_zero_warn('warning, `module_arguments` dropped from checkpoint.' f' An attribute is not picklable {err}') self._atomic_save(checkpoint, filepath) diff --git a/tests/models/test_hparams.py b/tests/models/test_hparams.py index d0f2fa93783dc..ec4c498084ee6 100644 --- a/tests/models/test_hparams.py +++ b/tests/models/test_hparams.py @@ -4,6 +4,7 @@ import torch from pytorch_lightning import Trainer +from pytorch_lightning.core.lightning import CHECKPOINT_KEY_MODULE_ARGS from tests.base import EvalModelTemplate @@ -39,8 +40,8 @@ def test_auto_hparams(tmpdir, cls): raw_checkpoint_path = [x for x in raw_checkpoint_path if '.ckpt' in x][0] raw_checkpoint_path = os.path.join(trainer.checkpoint_callback.dirpath, raw_checkpoint_path) raw_checkpoint = torch.load(raw_checkpoint_path) - assert 'module_arguments' in raw_checkpoint - assert raw_checkpoint['module_arguments']['batch_size'] == 179 + assert CHECKPOINT_KEY_MODULE_ARGS in raw_checkpoint + assert raw_checkpoint[CHECKPOINT_KEY_MODULE_ARGS]['batch_size'] == 179 # verify that model loads correctly model = cls.load_from_checkpoint(raw_checkpoint_path) diff --git a/tests/trainer/test_trainer.py b/tests/trainer/test_trainer.py index 7fdc1d00d95b2..74e01fba7a37e 100644 --- a/tests/trainer/test_trainer.py +++ b/tests/trainer/test_trainer.py @@ -11,6 +11,7 @@ from pytorch_lightning import Callback, LightningModule from pytorch_lightning import Trainer from pytorch_lightning.callbacks import EarlyStopping, ModelCheckpoint +from pytorch_lightning.core.lightning import CHECKPOINT_KEY_MODULE_ARGS from pytorch_lightning.core.saving import load_hparams_from_tags_csv, load_hparams_from_yaml, save_hparams_to_tags_csv from pytorch_lightning.loggers import TensorBoardLogger from pytorch_lightning.trainer.logging import TrainerLoggingMixin @@ -42,7 +43,7 @@ def test_no_val_module(tmpdir): # assert ckpt has hparams ckpt = torch.load(new_weights_path) - assert 'module_arguments' in ckpt.keys(), 'module_arguments missing from checkpoints' + assert CHECKPOINT_KEY_MODULE_ARGS in ckpt.keys(), 'module_arguments missing from checkpoints' # load new model hparams_path = 
tutils.get_data_path(logger, path_dir=tmpdir) From 538301400a7ccc64b0db437a194a3928c77aa7b7 Mon Sep 17 00:00:00 2001 From: Jirka Date: Sun, 24 May 2020 11:25:42 +0200 Subject: [PATCH 090/100] tests --- tests/models/test_restore.py | 37 --------------------------------- tests/trainer/test_lr_finder.py | 21 +++++++++++++++++++ 2 files changed, 21 insertions(+), 37 deletions(-) diff --git a/tests/models/test_restore.py b/tests/models/test_restore.py index 870ba61889e18..cae58cc8faa8f 100644 --- a/tests/models/test_restore.py +++ b/tests/models/test_restore.py @@ -269,43 +269,6 @@ def test_model_saving_loading(tmpdir): assert torch.all(torch.eq(pred_before_saving, new_pred)).item() == 1 -# def test_load_model_with_missing_hparams(tmpdir): -# trainer = Trainer( -# progress_bar_refresh_rate=0, -# max_epochs=1, -# checkpoint_callback=ModelCheckpoint(tmpdir, save_top_k=-1), -# logger=False, -# default_root_dir=tmpdir, -# ) -# -# class CurrentModelWithoutHparams(EvalModelTemplate): -# def __init__(self, *args, **kwargs): -# super().__init__() -# -# class CurrentModelUnusedHparams(EvalModelTemplate): -# def __init__(self, hparams={}, *args, **kwargs): -# super().__init__() -# -# model = CurrentModelWithoutHparams() -# trainer.fit(model) -# last_checkpoint = sorted(glob.glob(os.path.join(trainer.checkpoint_callback.dirpath, "*.ckpt")))[-1] -# -# # try to load a checkpoint that has hparams but model is missing hparams arg -# with pytest.raises(MisconfigurationException, match=r".*__init__ is missing the argument 'hparams'.*"): -# CurrentModelWithoutHparams.load_from_checkpoint(last_checkpoint) -# -# # create a checkpoint without hyperparameters -# # if the model does not take a hparams argument, it should not throw an error -# ckpt = torch.load(last_checkpoint) -# # del(ckpt['hparams']) -# torch.save(ckpt, last_checkpoint) -# CurrentModelWithoutHparams.load_from_checkpoint(last_checkpoint) -# -# # load checkpoint without hparams again warn if user's model has hparams argument -# with pytest.warns(UserWarning, match=r".*Will pass in an empty Namespace instead."): -# CurrentModelUnusedHparams.load_from_checkpoint(last_checkpoint) - - def test_model_pickle(tmpdir): import pickle diff --git a/tests/trainer/test_lr_finder.py b/tests/trainer/test_lr_finder.py index 4bbda35235eeb..67dd6a6f3d0bd 100755 --- a/tests/trainer/test_lr_finder.py +++ b/tests/trainer/test_lr_finder.py @@ -178,3 +178,24 @@ def test_suggestion_parameters_work(tmpdir): assert lr1 != lr2, \ 'Skipping parameter did not influence learning rate' + + +def test_suggestion_with_non_finite_values(tmpdir): + """ Test that non-finite values does not alter results """ + + hparams = EvalModelTemplate.get_default_hparams() + model = EvalModelTemplate(hparams) + + # logger file to get meta + trainer = Trainer( + default_save_path=tmpdir, + max_epochs=10 + ) + + lrfinder = trainer.lr_find(model) + before_lr = lrfinder.suggestion() + lrfinder.results['loss'][-1] = float('nan') + after_lr = lrfinder.suggestion() + + assert before_lr == after_lr, \ + 'Learning rate was altered because of non-finite loss values' From 1fd8cce78e161ca89eccf7aa414e771087500da4 Mon Sep 17 00:00:00 2001 From: Jirka Borovec Date: Sun, 24 May 2020 11:35:41 +0200 Subject: [PATCH 091/100] Apply suggestions from code review --- docs/source/hyperparameters.rst | 2 +- pl_examples/domain_templates/reinforce_learn_Qnet.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/source/hyperparameters.rst b/docs/source/hyperparameters.rst index 
2c4c4e9ed02f8..9443348912292 100644 --- a/docs/source/hyperparameters.rst +++ b/docs/source/hyperparameters.rst @@ -138,7 +138,7 @@ Now pass in the params when you init your model parser = ArgumentParser() parser = LitMNIST.add_model_specific_args(parser) args = parser.parse_args() - model = LitMNIST(args) + model = LitMNIST(**vars(args)) Within any LightningModule all the arguments you pass into your `__init__` will be available simply with `self.module_arguments`. diff --git a/pl_examples/domain_templates/reinforce_learn_Qnet.py b/pl_examples/domain_templates/reinforce_learn_Qnet.py index 88babcb94e43b..79089686447e1 100644 --- a/pl_examples/domain_templates/reinforce_learn_Qnet.py +++ b/pl_examples/domain_templates/reinforce_learn_Qnet.py @@ -335,7 +335,7 @@ def get_device(self, batch) -> str: def main(args) -> None: - model = DQNLightning(args) + model = DQNLightning(**vars(args)) trainer = pl.Trainer( gpus=1, From ab3be5959ec42d8b62413a601425363d777668f7 Mon Sep 17 00:00:00 2001 From: Jirka Date: Sun, 24 May 2020 11:51:04 +0200 Subject: [PATCH 092/100] class --- pytorch_lightning/core/lightning.py | 4 ++-- tests/models/test_hparams.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pytorch_lightning/core/lightning.py b/pytorch_lightning/core/lightning.py index 94b06d8a1e7bc..9eb3c5aa1288f 100644 --- a/pytorch_lightning/core/lightning.py +++ b/pytorch_lightning/core/lightning.py @@ -1705,7 +1705,7 @@ def _auto_collect_arguments(self): """Collect all arguments module arguments.""" frame = inspect.currentframe() - frame_args = _collect_init_args(frame, []) + frame_args = _collect_init_args(frame.f_back, []) child = _get_latest_child(frame) # set module_arguments in child @@ -1724,7 +1724,7 @@ def module_arguments(self) -> dict: def _collect_init_args(frame, path_args: list) -> list: """Recursive search for all children.""" - if 'self' in frame.f_locals: + if '__class__' in frame.f_locals: local_args = dict(frame.f_locals) local_args.update(local_args.get('kwargs', {})) local_args = {k: v for k, v in local_args.items() diff --git a/tests/models/test_hparams.py b/tests/models/test_hparams.py index ec4c498084ee6..faab3fbbd476a 100644 --- a/tests/models/test_hparams.py +++ b/tests/models/test_hparams.py @@ -23,7 +23,7 @@ class SubSubClassEvalModel(SubClassEvalModel): @pytest.mark.parametrize("cls", [EvalModelTemplate, SubClassEvalModel, SubSubClassEvalModel]) -def test_auto_hparams(tmpdir, cls): +def test_collect_init_arguments(tmpdir, cls): """ Test that the model automatically saves the arguments passed into the constructor """ model = cls() assert model.batch_size == 32 From 8f57274cbf6a35c953ba1fc7860f59b4c9763fdb Mon Sep 17 00:00:00 2001 From: William Falcon Date: Sun, 24 May 2020 08:26:58 -0400 Subject: [PATCH 093/100] updated docs --- docs/source/hyperparameters.rst | 81 +++++++++++++++++++++++---------- 1 file changed, 56 insertions(+), 25 deletions(-) diff --git a/docs/source/hyperparameters.rst b/docs/source/hyperparameters.rst index 9443348912292..d931b8ab52fbf 100644 --- a/docs/source/hyperparameters.rst +++ b/docs/source/hyperparameters.rst @@ -75,7 +75,7 @@ Now in your main trainer file, add the Trainer args, the program args, and add t # ie: now --gpus --num_nodes ... --fast_dev_run all work in the cli parser = Trainer.add_argparse_args(parser) - hparams = parser.parse_args() + args = parser.parse_args() Now you can call run your program like so @@ -87,25 +87,35 @@ Finally, make sure to start the training like so: .. 
code-block:: python - # YES - model = LitModel(hparams) - trainer = Trainer.from_argparse_args(hparams, early_stopping_callback=...) + # init the trainer like this + trainer = Trainer.from_argparse_args(args, early_stopping_callback=...) + + # NOT like this + trainer = Trainer(gpus=hparams.gpus, ...) + + # init the model with Namespace directly + model = LitModel(args) + + # or init the model with all the key-value pairs + dict_args = vars(args) + model = LitModel(**dict_args) - # NO - # model = LitModel(learning_rate=hparams.learning_rate, ...) - # trainer = Trainer(gpus=hparams.gpus, ...) +LightningModule hyperparameters +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -LightningModule hparams -^^^^^^^^^^^^^^^^^^^^^^^ +.. warning:: The use of `hparams` is no longer recommended (but still supported) -Normally, we don't hard-code the values to a model. We usually use the command line to -modify the network and read those values in the LightningModule +LightningModule is just an nn.Module, you can use it as you normally would. However, there are +some best practices to improve readability and reproducibility. + +1. It's more readable to specify all the arguments that go into a module (with default values). +This helps users of your module know everything that is required to run this. .. testcode:: class LitMNIST(LightningModule): - def __init__(self, layer_1_dim, layer_2_dim, learning_rate, batch_size): + def __init__(self, layer_1_dim=128, layer_2_dim=256, learning_rate=1e-4, batch_size=32, **kwargs): super().__init__() self.layer_1_dim = layer_1_dim self.layer_2_dim = layer_2_dim @@ -131,17 +141,38 @@ modify the network and read those values in the LightningModule parser.add_argument('--learning_rate', type=float, default=0.002) return parser -Now pass in the params when you init your model +2. You can also pass in a dict or Namespace, but this obscures the parameters your module is looking +for. The user would have to search the file to find what is parametrized. + +.. code-block:: python + + # using a argparse.Namespace + class LitMNIST(LightningModule): + + def __init__(self, hparams, *args, **kwargs): + super().__init__() + self.hparams = hparams + + self.layer_1 = torch.nn.Linear(28 * 28, self.hparams.layer_1_dim) + self.layer_2 = torch.nn.Linear(self.hparams.layer_1_dim, self.hparams.layer_2_dim) + self.layer_3 = torch.nn.Linear(self.hparams.layer_2_dim, 10) + + def train_dataloader(self): + return DataLoader(mnist_train, batch_size=self.hparams.batch_size) + +One way to get around this is to convert a Namespace or dict into key-value pairs using `**` .. code-block:: python parser = ArgumentParser() parser = LitMNIST.add_model_specific_args(parser) args = parser.parse_args() - model = LitMNIST(**vars(args)) + dict_args = vars(args) + model = LitMNIST(**dict_args) + +Within any LightningModule all the arguments you pass into your `__init__` will be stored in +the checkpoint so that you know all the values that went into creating this model. -Within any LightningModule all the arguments you pass into your `__init__` will be available -simply with `self.module_arguments`. We will also add all of those values to the TensorBoard hparams tab (unless it's an object which we won't). We also will store those values into checkpoints for you which you can use to init your models. 
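A sketch of the checkpoint round-trip described above, mirroring the behaviour exercised in `tests/models/test_hparams.py`: `LitMNIST` is the illustrative module from this docs page rather than a class shipped with Lightning, and the number of training steps and the way the checkpoint path is looked up are arbitrary choices for the example.

import os
import torch
from pytorch_lightning import Trainer

model = LitMNIST(layer_1_dim=128, layer_2_dim=256, learning_rate=1e-4, batch_size=32)
trainer = Trainer(max_steps=5)
trainer.fit(model)

# every argument passed to __init__ was written into the checkpoint
ckpt_dir = trainer.checkpoint_callback.dirpath
ckpt_name = [name for name in os.listdir(ckpt_dir) if name.endswith('.ckpt')][0]
ckpt_path = os.path.join(ckpt_dir, ckpt_name)
checkpoint = torch.load(ckpt_path)
print(checkpoint['module_arguments'])  # {'layer_1_dim': 128, 'layer_2_dim': 256, ...}

# rebuild the model from those stored values, overriding any of them if needed
model = LitMNIST.load_from_checkpoint(ckpt_path, batch_size=64)
assert model.batch_size == 64

Because the constructor arguments travel inside the checkpoint itself, no separate `hparams` object or tags file is needed to restore the model.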
@@ -190,13 +221,13 @@ polluting the main.py file, the LightningModule lets you define arguments for ea class LitMNIST(LightningModule): - def __init__(self, hparams): + def __init__(self, layer_1_dim, **kwargs): super().__init__() - self.layer_1 = torch.nn.Linear(28 * 28, hparams.layer_1_dim) + self.layer_1 = torch.nn.Linear(28 * 28, layer_1_dim) @staticmethod def add_model_specific_args(parent_parser): - parser = ArgumentParser(parents=[parent_parser]) + parser = ArgumentParser(parents=[parent_parser], add_help=False) parser.add_argument('--layer_1_dim', type=int, default=128) return parser @@ -204,13 +235,13 @@ polluting the main.py file, the LightningModule lets you define arguments for ea class GoodGAN(LightningModule): - def __init__(self, hparams): + def __init__(self, encoder_layers, **kwargs): super().__init__() - self.encoder = Encoder(layers=hparams.encoder_layers) + self.encoder = Encoder(layers=encoder_layers) @staticmethod def add_model_specific_args(parent_parser): - parser = ArgumentParser(parents=[parent_parser]) + parser = ArgumentParser(parents=[parent_parser], add_help=False) parser.add_argument('--encoder_layers', type=int, default=12) return parser @@ -220,14 +251,14 @@ Now we can allow each model to inject the arguments it needs in the ``main.py`` .. code-block:: python def main(args): + dict_args = vars(args) # pick model if args.model_name == 'gan': - model = GoodGAN(hparams=args) + model = GoodGAN(**dict_args) elif args.model_name == 'mnist': - model = LitMNIST(hparams=args) + model = LitMNIST(**dict_args) - model = LitMNIST(hparams=args) trainer = Trainer.from_argparse_args(args) trainer.fit(model) From 20ad2ca30e74a6ddeb193a5388d01fdf8fe7b211 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Sun, 24 May 2020 08:32:52 -0400 Subject: [PATCH 094/100] updated docs --- pl_examples/domain_templates/computer_vision_fine_tuning.py | 2 +- pl_examples/domain_templates/generative_adversarial_net.py | 2 +- pl_examples/domain_templates/imagenet.py | 2 +- pl_examples/domain_templates/reinforce_learn_Qnet.py | 2 +- pl_examples/domain_templates/semantic_segmentation.py | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/pl_examples/domain_templates/computer_vision_fine_tuning.py b/pl_examples/domain_templates/computer_vision_fine_tuning.py index b2818eb1d6aa4..4371c869450a3 100644 --- a/pl_examples/domain_templates/computer_vision_fine_tuning.py +++ b/pl_examples/domain_templates/computer_vision_fine_tuning.py @@ -155,7 +155,7 @@ def __init__(self, batch_size: int = 8, lr: float = 1e-2, lr_scheduler_gamma: float = 1e-1, - num_workers: int = 6) -> None: + num_workers: int = 6, **kwargs) -> None: super().__init__() self.dl_path = dl_path self.backbone = backbone diff --git a/pl_examples/domain_templates/generative_adversarial_net.py b/pl_examples/domain_templates/generative_adversarial_net.py index 001c8389ad1e6..23049358395b9 100644 --- a/pl_examples/domain_templates/generative_adversarial_net.py +++ b/pl_examples/domain_templates/generative_adversarial_net.py @@ -77,7 +77,7 @@ def __init__(self, lr: float = 0.0002, b1: float = 0.5, b2: float = 0.999, - batch_size: int = 64): + batch_size: int = 64, **kwargs): super().__init__() self.latent_dim = latent_dim self.lr = lr diff --git a/pl_examples/domain_templates/imagenet.py b/pl_examples/domain_templates/imagenet.py index df22e5e704956..e6584d76554fb 100644 --- a/pl_examples/domain_templates/imagenet.py +++ b/pl_examples/domain_templates/imagenet.py @@ -36,7 +36,7 @@ def __init__(self, momentum: float, 
weight_decay: int, data_path: str, - batch_size: int): + batch_size: int, **kwargs): """ TODO: add docstring here """ diff --git a/pl_examples/domain_templates/reinforce_learn_Qnet.py b/pl_examples/domain_templates/reinforce_learn_Qnet.py index 79089686447e1..95d6873a444db 100644 --- a/pl_examples/domain_templates/reinforce_learn_Qnet.py +++ b/pl_examples/domain_templates/reinforce_learn_Qnet.py @@ -200,7 +200,7 @@ def __init__(self, sync_rate, lr: float, episode_length, - batch_size) -> None: + batch_size, **kwargs) -> None: super().__init__() self.replay_size = replay_size self.warm_start_steps = warm_start_steps diff --git a/pl_examples/domain_templates/semantic_segmentation.py b/pl_examples/domain_templates/semantic_segmentation.py index 5580eb4c307cb..2f486c5b81827 100644 --- a/pl_examples/domain_templates/semantic_segmentation.py +++ b/pl_examples/domain_templates/semantic_segmentation.py @@ -134,7 +134,7 @@ def __init__(self, lr: float, num_layers: int, features_start: int, - bilinear: bool): + bilinear: bool, **kwargs): super().__init__() self.data_path = data_path self.batch_size = batch_size From f68316061449c0812311d3943e2ed175eff9cce2 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Sun, 24 May 2020 08:33:55 -0400 Subject: [PATCH 095/100] updated docs --- pl_examples/models/lightning_template.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pl_examples/models/lightning_template.py b/pl_examples/models/lightning_template.py index d9ceb7ba8a2f4..0caf2c861c455 100644 --- a/pl_examples/models/lightning_template.py +++ b/pl_examples/models/lightning_template.py @@ -45,6 +45,7 @@ def __init__(self, data_root: str = './datasets', out_features: int = 10, hidden_dim: int = 1000, + **kwargs ) -> 'LightningTemplateModel': """ Pass in hyperparameters as a `argparse.Namespace` or a `dict` to the model. From db5d1bfc1ee521d6d454a8e1b1a89c7f5e913170 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Sun, 24 May 2020 08:34:16 -0400 Subject: [PATCH 096/100] updated docs --- pl_examples/models/lightning_template.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/pl_examples/models/lightning_template.py b/pl_examples/models/lightning_template.py index 0caf2c861c455..b309094254118 100644 --- a/pl_examples/models/lightning_template.py +++ b/pl_examples/models/lightning_template.py @@ -47,9 +47,6 @@ def __init__(self, hidden_dim: int = 1000, **kwargs ) -> 'LightningTemplateModel': - """ - Pass in hyperparameters as a `argparse.Namespace` or a `dict` to the model. - """ # init superclass super().__init__() self.drop_prob = drop_prob From 6af55e79fd1f961e697aeaefc7bbc740a6750103 Mon Sep 17 00:00:00 2001 From: Jirka Date: Sun, 24 May 2020 16:19:50 +0200 Subject: [PATCH 097/100] save --- docs/source/weights_loading.rst | 4 ++-- pytorch_lightning/trainer/training_io.py | 23 ++++++++--------------- 2 files changed, 10 insertions(+), 17 deletions(-) diff --git a/docs/source/weights_loading.rst b/docs/source/weights_loading.rst index 7876c32bc4fc6..11844678397a9 100644 --- a/docs/source/weights_loading.rst +++ b/docs/source/weights_loading.rst @@ -2,7 +2,7 @@ import os from pytorch_lightning.trainer.trainer import Trainer - from pytorch_lightning.core.lightning import LightningModule, CHECKPOINT_KEY_MODULE_ARGS + from pytorch_lightning.core.lightning import LightningModule Saving and loading weights @@ -71,7 +71,7 @@ under the `module_arguments` key in the checkpoint. 
# all init args were saved to the checkpoint checkpoint = torch.load(CKPT_PATH) - print(checkpoint[CHECKPOINT_KEY_MODULE_ARGS]) + print(checkpoint['module_arguments']) # {'learning_rate': the_value} Manual saving diff --git a/pytorch_lightning/trainer/training_io.py b/pytorch_lightning/trainer/training_io.py index ec37344a00561..d036f5d9f670b 100644 --- a/pytorch_lightning/trainer/training_io.py +++ b/pytorch_lightning/trainer/training_io.py @@ -88,6 +88,7 @@ import re import signal from abc import ABC +from argparse import Namespace from subprocess import call from typing import Union @@ -119,6 +120,12 @@ else: HOROVOD_AVAILABLE = True +PRIMITIVE_TYPES = ( + bool, int, float, str, + list, tuple, set, dict, + Namespace, # for back compatibility +) + class TrainerIOMixin(ABC): @@ -356,7 +363,7 @@ def dump_checkpoint(self, weights_only: bool = False) -> dict: if hasattr(model, CHECKPOINT_KEY_MODULE_ARGS) and model.module_arguments: # add arguments to the checkpoint checkpoint[CHECKPOINT_KEY_MODULE_ARGS] = {k: v for k, v in model.module_arguments.items() - if is_picklable(v)} + if isinstance(v, PRIMITIVE_TYPES)} # give the model a chance to add a few things model.on_save_checkpoint(checkpoint) @@ -509,17 +516,3 @@ def max_ckpt_in_folder(self, path, name_key='ckpt_'): ckpt_vs.append(int(name)) return max(ckpt_vs) - - -def is_picklable(obj) -> bool: - """Try if the object is serializable - - >>> is_picklable(5) - True - """ - try: - pickle.dumps(obj) - except Exception: - return False - else: - return True From 04e8e673a5bdeb1e1064a430a504c59d60d0d33e Mon Sep 17 00:00:00 2001 From: Jirka Date: Sun, 24 May 2020 18:06:16 +0200 Subject: [PATCH 098/100] wip --- tests/models/test_hparams.py | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/tests/models/test_hparams.py b/tests/models/test_hparams.py index faab3fbbd476a..268825190faeb 100644 --- a/tests/models/test_hparams.py +++ b/tests/models/test_hparams.py @@ -9,7 +9,6 @@ class SubClassEvalModel(EvalModelTemplate): - object_that_should_not_be_saved = torch.nn.CrossEntropyLoss() def __init__(self, *args, subclass_arg=1200, **kwargs): super().__init__(*args, **kwargs) @@ -20,29 +19,46 @@ class SubSubClassEvalModel(SubClassEvalModel): pass +class AggSubClassEvalModel(SubClassEvalModel): + + def __init__(self, *args, my_loss=torch.nn.CrossEntropyLoss(), **kwargs): + super().__init__(*args, **kwargs) + self.my_loss = my_loss + + @pytest.mark.parametrize("cls", [EvalModelTemplate, SubClassEvalModel, - SubSubClassEvalModel]) + SubSubClassEvalModel, + AggSubClassEvalModel]) def test_collect_init_arguments(tmpdir, cls): """ Test that the model automatically saves the arguments passed into the constructor """ - model = cls() + extra_args = dict(my_loss=torch.nn.CosineEmbeddingLoss()) if cls is AggSubClassEvalModel else {} + + model = cls(**extra_args) assert model.batch_size == 32 - model = cls(batch_size=179) + model = cls(batch_size=179, **extra_args) assert model.batch_size == 179 if isinstance(model, SubClassEvalModel): assert model.subclass_arg == 1200 + if isinstance(model, AggSubClassEvalModel): + assert isinstance(model.my_loss, torch.nn.CosineEmbeddingLoss) + # verify that the checkpoint saved the correct values trainer = Trainer(max_steps=5, default_root_dir=tmpdir) trainer.fit(model) raw_checkpoint_path = os.listdir(trainer.checkpoint_callback.dirpath) raw_checkpoint_path = [x for x in raw_checkpoint_path if '.ckpt' in x][0] raw_checkpoint_path = os.path.join(trainer.checkpoint_callback.dirpath, 
raw_checkpoint_path) + raw_checkpoint = torch.load(raw_checkpoint_path) assert CHECKPOINT_KEY_MODULE_ARGS in raw_checkpoint assert raw_checkpoint[CHECKPOINT_KEY_MODULE_ARGS]['batch_size'] == 179 + if isinstance(model, AggSubClassEvalModel): + assert isinstance(model.my_loss, torch.nn.CrossEntropyLoss) + # verify that model loads correctly model = cls.load_from_checkpoint(raw_checkpoint_path) assert model.batch_size == 179 From a432f6e950fb9ea85d7acadf8a156fdbd668e1dd Mon Sep 17 00:00:00 2001 From: Jirka Date: Sun, 24 May 2020 18:50:12 +0200 Subject: [PATCH 099/100] fix --- tests/base/model_template.py | 3 ++- tests/models/test_hparams.py | 7 ++++--- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/tests/base/model_template.py b/tests/base/model_template.py index 0971dc6793688..018813112e15c 100644 --- a/tests/base/model_template.py +++ b/tests/base/model_template.py @@ -65,7 +65,8 @@ def __init__(self, self.b2 = b2 # if you specify an example input, the summary will show input/output for each layer - self.example_input_array = torch.rand(5, 28 * 28) + # TODO: to be fixed in #1773 + #self.example_input_array = torch.rand(5, 28 * 28) # build model self.__build_model() diff --git a/tests/models/test_hparams.py b/tests/models/test_hparams.py index 268825190faeb..c97dc61233fb0 100644 --- a/tests/models/test_hparams.py +++ b/tests/models/test_hparams.py @@ -9,6 +9,7 @@ class SubClassEvalModel(EvalModelTemplate): + any_other_loss = torch.nn.CrossEntropyLoss() def __init__(self, *args, subclass_arg=1200, **kwargs): super().__init__(*args, **kwargs) @@ -56,13 +57,13 @@ def test_collect_init_arguments(tmpdir, cls): assert CHECKPOINT_KEY_MODULE_ARGS in raw_checkpoint assert raw_checkpoint[CHECKPOINT_KEY_MODULE_ARGS]['batch_size'] == 179 - if isinstance(model, AggSubClassEvalModel): - assert isinstance(model.my_loss, torch.nn.CrossEntropyLoss) - # verify that model loads correctly model = cls.load_from_checkpoint(raw_checkpoint_path) assert model.batch_size == 179 + if isinstance(model, AggSubClassEvalModel): + assert isinstance(model.my_loss, torch.nn.CrossEntropyLoss) + # verify that we can overwrite whatever we want model = cls.load_from_checkpoint(raw_checkpoint_path, batch_size=99) assert model.batch_size == 99 From 2892e5a03a6ef952dde8943d5eca68e940cfc663 Mon Sep 17 00:00:00 2001 From: Jirka Date: Sun, 24 May 2020 19:00:31 +0200 Subject: [PATCH 100/100] flake8 --- tests/base/model_template.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/base/model_template.py b/tests/base/model_template.py index 018813112e15c..782ace16193d3 100644 --- a/tests/base/model_template.py +++ b/tests/base/model_template.py @@ -66,7 +66,7 @@ def __init__(self, # if you specify an example input, the summary will show input/output for each layer # TODO: to be fixed in #1773 - #self.example_input_array = torch.rand(5, 28 * 28) + # self.example_input_array = torch.rand(5, 28 * 28) # build model self.__build_model()
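Taken together, the patches in this series replace the implicit ``hparams`` attribute with automatic collection of ``__init__`` arguments via frame inspection, filtered to checkpoint-friendly types. The following is a condensed, standalone illustration of that mechanism; the class and function names below are made up for the example and are not the library API, whose real implementation lives in ``pytorch_lightning/core/lightning.py`` and additionally handles checkpoint loading and logger integration.

.. code-block:: python

    import inspect
    from argparse import Namespace

    # types that are safe to write into a checkpoint, mirroring PRIMITIVE_TYPES above
    PRIMITIVE_TYPES = (bool, int, float, str, list, tuple, set, dict, Namespace)


    def collect_init_args(frame, path_args):
        """Recursively collect constructor arguments from parent ``__init__`` frames."""
        # a zero-argument ``super()`` call places ``__class__`` in the frame locals,
        # which is how an ``__init__`` frame of a (sub)class is recognised
        if '__class__' in frame.f_locals:
            local_args = dict(frame.f_locals)
            local_args.update(local_args.get('kwargs', {}))
            local_args = {k: v for k, v in local_args.items()
                          if k not in ('args', 'kwargs', 'self', '__class__', 'frame')}
            path_args.append(local_args)
            return collect_init_args(frame.f_back, path_args)
        return path_args


    class Base:
        def __init__(self):
            # one frame up is the ``__init__`` of the child class that called us
            frame = inspect.currentframe()
            init_args = {}
            for local_args in collect_init_args(frame.f_back, []):
                init_args.update(local_args)
            # keep only values that serialise cleanly into a checkpoint
            self.module_arguments = {k: v for k, v in init_args.items()
                                     if isinstance(v, PRIMITIVE_TYPES)}


    class Child(Base):
        def __init__(self, batch_size=32, learning_rate=1e-3, **kwargs):
            super().__init__()


    print(Child(batch_size=179).module_arguments)
    # {'batch_size': 179, 'learning_rate': 0.001}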