
Commit dc219ca

saitcakmak authored and facebook-github-bot committed
Increase code-sharing of LCEMGP & define construct_inputs (#2291)
Summary:
Pull Request resolved: #2291

Increases code sharing between LCEMGP & the parent MultiTaskGP:
- Allows customizing mean, covariance & likelihood modules.
- Eliminates duplicate `forward` implementation by renaming `task_covar_matrix` to `task_covar_module`.

Defines a `construct_inputs` method for LCEMGP that supports the kwargs used to customize the task covariance module (which differ from those used for MultiTaskGP).

Reviewed By: Balandat

Differential Revision: D55935507

fbshipit-source-id: ed6fc6e47eeb02d0dddd7657df809f9623142a81
1 parent c9966e9 commit dc219ca
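
The headline addition is the `construct_inputs` classmethod. As a minimal sketch of the resulting workflow (this mirrors the new unit test below; `gen_multi_task_dataset` is a test helper rather than a public API, and the embedding kwargs are illustrative):

    import torch
    from botorch.models.contextual_multioutput import LCEMGP
    from botorch.utils.test_helpers import gen_multi_task_dataset

    # Build a two-task dataset (any MultiTaskDataset would do here).
    dataset, _ = gen_multi_task_dataset(dtype=torch.double)
    # Convert the dataset plus LCEMGP-specific kwargs into constructor kwargs.
    model_inputs = LCEMGP.construct_inputs(
        training_data=dataset,
        task_feature=0,
        embs_dim_list=[2],  # embedding dimension per categorical context feature
        context_emb_feature=torch.tensor([[0.2], [0.3]]),  # one row per context
    )
    model = LCEMGP(**model_inputs)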

File tree

3 files changed: +129 -64 lines changed


botorch/models/contextual_multioutput.py (+73 -21)
@@ -14,15 +14,17 @@
 """

 import warnings
-from typing import List, Optional
+from typing import Any, Dict, List, Optional, Union

 import torch
 from botorch.models.multitask import MultiTaskGP
 from botorch.models.transforms.input import InputTransform
 from botorch.models.transforms.outcome import OutcomeTransform
+from botorch.utils.datasets import MultiTaskDataset, SupervisedDataset
 from gpytorch.constraints import Interval
-from gpytorch.distributions.multivariate_normal import MultivariateNormal
 from gpytorch.kernels.rbf_kernel import RBFKernel
+from gpytorch.likelihoods.likelihood import Likelihood
+from gpytorch.module import Module
 from linear_operator.operators import LinearOperator
 from torch import Tensor
 from torch.nn import ModuleList
@@ -41,6 +43,9 @@ def __init__(
         train_Y: Tensor,
         task_feature: int,
         train_Yvar: Optional[Tensor] = None,
+        mean_module: Optional[Module] = None,
+        covar_module: Optional[Module] = None,
+        likelihood: Optional[Likelihood] = None,
         context_cat_feature: Optional[Tensor] = None,
         context_emb_feature: Optional[Tensor] = None,
         embs_dim_list: Optional[List[int]] = None,
@@ -57,6 +62,12 @@ def __init__(
             train_Yvar: An optional (n x 1) tensor of observed variances of each
                 training Y. If None, we infer the noise. Note that the inferred noise
                 is common across all tasks.
+            mean_module: The mean function to be used. Defaults to `ConstantMean`.
+            covar_module: The module for computing the covariance matrix between
+                the non-task features. Defaults to `MaternKernel`.
+            likelihood: A likelihood. The default is selected based on `train_Yvar`.
+                If `train_Yvar` is None, a standard `GaussianLikelihood` with inferred
+                noise level is used. Otherwise, a FixedNoiseGaussianLikelihood is used.
             context_cat_feature: (n_contexts x k) one-hot encoded context
                 features. Rows are ordered by context indices, where k is the
                 number of categorical variables. If None, task indices will
@@ -74,29 +85,40 @@ def __init__(
                 training data. Note that when a task is not observed, the corresponding
                 task covariance will heavily depend on random initialization and may
                 behave unexpectedly.
+            input_transform: An input transform that is applied in the model's
+                forward pass.
+            outcome_transform: An outcome transform that is applied to the
+                training data during instantiation and to the posterior during
+                inference (that is, the `Posterior` obtained by calling
+                `.posterior` on the model will be on the original scale).
         """
         super().__init__(
             train_X=train_X,
             train_Y=train_Y,
             task_feature=task_feature,
             train_Yvar=train_Yvar,
+            mean_module=mean_module,
+            covar_module=covar_module,
+            likelihood=likelihood,
             output_tasks=output_tasks,
             all_tasks=all_tasks,
             input_transform=input_transform,
             outcome_transform=outcome_transform,
         )
         self.device = train_X.device
         if all_tasks is None:
-            all_tasks = train_X[:, task_feature].unique()
-            self.all_tasks = all_tasks.to(dtype=torch.long).tolist()
+            all_tasks_tensor = train_X[:, task_feature].unique()
+            self.all_tasks = all_tasks_tensor.to(dtype=torch.long).tolist()
         else:
-            all_tasks = torch.tensor(all_tasks, dtype=torch.long)
+            all_tasks_tensor = torch.tensor(all_tasks, dtype=torch.long)
             self.all_tasks = all_tasks
         self.all_tasks.sort()  # These are the context indices.

         if context_cat_feature is None:
-            context_cat_feature = all_tasks.unsqueeze(-1).to(device=self.device)
-        self.context_cat_feature = context_cat_feature  # row indices = context indices
+            context_cat_feature = all_tasks_tensor.unsqueeze(-1).to(device=self.device)
+        self.context_cat_feature: Tensor = (
+            context_cat_feature  # row indices = context indices
+        )
         self.context_emb_feature = context_emb_feature

         # construct emb_dims based on categorical features
@@ -115,7 +137,7 @@ def __init__(
                 for x, y in self.emb_dims
             ]
         )
-        self.task_covar_module = RBFKernel(
+        self.task_covar_module_base = RBFKernel(
             ard_num_dims=n_embs,
             lengthscale_constraint=Interval(
                 0.0, 2.0, transform=None, initial_value=1.0
@@ -132,7 +154,7 @@ def _eval_context_covar(self) -> LinearOperator:
         to get the task covariance matrix.
         """
         all_embs = self._task_embeddings()
-        return self.task_covar_module(all_embs)
+        return self.task_covar_module_base(all_embs)

     def _task_embeddings(self) -> Tensor:
         """Generate embedding features for all contexts."""
@@ -154,7 +176,7 @@ def _task_embeddings(self) -> Tensor:
         )
         return embeddings

-    def task_covar_matrix(self, task_idcs: Tensor) -> Tensor:
+    def task_covar_module(self, task_idcs: Tensor) -> Tensor:
         r"""Compute the task covariance matrix for a given tensor of
         task / context indices.

@@ -184,17 +206,47 @@ def task_covar_matrix(self, task_idcs: Tensor) -> Tensor:
             covar_matrix[base_idx].transpose(-1, -2).gather(index=expanded_idx, dim=-2)
         )

-    def forward(self, x: Tensor) -> MultivariateNormal:
-        if self.training:
-            x = self.transform_inputs(x)
-        x_basic, task_idcs = self._split_inputs(x)
-        # Compute base mean and covariance
-        mean_x = self.mean_module(x_basic)
-        covar_x = self.covar_module(x_basic)
-        # Compute task covariances
-        covar_i = self.task_covar_matrix(task_idcs)
-        covar = covar_x.mul(covar_i)
-        return MultivariateNormal(mean_x, covar)
+    @classmethod
+    def construct_inputs(
+        cls,
+        training_data: Union[SupervisedDataset, MultiTaskDataset],
+        task_feature: int,
+        output_tasks: Optional[List[int]] = None,
+        context_cat_feature: Optional[Tensor] = None,
+        context_emb_feature: Optional[Tensor] = None,
+        embs_dim_list: Optional[List[int]] = None,
+        **kwargs,
+    ) -> Dict[str, Any]:
+        r"""Construct `Model` keyword arguments from a dataset and other args.
+
+        Args:
+            training_data: A `SupervisedDataset` or a `MultiTaskDataset`.
+            task_feature: Column index of embedded task indicator features.
+            output_tasks: A list of task indices for which to compute model
+                outputs for. If omitted, return outputs for all task indices.
+            context_cat_feature: (n_contexts x k) one-hot encoded context
+                features. Rows are ordered by context indices, where k is the
+                number of categorical variables. If None, task indices will
+                be used and k = 1.
+            context_emb_feature: (n_contexts x m) pre-given continuous
+                embedding features. Rows are ordered by context indices.
+            embs_dim_list: Embedding dimension for each categorical variable.
+                The length equals k. If None, the embedding dimension is set to 1
+                for each categorical variable.
+        """
+        base_inputs = super().construct_inputs(
+            training_data=training_data,
+            task_feature=task_feature,
+            output_tasks=output_tasks,
+            **kwargs,
+        )
+        if context_cat_feature is not None:
+            base_inputs["context_cat_feature"] = context_cat_feature
+        if context_emb_feature is not None:
+            base_inputs["context_emb_feature"] = context_emb_feature
+        if embs_dim_list is not None:
+            base_inputs["embs_dim_list"] = embs_dim_list
+        return base_inputs


 class FixedNoiseLCEMGP(LCEMGP):
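
Because `mean_module`, `covar_module` and `likelihood` are now forwarded to `MultiTaskGP`, callers can swap in their own modules. A minimal sketch, assuming 10 training points with one non-task feature in column 0 and task indices in column 1 (the specific module choices are illustrative, not the defaults):

    import torch
    from botorch.models.contextual_multioutput import LCEMGP
    from gpytorch.kernels import MaternKernel, ScaleKernel
    from gpytorch.means import ZeroMean

    train_X = torch.rand(10, 1, dtype=torch.double)
    task_idx = (torch.arange(10) >= 5).to(train_X).unsqueeze(-1)  # tasks 0 and 1
    train_X = torch.cat([train_X, task_idx], dim=-1)  # task indices in column 1
    train_Y = torch.sin(train_X[:, :1])

    model = LCEMGP(
        train_X=train_X,
        train_Y=train_Y,
        task_feature=1,
        mean_module=ZeroMean(),  # instead of the default ConstantMean
        covar_module=ScaleKernel(MaternKernel(nu=2.5, ard_num_dims=1)),
    )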

botorch/utils/test_helpers.py (+1 -1)

@@ -67,7 +67,7 @@ def standardize_moments(

 def gen_multi_task_dataset(
     yvar: Optional[float] = None, task_values: Optional[List[int]] = None, **tkwargs
-) -> Tuple[MultiTaskDataset, Tuple[Tensor, Tensor, Tensor]]:
+) -> Tuple[MultiTaskDataset, Tuple[Tensor, Tensor, Optional[Tensor]]]:
     """Constructs a multi-task dataset with two tasks, each with 10 data points."""
     X = torch.linspace(0, 0.95, 10, **tkwargs) + 0.05 * torch.rand(10, **tkwargs)
     X = X.unsqueeze(dim=-1)
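
The widened return annotation matches how the helper is used in the tests: when `yvar` is None, the third element of the tensor tuple is None and the model infers the noise. A small sketch of both cases (assuming the helper's current behavior):

    import torch
    from botorch.utils.test_helpers import gen_multi_task_dataset

    _, (train_x, train_y, train_yvar) = gen_multi_task_dataset(yvar=None, dtype=torch.double)
    assert train_yvar is None  # inferred-noise case

    _, (_, _, train_yvar) = gen_multi_task_dataset(yvar=0.01, dtype=torch.double)
    assert train_yvar is not None  # fixed observation noise of 0.01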

test/models/test_contextual_multioutput.py (+55 -42)

@@ -10,6 +10,7 @@
 from botorch.models.contextual_multioutput import FixedNoiseLCEMGP, LCEMGP
 from botorch.models.multitask import MultiTaskGP
 from botorch.posteriors import GPyTorchPosterior
+from botorch.utils.test_helpers import gen_multi_task_dataset
 from botorch.utils.testing import BotorchTestCase
 from gpytorch.distributions import MultitaskMultivariateNormal, MultivariateNormal
 from gpytorch.mlls.exact_marginal_log_likelihood import ExactMarginalLogLikelihood
@@ -22,27 +23,15 @@

 class ContextualMultiOutputTest(BotorchTestCase):
     def test_LCEMGP(self):
-        d = 1
         for dtype, fixed_noise in ((torch.float, True), (torch.double, False)):
-            # test with batch evaluation
-            train_x = torch.rand(10, d, device=self.device, dtype=dtype)
-            train_y = torch.cos(train_x)
-            # 2 contexts here
-            task_indices = torch.tensor(
-                [0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0],
-                device=self.device,
-                dtype=dtype,
+            _, (train_x, train_y, train_yvar) = gen_multi_task_dataset(
+                yvar=0.01 if fixed_noise else None, dtype=dtype, device=self.device
             )
-            train_x = torch.cat([train_x, task_indices.unsqueeze(-1)], axis=1)
-
-            if fixed_noise:
-                train_yvar = torch.ones(10, 1, device=self.device, dtype=dtype) * 0.01
-            else:
-                train_yvar = None
+            task_feature = 0
             model = LCEMGP(
                 train_X=train_x,
                 train_Y=train_y,
-                task_feature=d,
+                task_feature=task_feature,
                 train_Yvar=train_yvar,
             )

@@ -65,20 +54,18 @@ def test_LCEMGP(self):
             self.assertIsInstance(embeddings, Tensor)
             self.assertEqual(embeddings.shape, torch.Size([2, 1]))

-            test_x = torch.rand(5, d, device=self.device, dtype=dtype)
-            task_indices = torch.tensor(
-                [0.0, 0.0, 0.0, 0.0, 0.0], device=self.device, dtype=dtype
-            )
-            test_x = torch.cat([test_x, task_indices.unsqueeze(-1)], axis=1)
+            test_x = train_x[:5]
             self.assertIsInstance(model(test_x), MultivariateNormal)

             # test posterior
-            posterior_f = model.posterior(test_x[:, :d])
+            posterior_f = model.posterior(test_x[:, task_feature + 1 :])
             self.assertIsInstance(posterior_f, GPyTorchPosterior)
             self.assertIsInstance(posterior_f.distribution, MultitaskMultivariateNormal)

             # test posterior w/ single output index
-            posterior_f = model.posterior(test_x[:, :d], output_indices=[0])
+            posterior_f = model.posterior(
+                test_x[:, task_feature + 1 :], output_indices=[0]
+            )
             self.assertIsInstance(posterior_f, GPyTorchPosterior)
             self.assertIsInstance(posterior_f.distribution, MultivariateNormal)

@@ -87,9 +74,9 @@ def test_LCEMGP(self):
             model2 = LCEMGP(
                 train_X=train_x,
                 train_Y=train_y,
-                task_feature=d,
+                task_feature=task_feature,
                 embs_dim_list=[2],  # increase dim from 1 to 2
-                context_emb_feature=torch.Tensor([[0.2], [0.3]]),
+                context_emb_feature=torch.tensor([[0.2], [0.3]]),
             )
             self.assertIsInstance(model2, LCEMGP)
             self.assertIsInstance(model2, MultiTaskGP)
@@ -113,37 +100,63 @@ def test_LCEMGP(self):
                 left_interp_indices=task_idcs,
                 right_interp_indices=task_idcs,
             ).to_dense()
-            self.assertAllClose(previous_covar, model.task_covar_matrix(task_idcs))
+            self.assertAllClose(previous_covar, model.task_covar_module(task_idcs))

     def test_FixedNoiseLCEMGP(self):
-        d = 1
         for dtype in (torch.float, torch.double):
-            train_x = torch.rand(10, d, device=self.device, dtype=dtype)
-            train_y = torch.cos(train_x)
-            task_indices = torch.tensor(
-                [0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0], device=self.device
+            _, (train_x, train_y, train_yvar) = gen_multi_task_dataset(
+                yvar=0.01, dtype=dtype, device=self.device
             )
-            train_x = torch.cat([train_x, task_indices.unsqueeze(-1)], axis=1)
-            train_yvar = torch.ones(10, 1, device=self.device, dtype=dtype) * 0.01

             with self.assertWarnsRegex(DeprecationWarning, "FixedNoiseLCEMGP"):
                 model = FixedNoiseLCEMGP(
                     train_X=train_x,
                     train_Y=train_y,
                     train_Yvar=train_yvar,
-                    task_feature=d,
+                    task_feature=0,
                 )
             mll = ExactMarginalLogLikelihood(model.likelihood, model)
             fit_gpytorch_mll(mll, optimizer_kwargs={"options": {"maxiter": 1}})
-
             self.assertIsInstance(model, FixedNoiseLCEMGP)

-            test_x = torch.rand(5, d, device=self.device, dtype=dtype)
-            task_indices = torch.tensor(
-                [0.0, 0.0, 0.0, 0.0, 0.0], device=self.device, dtype=dtype
+            test_x = train_x[:5]
+            self.assertIsInstance(model(test_x), MultivariateNormal)
+
+    def test_construct_inputs(self) -> None:
+        for with_embedding_inputs, yvar in ((True, None), (False, 0.01)):
+            dataset, (train_x, train_y, train_yvar) = gen_multi_task_dataset(
+                yvar=yvar, dtype=torch.double, device=self.device
            )
-            test_x = torch.cat(
-                [test_x, task_indices.unsqueeze(-1)],
-                axis=1,
+            model_inputs = LCEMGP.construct_inputs(
+                training_data=dataset,
+                task_feature=0,
+                embs_dim_list=[2] if with_embedding_inputs else None,
+                context_emb_feature=(
+                    torch.tensor([[0.2], [0.3]]) if with_embedding_inputs else None
+                ),
+                context_cat_feature=(
+                    torch.tensor([[0.4], [0.5]]) if with_embedding_inputs else None
+                ),
             )
-            self.assertIsInstance(model(test_x), MultivariateNormal)
+            # Check that the model inputs are valid.
+            LCEMGP(**model_inputs)
+            # Check that the model inputs are as expected.
+            self.assertAllClose(model_inputs.pop("train_X"), train_x)
+            self.assertAllClose(model_inputs.pop("train_Y"), train_y)
+            if yvar is not None:
+                self.assertAllClose(model_inputs.pop("train_Yvar"), train_yvar)
+            if with_embedding_inputs:
+                self.assertEqual(model_inputs.pop("embs_dim_list"), [2])
+                self.assertAllClose(
+                    model_inputs.pop("context_emb_feature"),
+                    torch.tensor([[0.2], [0.3]]),
+                )
+                self.assertAllClose(
+                    model_inputs.pop("context_cat_feature"),
+                    torch.tensor([[0.4], [0.5]]),
+                )
+            self.assertEqual(model_inputs.pop("all_tasks"), [0, 1])
+            self.assertEqual(model_inputs.pop("task_feature"), 0)
+            self.assertIsNone(model_inputs.pop("output_tasks"))
+            # Check that there are no unexpected inputs.
+            self.assertEqual(model_inputs, {})
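
A migration note implied by the tests above: the old `task_covar_matrix(task_idcs)` call becomes `task_covar_module(task_idcs)`, and the `RBFKernel` over the context embeddings is now reachable as `task_covar_module_base`. Per the commit summary's rationale, giving the method the name `task_covar_module` lets LCEMGP inherit `MultiTaskGP.forward` (which evaluates `self.task_covar_module(task_idcs)`) unchanged. A hedged sketch, reusing the `model` from the earlier example:

    task_idcs = torch.tensor([[0], [1]])  # context indices, one per row
    task_covar = model.task_covar_module(task_idcs)  # was: model.task_covar_matrix(...)
    embedding_kernel = model.task_covar_module_base  # RBFKernel over context embeddings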
