
Commit a44b2aa

sdaulton authored and facebook-github-bot committed
add utility for computing AIC/BIC/MLL from a model (#2785)
Summary:
Pull Request resolved: #2785

Add utility for computing in-sample model fit metrics

Reviewed By: saitcakmak

Differential Revision: D71827991

fbshipit-source-id: d69f08eddce95e547421998c596eb89ff7d2d6fb
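For reference, the criteria computed by the new utility (see botorch/utils/evaluation.py below) are the in-sample marginal log likelihood (MLL) of the fitted GP and the information criteria derived from it: AIC = 2k - 2*MLL and BIC = k*ln(n) - 2*MLL, where k is the number of model parameters and n is the number of training points.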
1 parent 557d016 commit a44b2aa

File tree

3 files changed: +109 −0 lines changed


botorch/utils/evaluation.py

+51
@@ -0,0 +1,51 @@
#!/usr/bin/env python3
# Copyright (c) Meta Platforms, Inc. and affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

from math import log

import torch
from botorch.utils.transforms import is_fully_bayesian
from gpytorch.models.exact_gp import ExactGP

MLL = "MLL"
AIC = "AIC"
BIC = "BIC"


def compute_in_sample_model_fit_metric(model: ExactGP, criterion: str) -> float:
    """Compute an in-sample model fit metric.

    Args:
        model: A fitted ExactGP.
        criterion: Evaluation criterion. One of "MLL", "AIC", "BIC". AIC
            penalizes the MLL based on the number of parameters. BIC uses
            a slightly different penalty based on the number of parameters
            and data points.

    Returns:
        The in-sample evaluation metric.
    """
    if criterion not in (AIC, BIC, MLL):
        raise ValueError(f"Invalid evaluation criterion {criterion}.")
    if is_fully_bayesian(model=model):
        model.train(reset=False)
    else:
        model.train()
    with torch.no_grad():
        output = model(*model.train_inputs)
        output = model.likelihood(output)
        mll = output.log_prob(model.train_targets)
    # compute average MLL over MCMC samples if the model is fully bayesian
    mll_scalar = mll.mean().item()
    model.eval()
    num_params = sum(p.numel() for p in model.parameters())
    if is_fully_bayesian(model=model):
        num_params /= mll.shape[0]
    if criterion == AIC:
        return 2 * num_params - 2 * mll_scalar
    elif criterion == BIC:
        return num_params * log(model.train_inputs[0].shape[-2]) - 2 * mll_scalar
    return mll_scalar
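
For context, a minimal usage sketch (not part of this commit) showing how the new utility can be called on a fitted SingleTaskGP; the synthetic training data and tensor shapes below are illustrative assumptions.

# Usage sketch (illustrative only): fit a SingleTaskGP on synthetic data,
# then compare the in-sample fit criteria computed by the new utility.
import math

import torch
from botorch.fit import fit_gpytorch_mll
from botorch.models.gp_regression import SingleTaskGP
from botorch.utils.evaluation import AIC, BIC, MLL, compute_in_sample_model_fit_metric
from gpytorch.mlls.exact_marginal_log_likelihood import ExactMarginalLogLikelihood

# Synthetic training data (assumed for illustration): 20 points in 2 dimensions.
train_X = torch.rand(20, 2, dtype=torch.double)
train_Y = torch.sin(2 * math.pi * train_X.sum(dim=-1, keepdim=True))

model = SingleTaskGP(train_X=train_X, train_Y=train_Y)
fit_gpytorch_mll(ExactMarginalLogLikelihood(model.likelihood, model))

mll = compute_in_sample_model_fit_metric(model=model, criterion=MLL)
aic = compute_in_sample_model_fit_metric(model=model, criterion=AIC)  # 2k - 2 * MLL
bic = compute_in_sample_model_fit_metric(model=model, criterion=BIC)  # k * ln(n) - 2 * MLL
print(f"MLL={mll:.3f}  AIC={aic:.3f}  BIC={bic:.3f}")

A higher MLL indicates a better in-sample likelihood, while lower AIC/BIC values indicate a better fit after penalizing model complexity.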

sphinx/source/utils.rst

+5
@@ -32,6 +32,11 @@ Dispatcher
 .. automodule:: botorch.utils.dispatcher
     :members:

+Evaluation
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+.. automodule:: botorch.utils.evaluation
+    :members:
+
 Low-Rank Cholesky Update Utils
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 .. automodule:: botorch.utils.low_rank

test/utils/test_evaluation.py

+53
@@ -0,0 +1,53 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

from itertools import product
from math import log, pi

import torch
from botorch.fit import fit_fully_bayesian_model_nuts, fit_gpytorch_mll
from botorch.models.fully_bayesian import SaasFullyBayesianSingleTaskGP
from botorch.models.gp_regression import SingleTaskGP
from botorch.test_utils.mock import mock_optimize
from botorch.utils.evaluation import AIC, BIC, compute_in_sample_model_fit_metric, MLL
from botorch.utils.testing import BotorchTestCase
from gpytorch.mlls.exact_marginal_log_likelihood import ExactMarginalLogLikelihood


class TestEvaluation(BotorchTestCase):
    @mock_optimize
    def test_compute_in_sample_model_fit_metric(self):
        torch.manual_seed(0)
        for dtype, model_cls in product(
            (torch.float, torch.double), (SingleTaskGP, SaasFullyBayesianSingleTaskGP)
        ):
            train_X = torch.linspace(
                0, 1, 10, dtype=dtype, device=self.device
            ).unsqueeze(-1)
            train_Y = torch.sin(2 * pi * train_X)
            model = model_cls(train_X=train_X, train_Y=train_Y)
            if model_cls is SingleTaskGP:
                fit_gpytorch_mll(ExactMarginalLogLikelihood(model.likelihood, model))
            else:
                fit_fully_bayesian_model_nuts(
                    model,
                    warmup_steps=8,
                    num_samples=6,
                    thinning=2,
                    disable_progbar=True,
                )
            num_params = sum(p.numel() for p in model.parameters())
            if model_cls is SaasFullyBayesianSingleTaskGP:
                num_params /= 3  # divide by number of MCMC samples
            mll = compute_in_sample_model_fit_metric(model=model, criterion=MLL)
            aic = compute_in_sample_model_fit_metric(model=model, criterion=AIC)
            bic = compute_in_sample_model_fit_metric(model=model, criterion=BIC)
            self.assertEqual(aic, 2 * num_params - 2 * mll)
            self.assertEqual(bic, log(10) * num_params - 2 * mll)
        # test invalid criterion
        with self.assertRaisesRegex(
            ValueError, "Invalid evaluation criterion invalid."
        ):
            compute_in_sample_model_fit_metric(model=model, criterion="invalid")
