Skip to content

Commit c69800d

Browse files
eddiebergman and dependabot[bot] authored Jun 17, 2022
Remove references to validation set in evaluator (#1517)
* Init commit
* Fix logging server cleanup (#1503)
* Fix logging server cleanup
* Add comment relating to the `try: finally:`
* Remove nested try: except: from `fit`
* Bump peter-evans/find-comment from 1 to 2 (#1520)

  Bumps [peter-evans/find-comment](https://github.com/peter-evans/find-comment) from 1 to 2.
  - [Release notes](https://github.com/peter-evans/find-comment/releases)
  - [Commits](peter-evans/find-comment@v1...v2)

  ---
  updated-dependencies:
  - dependency-name: peter-evans/find-comment
    dependency-type: direct:production
    update-type: version-update:semver-major
  ...

  Signed-off-by: dependabot[bot] <[email protected]>
  Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
* Bump actions/stale from 4 to 5 (#1521)

  Bumps [actions/stale](https://github.com/actions/stale) from 4 to 5.
  - [Release notes](https://github.com/actions/stale/releases)
  - [Changelog](https://github.com/actions/stale/blob/main/CHANGELOG.md)
  - [Commits](actions/stale@v4...v5)

  ---
  updated-dependencies:
  - dependency-name: actions/stale
    dependency-type: direct:production
    update-type: version-update:semver-major
  ...

  Signed-off-by: dependabot[bot] <[email protected]>
  Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
* Init commit
* Update evaluation module
* Clean up other occurrences of the word validation
* Re-add test for test predictions

Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
1 parent 9002fca commit c69800d

9 files changed

+88
-418
lines changed
 

‎autosklearn/automl.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -244,7 +244,7 @@ def __init__(
244244

245245
if isinstance(disable_evaluator_output, Iterable):
246246
disable_evaluator_output = list(disable_evaluator_output) # Incase iterator
247-
allowed = set(["model", "cv_model", "y_optimization", "y_test", "y_valid"])
247+
allowed = set(["model", "cv_model", "y_optimization", "y_test"])
248248
unknown = allowed - set(disable_evaluator_output)
249249
if any(unknown):
250250
raise ValueError(

‎autosklearn/estimators.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -262,8 +262,8 @@ def __init__(
262262
list are:
263263
264264
* ``'y_optimization'`` : do not save the predictions for the
265-
optimization/validation set, which would later on be used to build
266-
an ensemble.
265+
optimization set, which would later on be used to build an ensemble.
266+
267267
* ``model`` : do not save any model files
268268
269269
smac_scenario_args : dict, optional (None)

‎autosklearn/evaluation/__init__.py

+1-23
Original file line numberDiff line numberDiff line change
@@ -230,14 +230,7 @@ def __init__(
230230
self.memory_limit = memory_limit
231231

232232
dm = self.backend.load_datamanager()
233-
if "X_valid" in dm.data and "Y_valid" in dm.data:
234-
self._get_validation_loss = True
235-
else:
236-
self._get_validation_loss = False
237-
if "X_test" in dm.data and "Y_test" in dm.data:
238-
self._get_test_loss = True
239-
else:
240-
self._get_test_loss = False
233+
self._get_test_loss = "X_test" in dm.data and "Y_test" in dm.data
241234

242235
self.port = port
243236
self.pynisher_context = pynisher_context
@@ -533,21 +526,6 @@ def run(
533526
additional_run_info["train_learning_curve"] = train_learning_curve
534527
additional_run_info["learning_curve_runtime"] = learning_curve_runtime
535528

536-
if self._get_validation_loss:
537-
validation_learning_curve = (
538-
autosklearn.evaluation.util.extract_learning_curve(
539-
info,
540-
"validation_loss",
541-
)
542-
)
543-
if len(validation_learning_curve) > 1:
544-
additional_run_info[
545-
"validation_learning_curve"
546-
] = validation_learning_curve
547-
additional_run_info[
548-
"learning_curve_runtime"
549-
] = learning_curve_runtime
550-
551529
if self._get_test_loss:
552530
test_learning_curve = (
553531
autosklearn.evaluation.util.extract_learning_curve(

‎autosklearn/evaluation/abstract_evaluator.py

+14-51
Original file line numberDiff line numberDiff line change
@@ -220,8 +220,6 @@ def __init__(
220220
self.include = include
221221
self.exclude = exclude
222222

223-
self.X_valid = self.datamanager.data.get("X_valid")
224-
self.y_valid = self.datamanager.data.get("Y_valid")
225223
self.X_test = self.datamanager.data.get("X_test")
226224
self.y_test = self.datamanager.data.get("Y_test")
227225

@@ -359,7 +357,6 @@ def finish_up(
359357
loss: Union[Dict[str, float], float],
360358
train_loss: Optional[Dict[str, float]],
361359
opt_pred: np.ndarray,
362-
valid_pred: np.ndarray,
363360
test_pred: np.ndarray,
364361
additional_run_info: Optional[TYPE_ADDITIONAL_INFO],
365362
file_output: bool,
@@ -382,19 +379,12 @@ def finish_up(
382379
self.duration = time.time() - self.starttime
383380

384381
if file_output:
385-
file_out_loss, additional_run_info_ = self.file_output(
386-
opt_pred,
387-
valid_pred,
388-
test_pred,
389-
)
382+
file_out_loss, additional_run_info_ = self.file_output(opt_pred, test_pred)
390383
else:
391384
file_out_loss = None
392385
additional_run_info_ = {}
393386

394-
validation_loss, test_loss = self.calculate_auxiliary_losses(
395-
valid_pred,
396-
test_pred,
397-
)
387+
test_loss = self.calculate_auxiliary_losses(test_pred)
398388

399389
if file_out_loss is not None:
400390
return self.duration, file_out_loss, self.seed, additional_run_info_
@@ -424,8 +414,6 @@ def finish_up(
424414
additional_run_info["train_loss"] = [
425415
train_loss[metric.name] for metric in self.metrics
426416
]
427-
if validation_loss is not None:
428-
additional_run_info["validation_loss"] = validation_loss
429417
if test_loss is not None:
430418
additional_run_info["test_loss"] = test_loss
431419

@@ -442,41 +430,22 @@ def finish_up(
442430

443431
def calculate_auxiliary_losses(
444432
self,
445-
Y_valid_pred: np.ndarray,
446-
Y_test_pred: np.ndarray,
447-
) -> Tuple[Optional[float | Sequence[float]], Optional[float | Sequence[float]]]:
448-
if Y_valid_pred is not None:
449-
if self.y_valid is not None:
450-
validation_loss: Optional[Union[float, Dict[str, float]]] = self._loss(
451-
self.y_valid, Y_valid_pred
452-
)
453-
if len(self.metrics) == 1:
454-
validation_loss = validation_loss[self.metrics[0].name]
455-
else:
456-
validation_loss = None
457-
else:
458-
validation_loss = None
433+
Y_test_pred: np.ndarray | None,
434+
) -> float | dict[str, float] | None:
435+
if Y_test_pred is None or self.y_test is None:
436+
return None
459437

460-
if Y_test_pred is not None:
461-
if self.y_test is not None:
462-
test_loss: Optional[Union[float, Dict[str, float]]] = self._loss(
463-
self.y_test, Y_test_pred
464-
)
465-
if len(self.metrics) == 1:
466-
test_loss = test_loss[self.metrics[0].name]
467-
else:
468-
test_loss = None
469-
else:
470-
test_loss = None
438+
test_loss = self._loss(self.y_test, Y_test_pred)
439+
if len(self.metrics) == 1:
440+
test_loss = test_loss[self.metrics[0].name]
471441

472-
return validation_loss, test_loss
442+
return test_loss
473443

474444
def file_output(
475445
self,
476446
Y_optimization_pred: np.ndarray,
477-
Y_valid_pred: np.ndarray,
478447
Y_test_pred: np.ndarray,
479-
) -> Tuple[Optional[float], Dict[str, Union[str, int, float, List, Dict, Tuple]]]:
448+
) -> tuple[float | None, dict[str, Any]]:
480449
# Abort if self.Y_optimization is None
481450
# self.Y_optimization can be None if we use partial-cv, then,
482451
# obviously no output should be saved.
@@ -496,12 +465,7 @@ def file_output(
496465
)
497466

498467
# Abort if predictions contain NaNs
499-
for y, s in [
500-
# Y_train_pred deleted here. Fix unittest accordingly.
501-
[Y_optimization_pred, "optimization"],
502-
[Y_valid_pred, "validation"],
503-
[Y_test_pred, "test"],
504-
]:
468+
for y, s in [(Y_optimization_pred, "optimization"), (Y_test_pred, "test")]:
505469
if y is not None and not np.all(np.isfinite(y)):
506470
return (
507471
1.0,
@@ -553,14 +517,13 @@ def file_output(
553517
budget=self.budget,
554518
model=self.model if "model" not in self.disable_file_output else None,
555519
cv_model=models if "cv_model" not in self.disable_file_output else None,
520+
# TODO: below line needs to be deleted once backend is updated
521+
valid_predictions=None,
556522
ensemble_predictions=(
557523
Y_optimization_pred
558524
if "y_optimization" not in self.disable_file_output
559525
else None
560526
),
561-
valid_predictions=(
562-
Y_valid_pred if "y_valid" not in self.disable_file_output else None
563-
),
564527
test_predictions=(
565528
Y_test_pred if "y_test" not in self.disable_file_output else None
566529
),

‎autosklearn/evaluation/test_evaluator.py

-2
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,6 @@ def fit_predict_and_loss(self) -> None:
6767
loss=loss,
6868
train_loss=None,
6969
opt_pred=Y_pred,
70-
valid_pred=None,
7170
test_pred=None,
7271
file_output=False,
7372
final_call=True,
@@ -78,7 +77,6 @@ def fit_predict_and_loss(self) -> None:
7877
def predict_and_loss(
7978
self, train: bool = False
8079
) -> Tuple[Union[Dict[str, float], float], np.array, Any, Any]:
81-
8280
if train:
8381
Y_pred = self.predict_function(
8482
self.X_train, self.model, self.task_type, self.Y_train

0 commit comments

Comments
 (0)
Please sign in to comment.