From e9f1cfb72bfc952d7be149056dc1d6939e443d88 Mon Sep 17 00:00:00 2001 From: Lars Reimann Date: Wed, 1 May 2024 21:34:54 +0200 Subject: [PATCH] feat: allow using tables that already contain target for prediction (#687) Closes #636 ### Summary of Changes No longer raise an error if a table that already contains the target is passed to `predict`. The existing target column is now simply ignored during prediction and overwritten. --- src/safeds/exceptions/__init__.py | 3 --- src/safeds/exceptions/_ml.py | 14 -------------- src/safeds/ml/classical/_util_sklearn.py | 5 ----- .../ml/classical/classification/_ada_boost.py | 2 -- .../ml/classical/classification/_classifier.py | 2 -- .../ml/classical/classification/_decision_tree.py | 2 -- .../classical/classification/_gradient_boosting.py | 2 -- .../classification/_k_nearest_neighbors.py | 2 -- .../classification/_logistic_regression.py | 2 -- .../ml/classical/classification/_random_forest.py | 2 -- .../classification/_support_vector_machine.py | 2 -- src/safeds/ml/classical/regression/_ada_boost.py | 2 -- .../ml/classical/regression/_decision_tree.py | 2 -- .../regression/_elastic_net_regression.py | 2 -- .../ml/classical/regression/_gradient_boosting.py | 2 -- .../classical/regression/_k_nearest_neighbors.py | 2 -- .../ml/classical/regression/_lasso_regression.py | 2 -- .../ml/classical/regression/_linear_regression.py | 2 -- .../ml/classical/regression/_random_forest.py | 2 -- src/safeds/ml/classical/regression/_regressor.py | 2 -- .../ml/classical/regression/_ridge_regression.py | 2 -- .../regression/_support_vector_machine.py | 2 -- .../ml/classical/classification/test_classifier.py | 6 ------ .../ml/classical/regression/test_regressor.py | 6 ------ 24 files changed, 72 deletions(-) diff --git a/src/safeds/exceptions/__init__.py b/src/safeds/exceptions/__init__.py index 4facf891a..3d50cad65 100644 --- a/src/safeds/exceptions/__init__.py +++ b/src/safeds/exceptions/__init__.py @@ -29,7 +29,6 @@ OutOfBoundsError, ) from safeds.exceptions._ml import 
( - DatasetContainsTargetError, DatasetMissesDataError, DatasetMissesFeaturesError, FeatureDataMismatchError, @@ -63,7 +62,6 @@ "ValueNotPresentWhenFittedError": "._data:ValueNotPresentWhenFittedError", "WrongFileExtensionError": "._data:WrongFileExtensionError", # ML exceptions - "DatasetContainsTargetError": "._ml:DatasetContainsTargetError", "DatasetMissesDataError": "._ml:DatasetMissesDataError", "DatasetMissesFeaturesError": "._ml:DatasetMissesFeaturesError", "FeatureDataMismatchError": "._ml:FeatureDataMismatchError", @@ -100,7 +98,6 @@ "ValueNotPresentWhenFittedError", "WrongFileExtensionError", # ML exceptions - "DatasetContainsTargetError", "DatasetMissesDataError", "DatasetMissesFeaturesError", "FeatureDataMismatchError", diff --git a/src/safeds/exceptions/_ml.py b/src/safeds/exceptions/_ml.py index d44606d55..5cdb20c92 100644 --- a/src/safeds/exceptions/_ml.py +++ b/src/safeds/exceptions/_ml.py @@ -1,17 +1,3 @@ -class DatasetContainsTargetError(ValueError): - """ - Raised when a dataset contains the target column already. - - Parameters - ---------- - target_name: - The name of the target column. - """ - - def __init__(self, target_name: str): - super().__init__(f"Dataset already contains the target column '{target_name}'.") - - class DatasetMissesFeaturesError(ValueError): """ Raised when a dataset misses feature columns. diff --git a/src/safeds/ml/classical/_util_sklearn.py b/src/safeds/ml/classical/_util_sklearn.py index 2f822f9a1..12025d585 100644 --- a/src/safeds/ml/classical/_util_sklearn.py +++ b/src/safeds/ml/classical/_util_sklearn.py @@ -4,7 +4,6 @@ from safeds.data.labeled.containers import TabularDataset from safeds.data.tabular.containers import Table from safeds.exceptions import ( - DatasetContainsTargetError, DatasetMissesDataError, DatasetMissesFeaturesError, LearningError, @@ -102,8 +101,6 @@ def predict(model: Any, dataset: Table, feature_names: list[str] | None, target_ ------ ModelNotFittedError If the model has not been fitted yet. 
- DatasetContainsTargetError - If the dataset contains the target column already. DatasetMissesFeaturesError If the dataset misses feature columns. PredictionError @@ -118,8 +115,6 @@ def predict(model: Any, dataset: Table, feature_names: list[str] | None, target_ # Validation if model is None or target_name is None or feature_names is None: raise ModelNotFittedError - if dataset.has_column(target_name): - raise DatasetContainsTargetError(target_name) missing_feature_names = [feature_name for feature_name in feature_names if not dataset.has_column(feature_name)] if missing_feature_names: raise DatasetMissesFeaturesError(missing_feature_names) diff --git a/src/safeds/ml/classical/classification/_ada_boost.py b/src/safeds/ml/classical/classification/_ada_boost.py index 59ac0987f..d251e542c 100644 --- a/src/safeds/ml/classical/classification/_ada_boost.py +++ b/src/safeds/ml/classical/classification/_ada_boost.py @@ -170,8 +170,6 @@ def predict(self, dataset: Table) -> TabularDataset: ------ ModelNotFittedError If the model has not been fitted yet. - DatasetContainsTargetError - If the dataset contains the target column already. DatasetMissesFeaturesError If the dataset misses feature columns. PredictionError diff --git a/src/safeds/ml/classical/classification/_classifier.py b/src/safeds/ml/classical/classification/_classifier.py index 2e0f9cabf..1ad5792b3 100644 --- a/src/safeds/ml/classical/classification/_classifier.py +++ b/src/safeds/ml/classical/classification/_classifier.py @@ -70,8 +70,6 @@ def predict(self, dataset: Table) -> TabularDataset: ------ ModelNotFittedError If the model has not been fitted yet. - DatasetContainsTargetError - If the dataset contains the target column already. DatasetMissesFeaturesError If the dataset misses feature columns. 
PredictionError diff --git a/src/safeds/ml/classical/classification/_decision_tree.py b/src/safeds/ml/classical/classification/_decision_tree.py index 1b52d88ac..0bb5fe014 100644 --- a/src/safeds/ml/classical/classification/_decision_tree.py +++ b/src/safeds/ml/classical/classification/_decision_tree.py @@ -84,8 +84,6 @@ def predict(self, dataset: Table) -> TabularDataset: ------ ModelNotFittedError If the model has not been fitted yet. - DatasetContainsTargetError - If the dataset contains the target column already. DatasetMissesFeaturesError If the dataset misses feature columns. PredictionError diff --git a/src/safeds/ml/classical/classification/_gradient_boosting.py b/src/safeds/ml/classical/classification/_gradient_boosting.py index a8ac994a9..56545d345 100644 --- a/src/safeds/ml/classical/classification/_gradient_boosting.py +++ b/src/safeds/ml/classical/classification/_gradient_boosting.py @@ -141,8 +141,6 @@ def predict(self, dataset: Table) -> TabularDataset: ------ ModelNotFittedError If the model has not been fitted yet. - DatasetContainsTargetError - If the dataset contains the target column already. DatasetMissesFeaturesError If the dataset misses feature columns. PredictionError diff --git a/src/safeds/ml/classical/classification/_k_nearest_neighbors.py b/src/safeds/ml/classical/classification/_k_nearest_neighbors.py index c01412425..82c6cf920 100644 --- a/src/safeds/ml/classical/classification/_k_nearest_neighbors.py +++ b/src/safeds/ml/classical/classification/_k_nearest_neighbors.py @@ -134,8 +134,6 @@ def predict(self, dataset: Table) -> TabularDataset: ------ ModelNotFittedError If the model has not been fitted yet. - DatasetContainsTargetError - If the dataset contains the target column already. DatasetMissesFeaturesError If the dataset misses feature columns. 
PredictionError diff --git a/src/safeds/ml/classical/classification/_logistic_regression.py b/src/safeds/ml/classical/classification/_logistic_regression.py index c8243cfe8..c3e0b09d0 100644 --- a/src/safeds/ml/classical/classification/_logistic_regression.py +++ b/src/safeds/ml/classical/classification/_logistic_regression.py @@ -84,8 +84,6 @@ def predict(self, dataset: Table) -> TabularDataset: ------ ModelNotFittedError If the model has not been fitted yet. - DatasetContainsTargetError - If the dataset contains the target column already. DatasetMissesFeaturesError If the dataset misses feature columns. PredictionError diff --git a/src/safeds/ml/classical/classification/_random_forest.py b/src/safeds/ml/classical/classification/_random_forest.py index d3882e81a..8f6ba7247 100644 --- a/src/safeds/ml/classical/classification/_random_forest.py +++ b/src/safeds/ml/classical/classification/_random_forest.py @@ -120,8 +120,6 @@ def predict(self, dataset: Table) -> TabularDataset: ------ ModelNotFittedError If the model has not been fitted yet. - DatasetContainsTargetError - If the dataset contains the target column already. DatasetMissesFeaturesError If the dataset misses feature columns. PredictionError diff --git a/src/safeds/ml/classical/classification/_support_vector_machine.py b/src/safeds/ml/classical/classification/_support_vector_machine.py index 347eef2a6..6890ebfd7 100644 --- a/src/safeds/ml/classical/classification/_support_vector_machine.py +++ b/src/safeds/ml/classical/classification/_support_vector_machine.py @@ -245,8 +245,6 @@ def predict(self, dataset: Table) -> TabularDataset: ------ ModelNotFittedError If the model has not been fitted yet. - DatasetContainsTargetError - If the dataset contains the target column already. DatasetMissesFeaturesError If the dataset misses feature columns. 
PredictionError diff --git a/src/safeds/ml/classical/regression/_ada_boost.py b/src/safeds/ml/classical/regression/_ada_boost.py index d65c7901a..dd27e266d 100644 --- a/src/safeds/ml/classical/regression/_ada_boost.py +++ b/src/safeds/ml/classical/regression/_ada_boost.py @@ -170,8 +170,6 @@ def predict(self, dataset: Table) -> TabularDataset: ------ ModelNotFittedError If the model has not been fitted yet. - DatasetContainsTargetError - If the dataset contains the target column already. DatasetMissesFeaturesError If the dataset misses feature columns. PredictionError diff --git a/src/safeds/ml/classical/regression/_decision_tree.py b/src/safeds/ml/classical/regression/_decision_tree.py index 0261d744f..2cd8066f9 100644 --- a/src/safeds/ml/classical/regression/_decision_tree.py +++ b/src/safeds/ml/classical/regression/_decision_tree.py @@ -84,8 +84,6 @@ def predict(self, dataset: Table) -> TabularDataset: ------ ModelNotFittedError If the model has not been fitted yet. - DatasetContainsTargetError - If the dataset contains the target column already. DatasetMissesFeaturesError If the dataset misses feature columns. PredictionError diff --git a/src/safeds/ml/classical/regression/_elastic_net_regression.py b/src/safeds/ml/classical/regression/_elastic_net_regression.py index 65e5e33f3..125f49e7a 100644 --- a/src/safeds/ml/classical/regression/_elastic_net_regression.py +++ b/src/safeds/ml/classical/regression/_elastic_net_regression.py @@ -171,8 +171,6 @@ def predict(self, dataset: Table) -> TabularDataset: ------ ModelNotFittedError If the model has not been fitted yet. - DatasetContainsTargetError - If the dataset contains the target column already. DatasetMissesFeaturesError If the dataset misses feature columns. 
PredictionError diff --git a/src/safeds/ml/classical/regression/_gradient_boosting.py b/src/safeds/ml/classical/regression/_gradient_boosting.py index 55e67a442..34ec419ab 100644 --- a/src/safeds/ml/classical/regression/_gradient_boosting.py +++ b/src/safeds/ml/classical/regression/_gradient_boosting.py @@ -141,8 +141,6 @@ def predict(self, dataset: Table) -> TabularDataset: ------ ModelNotFittedError If the model has not been fitted yet. - DatasetContainsTargetError - If the dataset contains the target column already. DatasetMissesFeaturesError If the dataset misses feature columns. PredictionError diff --git a/src/safeds/ml/classical/regression/_k_nearest_neighbors.py b/src/safeds/ml/classical/regression/_k_nearest_neighbors.py index 0ffcc5d51..8a96b3a62 100644 --- a/src/safeds/ml/classical/regression/_k_nearest_neighbors.py +++ b/src/safeds/ml/classical/regression/_k_nearest_neighbors.py @@ -136,8 +136,6 @@ def predict(self, dataset: Table) -> TabularDataset: ------ ModelNotFittedError If the model has not been fitted yet. - DatasetContainsTargetError - If the dataset contains the target column already. DatasetMissesFeaturesError If the dataset misses feature columns. PredictionError diff --git a/src/safeds/ml/classical/regression/_lasso_regression.py b/src/safeds/ml/classical/regression/_lasso_regression.py index 660d8960a..e912c4aba 100644 --- a/src/safeds/ml/classical/regression/_lasso_regression.py +++ b/src/safeds/ml/classical/regression/_lasso_regression.py @@ -125,8 +125,6 @@ def predict(self, dataset: Table) -> TabularDataset: ------ ModelNotFittedError If the model has not been fitted yet. - DatasetContainsTargetError - If the dataset contains the target column already. DatasetMissesFeaturesError If the dataset misses feature columns. 
PredictionError diff --git a/src/safeds/ml/classical/regression/_linear_regression.py b/src/safeds/ml/classical/regression/_linear_regression.py index da5dee320..101fec7a5 100644 --- a/src/safeds/ml/classical/regression/_linear_regression.py +++ b/src/safeds/ml/classical/regression/_linear_regression.py @@ -84,8 +84,6 @@ def predict(self, dataset: Table) -> TabularDataset: ------ ModelNotFittedError If the model has not been fitted yet. - DatasetContainsTargetError - If the dataset contains the target column already. DatasetMissesFeaturesError If the dataset misses feature columns. PredictionError diff --git a/src/safeds/ml/classical/regression/_random_forest.py b/src/safeds/ml/classical/regression/_random_forest.py index eb44f5b49..c595c5e7d 100644 --- a/src/safeds/ml/classical/regression/_random_forest.py +++ b/src/safeds/ml/classical/regression/_random_forest.py @@ -115,8 +115,6 @@ def predict(self, dataset: Table) -> TabularDataset: ------ ModelNotFittedError If the model has not been fitted yet. - DatasetContainsTargetError - If the dataset contains the target column already. DatasetMissesFeaturesError If the dataset misses feature columns. PredictionError diff --git a/src/safeds/ml/classical/regression/_regressor.py b/src/safeds/ml/classical/regression/_regressor.py index 088b4cfef..618f68ce7 100644 --- a/src/safeds/ml/classical/regression/_regressor.py +++ b/src/safeds/ml/classical/regression/_regressor.py @@ -68,8 +68,6 @@ def predict(self, dataset: Table) -> TabularDataset: ------ ModelNotFittedError If the model has not been fitted yet. - DatasetContainsTargetError - If the dataset contains the target column already. DatasetMissesFeaturesError If the dataset misses feature columns. 
PredictionError diff --git a/src/safeds/ml/classical/regression/_ridge_regression.py b/src/safeds/ml/classical/regression/_ridge_regression.py index 9ee4bfdd2..de1d5cfc1 100644 --- a/src/safeds/ml/classical/regression/_ridge_regression.py +++ b/src/safeds/ml/classical/regression/_ridge_regression.py @@ -126,8 +126,6 @@ def predict(self, dataset: Table) -> TabularDataset: ------ ModelNotFittedError If the model has not been fitted yet. - DatasetContainsTargetError - If the dataset contains the target column already. DatasetMissesFeaturesError If the dataset misses feature columns. PredictionError diff --git a/src/safeds/ml/classical/regression/_support_vector_machine.py b/src/safeds/ml/classical/regression/_support_vector_machine.py index ab92a72d3..c1425530c 100644 --- a/src/safeds/ml/classical/regression/_support_vector_machine.py +++ b/src/safeds/ml/classical/regression/_support_vector_machine.py @@ -245,8 +245,6 @@ def predict(self, dataset: Table) -> TabularDataset: ------ ModelNotFittedError If the model has not been fitted yet. - DatasetContainsTargetError - If the dataset contains the target column already. DatasetMissesFeaturesError If the dataset misses feature columns. 
PredictionError diff --git a/tests/safeds/ml/classical/classification/test_classifier.py b/tests/safeds/ml/classical/classification/test_classifier.py index 5d8803c8a..5cba32cc7 100644 --- a/tests/safeds/ml/classical/classification/test_classifier.py +++ b/tests/safeds/ml/classical/classification/test_classifier.py @@ -6,7 +6,6 @@ import pytest from safeds.data.tabular.containers import Table from safeds.exceptions import ( - DatasetContainsTargetError, DatasetMissesDataError, DatasetMissesFeaturesError, MissingValuesColumnError, @@ -190,11 +189,6 @@ def test_should_raise_if_not_fitted(self, classifier: Classifier, valid_data: Ta with pytest.raises(ModelNotFittedError): classifier.predict(valid_data.features) - def test_should_raise_if_dataset_contains_target(self, classifier: Classifier, valid_data: TabularDataset) -> None: - fitted_classifier = classifier.fit(valid_data) - with pytest.raises(DatasetContainsTargetError, match="target"): - fitted_classifier.predict(valid_data.to_table()) - def test_should_raise_if_dataset_misses_features(self, classifier: Classifier, valid_data: TabularDataset) -> None: fitted_classifier = classifier.fit(valid_data) with pytest.raises(DatasetMissesFeaturesError, match="[feat1, feat2]"): diff --git a/tests/safeds/ml/classical/regression/test_regressor.py b/tests/safeds/ml/classical/regression/test_regressor.py index 7d2a2f5b4..1a073883f 100644 --- a/tests/safeds/ml/classical/regression/test_regressor.py +++ b/tests/safeds/ml/classical/regression/test_regressor.py @@ -8,7 +8,6 @@ from safeds.data.tabular.containers import Column, Table from safeds.exceptions import ( ColumnLengthMismatchError, - DatasetContainsTargetError, DatasetMissesDataError, DatasetMissesFeaturesError, MissingValuesColumnError, @@ -191,11 +190,6 @@ def test_should_raise_if_not_fitted(self, regressor: Regressor, valid_data: Tabu with pytest.raises(ModelNotFittedError): regressor.predict(valid_data.features) - def test_should_raise_if_dataset_contains_target(self, 
regressor: Regressor, valid_data: TabularDataset) -> None: - fitted_regressor = regressor.fit(valid_data) - with pytest.raises(DatasetContainsTargetError, match="target"): - fitted_regressor.predict(valid_data.to_table()) - def test_should_raise_if_dataset_misses_features(self, regressor: Regressor, valid_data: TabularDataset) -> None: fitted_regressor = regressor.fit(valid_data) with pytest.raises(DatasetMissesFeaturesError, match="[feat1, feat2]"):