diff --git a/docs/tutorials/classification.ipynb b/docs/tutorials/classification.ipynb index ab2f22eef..52ec2aeb0 100644 --- a/docs/tutorials/classification.ipynb +++ b/docs/tutorials/classification.ipynb @@ -29,7 +29,7 @@ "\n", "titanic = Table.from_csv_file(\"data/titanic.csv\")\n", "#For visualisation purposes we only print out the first 15 rows.\n", - "titanic.slice_rows(0,15)" + "titanic.slice_rows(0, 15)" ], "metadata": { "collapsed": false @@ -77,7 +77,6 @@ "source": [ "from safeds.data.tabular.transformation import OneHotEncoder\n", "\n", - "old_column_names = train_table.column_names\n", "encoder = OneHotEncoder().fit(train_table, [\"sex\"])" ], "metadata": { @@ -97,18 +96,14 @@ "cell_type": "code", "execution_count": null, "outputs": [], - "source": [ - "transformed_table = encoder.transform(train_table)\n", - "new_column_names = transformed_table.column_names\n", - "new_columns= set(new_column_names) - set(old_column_names)" - ], + "source": "transformed_table = encoder.transform(train_table)", "metadata": { "collapsed": false } }, { "cell_type": "markdown", - "source": "5. Mark the `survived` `Column` as the target variable to be predicted. Use the new names of the fitted `Column`s as features, which will be used to make predictions based on the target variable.", + "source": "5. Mark the `survived` `Column` as the target variable to be predicted. Include some columns only as extra columns, which are completely ignored by the model:", "metadata": { "collapsed": false } @@ -118,9 +113,9 @@ "execution_count": null, "outputs": [], "source": [ - "train_tabular_dataset = transformed_table.to_tabular_dataset(\"survived\", feature_names=[\n", - " *new_columns\n", - "])" + "extra_names = [\"id\", \"name\", \"ticket\", \"cabin\", \"port_embarked\", \"age\", \"fare\"]\n", + "\n", + "train_tabular_dataset = transformed_table.to_tabular_dataset(\"survived\", extra_names)" ], "metadata": { "collapsed": false @@ -192,9 +187,7 @@ "encoder = OneHotEncoder().fit(test_table, [\"sex\"])\n", "testing_table = encoder.transform(testing_table)\n", "\n", - "test_tabular_dataset = testing_table.to_tabular_dataset(\"survived\", feature_names=[\n", - " *new_columns\n", - "])\n", + "test_tabular_dataset = testing_table.to_tabular_dataset(\"survived\", extra_names)\n", "fitted_model.accuracy(test_tabular_dataset)\n" ], "metadata": { diff --git a/docs/tutorials/regression.ipynb b/docs/tutorials/regression.ipynb index 364147288..2d5041791 100644 --- a/docs/tutorials/regression.ipynb +++ b/docs/tutorials/regression.ipynb @@ -60,7 +60,7 @@ }, { "cell_type": "markdown", - "source": "3. Mark the `price` `Column` as the target variable to be predicted. Use the new names of the fitted `Column`s as features, which will be used to make predictions based on the target variable.\n", + "source": "3. Mark the `price` `Column` as the target variable to be predicted. Include the `id` column only as an extra column, which is completely ignored by the model:", "metadata": { "collapsed": false } @@ -70,10 +70,9 @@ "execution_count": null, "outputs": [], "source": [ - "feature_columns = set(train_table.column_names) - set([\"price\", \"id\"])\n", + "extra_names = [\"id\"]\n", "\n", - "train_tabular_dataset = train_table.to_tabular_dataset(\"price\", feature_names=[\n", - " *feature_columns])\n" + "train_tabular_dataset = train_table.to_tabular_dataset(\"price\", extra_names)\n" ], "metadata": { "collapsed": false @@ -147,9 +146,7 @@ } ], "source": [ - "test_tabular_dataset = testing_table.to_tabular_dataset(\"price\", feature_names=[\n", - " *feature_columns\n", - "])\n", + "test_tabular_dataset = testing_table.to_tabular_dataset(\"price\", extra_names)\n", "\n", "fitted_model.mean_absolute_error(test_tabular_dataset)\n" ], diff --git a/src/safeds/data/labeled/containers/_tabular_dataset.py b/src/safeds/data/labeled/containers/_tabular_dataset.py index 7f28667dc..81e73bf9b 100644 --- a/src/safeds/data/labeled/containers/_tabular_dataset.py +++ b/src/safeds/data/labeled/containers/_tabular_dataset.py @@ -5,9 +5,6 @@ from safeds._utils import _structural_hash from safeds.data.tabular.containers import Column, Table -from safeds.exceptions import ( - UnknownColumnNameError, -) if TYPE_CHECKING: from collections.abc import Mapping, Sequence @@ -22,150 +19,67 @@ class TabularDataset: """ A tabular dataset maps feature columns to a target column. + Create a tabular dataset from a mapping of column names to their values. + Parameters ---------- data: The data. target_name: Name of the target column. - feature_names: - Names of the feature columns. If None, all columns except the target column are used. + extra_names: + Names of the columns that are neither features nor target. If None, no extra columns are used, i.e. all but + the target column are used as features. Raises ------ ColumnLengthMismatchError If columns have different lengths. ValueError - If the target column is also a feature column. + If the target column is also an extra column. ValueError - If no feature columns are specified. + If no feature columns remains. Examples -------- - >>> from safeds.data.tabular.containers import Table - >>> table = Table({"col1": ["a", "b"], "col2": [1, 2]}) - >>> tabular_dataset = table.to_tabular_dataset("col2", ["col1"]) + >>> from safeds.data.labeled.containers import TabularDataset + >>> dataset = TabularDataset( + ... {"id": [1, 2, 3], "feature": [4, 5, 6], "target": [1, 2, 3]}, + ... target_name="target", + ... extra_names=["id"] + ... ) """ - # ------------------------------------------------------------------------------------------------------------------ - # Creation - # ------------------------------------------------------------------------------------------------------------------ - - @staticmethod - def _from_table( - table: Table, - target_name: str, - feature_names: list[str] | None = None, - ) -> TabularDataset: - """ - Create a tabular dataset from a table. - - Parameters - ---------- - table: - The table. - target_name: - Name of the target column. - feature_names: - Names of the feature columns. If None, all columns except the target column are used. - - Returns - ------- - tabular_dataset: - The created tabular dataset. - - Raises - ------ - UnknownColumnNameError - If target_name matches none of the column names. - ValueError - If the target column is also a feature column. - ValueError - If no feature columns are specified. - - Examples - -------- - >>> from safeds.data.labeled.containers import TabularDataset - >>> from safeds.data.tabular.containers import Table - >>> table = Table({"col1": ["a", "b", "c", "a"], "col2": [1, 2, 3, 4]}) - >>> tabular_dataset = TabularDataset._from_table(table, "col2", ["col1"]) - """ - table = table._as_table() - if target_name not in table.column_names: - raise UnknownColumnNameError([target_name]) - - # If no feature names are specified, use all columns except the target column - if feature_names is None: - feature_names = table.column_names - feature_names.remove(target_name) - - # Validate inputs - if target_name in feature_names: - raise ValueError(f"Column '{target_name}' cannot be both feature and target.") - if len(feature_names) == 0: - raise ValueError("At least one feature column must be specified.") - - # Create result - result = object.__new__(TabularDataset) - - result._table = table - result._features = table.keep_only_columns(feature_names) - result._target = table.get_column(target_name) - - return result - # ------------------------------------------------------------------------------------------------------------------ # Dunder methods # ------------------------------------------------------------------------------------------------------------------ def __init__( self, - data: Mapping[str, Sequence[Any]], + data: Table | Mapping[str, Sequence[Any]], target_name: str, - feature_names: list[str] | None = None, + extra_names: list[str] | None = None, ): - """ - Create a tabular dataset from a mapping of column names to their values. - - Parameters - ---------- - data: - The data. - target_name: - Name of the target column. - feature_names: - Names of the feature columns. If None, all columns except the target column are used. - - Raises - ------ - ColumnLengthMismatchError - If columns have different lengths. - ValueError - If the target column is also a feature column. - ValueError - If no feature columns are specified. - - Examples - -------- - >>> from safeds.data.labeled.containers import TabularDataset - >>> table = TabularDataset({"a": [1, 2, 3], "b": [4, 5, 6]}, "b", ["a"]) - """ - self._table = Table(data) + # Preprocess inputs + if not isinstance(data, Table): + data = Table(data) + if extra_names is None: + extra_names = [] - # If no feature names are specified, use all columns except the target column - if feature_names is None: - feature_names = self._table.column_names - if target_name in feature_names: - feature_names.remove(target_name) + # Derive feature names + feature_names = [name for name in data.column_names if name not in {target_name, *extra_names}] # Validate inputs - if target_name in feature_names: - raise ValueError(f"Column '{target_name}' cannot be both feature and target.") + if target_name in extra_names: + raise ValueError(f"Column '{target_name}' cannot be both target and extra.") if len(feature_names) == 0: - raise ValueError("At least one feature column must be specified.") + raise ValueError("At least one feature column must remain.") - self._features: Table = self._table.keep_only_columns(feature_names) - self._target: Column = self._table.get_column(target_name) + # Set attributes + self._table: Table = data + self._features: Table = data.keep_only_columns(feature_names) + self._target: Column = data.get_column(target_name) + self._extras: Table = data.keep_only_columns(extra_names) def __eq__(self, other: object) -> bool: """ @@ -210,27 +124,22 @@ def __sizeof__(self) -> int: @property def features(self) -> Table: - """ - Get the feature columns of the tabular dataset. - - Returns - ------- - features: - The table containing the feature columns. - """ + """The feature columns of the tabular dataset.""" return self._features @property def target(self) -> Column: + """The target column of the tabular dataset.""" + return self._target + + @property + def extras(self) -> Table: """ - Get the target column of the tabular dataset. + Additional columns of the tabular dataset that are neither features nor target. - Returns - ------- - target: - The target column. + These can be used to store additional information about instances, such as IDs. """ - return self._target + return self._extras # ------------------------------------------------------------------------------------------------------------------ # Conversion diff --git a/src/safeds/data/tabular/containers/_table.py b/src/safeds/data/tabular/containers/_table.py index 3bc04dc64..0d5964d10 100644 --- a/src/safeds/data/tabular/containers/_table.py +++ b/src/safeds/data/tabular/containers/_table.py @@ -2412,7 +2412,7 @@ def to_rows(self) -> list[Row]: for (_, series_row) in self._data.iterrows() ] - def to_tabular_dataset(self, target_name: str, feature_names: list[str] | None = None) -> TabularDataset: + def to_tabular_dataset(self, target_name: str, extra_names: list[str] | None = None) -> TabularDataset: """ Return a new `TabularDataset` with columns marked as a target column or feature columns. @@ -2422,12 +2422,13 @@ def to_tabular_dataset(self, target_name: str, feature_names: list[str] | None = ---------- target_name: Name of the target column. - feature_names: - Names of the feature columns. If None, all columns except the target column are used. + extra_names: + Names of the columns that are neither features nor target. If None, no extra columns are used, i.e. all but + the target column are used as features. Returns ------- - tabular_dataset: + dataset: A new tabular dataset with the given target and feature names. Raises @@ -2441,11 +2442,11 @@ def to_tabular_dataset(self, target_name: str, feature_names: list[str] | None = -------- >>> from safeds.data.tabular.containers import Table >>> table = Table({"item": ["apple", "milk", "beer"], "price": [1.10, 1.19, 1.79], "amount_bought": [74, 72, 51]}) - >>> tabular_dataset = table.to_tabular_dataset(target_name="amount_bought", feature_names=["item", "price"]) + >>> dataset = table.to_tabular_dataset(target_name="amount_bought", extra_names=["item"]) """ from safeds.data.labeled.containers import TabularDataset - return TabularDataset._from_table(self, target_name, feature_names) + return TabularDataset(self, target_name, extra_names) # ------------------------------------------------------------------------------------------------------------------ # IPython integration diff --git a/src/safeds/data/tabular/containers/_time_series.py b/src/safeds/data/tabular/containers/_time_series.py index db93b0333..bf71e92e6 100644 --- a/src/safeds/data/tabular/containers/_time_series.py +++ b/src/safeds/data/tabular/containers/_time_series.py @@ -20,8 +20,6 @@ from pathlib import Path from typing import Any - from safeds.data.labeled.containers import TabularDataset - class TimeSeries(Table): @@ -76,58 +74,6 @@ def timeseries_from_csv_file( feature_names=feature_names, ) - @staticmethod - def _from_tabular_dataset( - tabular_dataset: TabularDataset, - time_name: str, - ) -> TimeSeries: - """Create a time series from a tabular dataset. - - Parameters - ---------- - tabular_dataset: - The tabular dataset. - time_name: - Name of the time column. - - Returns - ------- - time_series: - the created time series - - Raises - ------ - UnknownColumnNameError - If time_name matches none of the column names. - Value Error - If time column is also a feature column - - Examples - -------- - >>> from safeds.data.labeled.containers import TabularDataset - >>> from safeds.data.tabular.containers import Table, TimeSeries - >>> tabular_dataset = TabularDataset({"date": ["01.01", "01.02", "01.03", "01.04"], "col1": ["a", "b", "c", "a"]}, "col1" ) - >>> timeseries = TimeSeries._from_tabular_dataset(tabular_dataset, time_name = "date") - """ - if time_name not in tabular_dataset._table.column_names: - raise UnknownColumnNameError([time_name]) - table = tabular_dataset.to_table() - # make sure that the time_name is not part of the features - result = object.__new__(TimeSeries) - feature_names = tabular_dataset.features.column_names - if time_name in feature_names: - feature_names.remove(time_name) - - if time_name == tabular_dataset.target.name: - raise ValueError(f"Column '{time_name}' cannot be both time column and target.") - - result._data = table._data - result._schema = table.schema - result._time = table.get_column(time_name) - result._features = table.keep_only_columns(feature_names) - result._target = table.get_column(tabular_dataset.target.name) - return result - @staticmethod def _from_table( table: Table, @@ -237,8 +183,8 @@ def __init__( Examples -------- - >>> from safeds.data.labeled.containers import TabularDataset - >>> table = TabularDataset({"a": [1, 2, 3], "b": [4, 5, 6]}, "b", ["a"]) + >>> from safeds.data.tabular.containers import TimeSeries + >>> table = TimeSeries({"a": [1, 2, 3], "b": [4, 5, 6]}, "b", "a") """ import pandas as pd diff --git a/src/safeds/ml/classical/_util_sklearn.py b/src/safeds/ml/classical/_util_sklearn.py index 80056b8ba..2f822f9a1 100644 --- a/src/safeds/ml/classical/_util_sklearn.py +++ b/src/safeds/ml/classical/_util_sklearn.py @@ -161,9 +161,16 @@ def predict(model: Any, dataset: Table, feature_names: list[str] | None, target_ warnings.filterwarnings("ignore", message="X does not have valid feature names") predicted_target_vector = model.predict(dataset_df.values) result_set[target_name] = predicted_target_vector + + extra_names = [ + column_name + for column_name in dataset.column_names + if column_name != target_name and column_name not in feature_names + ] + return Table._from_pandas_dataframe(result_set).to_tabular_dataset( target_name=target_name, - feature_names=feature_names, + extra_names=extra_names, ) except ValueError as exception: raise PredictionError(str(exception)) from exception diff --git a/tests/safeds/data/labeled/containers/_tabular_dataset/test_eq.py b/tests/safeds/data/labeled/containers/_tabular_dataset/test_eq.py index 9743cf874..6c84f2a1a 100644 --- a/tests/safeds/data/labeled/containers/_tabular_dataset/test_eq.py +++ b/tests/safeds/data/labeled/containers/_tabular_dataset/test_eq.py @@ -8,35 +8,39 @@ @pytest.mark.parametrize( ("table1", "table2", "expected"), [ - (TabularDataset({"a": [], "b": []}, "b", ["a"]), TabularDataset({"a": [], "b": []}, "b", ["a"]), True), ( - TabularDataset({"a": [1, 2, 3], "b": [4, 5, 6]}, "b", ["a"]), - TabularDataset({"a": [1, 2, 3], "b": [4, 5, 6]}, "b", ["a"]), + TabularDataset({"a": [], "b": []}, "b"), + TabularDataset({"a": [], "b": []}, "b"), True, ), ( - TabularDataset({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}, "b", ["a"]), - TabularDataset({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}, "c", ["a"]), + TabularDataset({"a": [1, 2, 3], "b": [4, 5, 6]}, "b"), + TabularDataset({"a": [1, 2, 3], "b": [4, 5, 6]}, "b"), + True, + ), + ( + TabularDataset({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}, "b", ["c"]), + TabularDataset({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}, "c", ["b"]), False, ), ( - TabularDataset({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}, "b", ["a"]), - TabularDataset({"a": [1, 2, 3], "b": [4, 5, 6], "d": [7, 8, 9]}, "b", ["a"]), + TabularDataset({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}, "b", ["c"]), + TabularDataset({"a": [1, 2, 3], "b": [4, 5, 6], "d": [7, 8, 9]}, "b", ["d"]), False, ), ( - TabularDataset({"a": [1, 2, 3], "b": [4, 5, 6]}, "b", ["a"]), - TabularDataset({"a": [1, 1, 3], "b": [4, 5, 6]}, "b", ["a"]), + TabularDataset({"a": [1, 2, 3], "b": [4, 5, 6]}, "b"), + TabularDataset({"a": [1, 1, 3], "b": [4, 5, 6]}, "b"), False, ), ( - TabularDataset({"a": [1, 2, 3], "b": [4, 5, 6]}, "b", ["a"]), - TabularDataset({"a": ["1", "2", "3"], "b": [4, 5, 6]}, "b", ["a"]), + TabularDataset({"a": [1, 2, 3], "b": [4, 5, 6]}, "b"), + TabularDataset({"a": ["1", "2", "3"], "b": [4, 5, 6]}, "b"), False, ), ( - TabularDataset({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}, "b", ["a"]), TabularDataset({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}, "b", ["c"]), + TabularDataset({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}, "b", ["a"]), False, ), ], @@ -61,9 +65,9 @@ def test_should_return_whether_two_tabular_datasets_are_equal( @pytest.mark.parametrize( ("table", "other"), [ - (TabularDataset({"a": [1, 2, 3], "b": [4, 5, 6]}, "b", ["a"]), None), - (TabularDataset({"a": [1, 2, 3], "b": [4, 5, 6]}, "b", ["a"]), Row()), - (TabularDataset({"a": [1, 2, 3], "b": [4, 5, 6]}, "b", ["a"]), Table()), + (TabularDataset({"a": [1, 2, 3], "b": [4, 5, 6]}, "b"), None), + (TabularDataset({"a": [1, 2, 3], "b": [4, 5, 6]}, "b"), Row()), + (TabularDataset({"a": [1, 2, 3], "b": [4, 5, 6]}, "b"), Table()), ], ids=[ "TabularDataset vs. None", diff --git a/tests/safeds/data/labeled/containers/_tabular_dataset/test_extras.py b/tests/safeds/data/labeled/containers/_tabular_dataset/test_extras.py new file mode 100644 index 000000000..001b15524 --- /dev/null +++ b/tests/safeds/data/labeled/containers/_tabular_dataset/test_extras.py @@ -0,0 +1,41 @@ +import pytest +from safeds.data.labeled.containers import TabularDataset +from safeds.data.tabular.containers import Table + + +@pytest.mark.parametrize( + ("tabular_dataset", "extras"), + [ + ( + TabularDataset( + { + "A": [1, 4], + "B": [2, 5], + "C": [3, 6], + "T": [0, 1], + }, + target_name="T", + ), + Table(), + ), + ( + TabularDataset( + { + "A": [1, 4], + "B": [2, 5], + "C": [3, 6], + "T": [0, 1], + }, + target_name="T", + extra_names=["A", "C"], + ), + Table({"A": [1, 4], "C": [3, 6]}), + ), + ], + ids=[ + "only_target_and_features", + "target_features_and_extras", + ], +) +def test_should_return_features(tabular_dataset: TabularDataset, extras: Table) -> None: + assert tabular_dataset.extras == extras diff --git a/tests/safeds/data/labeled/containers/_tabular_dataset/test_features.py b/tests/safeds/data/labeled/containers/_tabular_dataset/test_features.py index dd572a198..446664c36 100644 --- a/tests/safeds/data/labeled/containers/_tabular_dataset/test_features.py +++ b/tests/safeds/data/labeled/containers/_tabular_dataset/test_features.py @@ -27,7 +27,7 @@ "T": [0, 1], }, target_name="T", - feature_names=["A", "C"], + extra_names=["B"], ), Table({"A": [1, 4], "C": [3, 6]}), ), diff --git a/tests/safeds/data/labeled/containers/_tabular_dataset/test_from_table.py b/tests/safeds/data/labeled/containers/_tabular_dataset/test_from_table.py deleted file mode 100644 index 10b73579c..000000000 --- a/tests/safeds/data/labeled/containers/_tabular_dataset/test_from_table.py +++ /dev/null @@ -1,150 +0,0 @@ -import pytest -from safeds.data.labeled.containers import TabularDataset -from safeds.data.tabular.containers import Table -from safeds.exceptions import UnknownColumnNameError - - -@pytest.mark.parametrize( - ("table", "target_name", "feature_names", "error", "error_msg"), - [ - ( - Table( - { - "A": [1, 4], - "B": [2, 5], - "C": [3, 6], - "T": [0, 1], - }, - ), - "T", - ["A", "B", "C", "D", "E"], - UnknownColumnNameError, - r"Could not find column\(s\) 'D, E'", - ), - ( - Table( - { - "A": [1, 4], - "B": [2, 5], - "C": [3, 6], - "T": [0, 1], - }, - ), - "D", - ["A", "B", "C"], - UnknownColumnNameError, - r"Could not find column\(s\) 'D'", - ), - ( - Table( - { - "A": [1, 4], - "B": [2, 5], - "C": [3, 6], - "T": [0, 1], - }, - ), - "A", - ["A", "B", "C"], - ValueError, - r"Column 'A' cannot be both feature and target.", - ), - ( - Table( - { - "A": [1, 4], - "B": [2, 5], - "C": [3, 6], - "T": [0, 1], - }, - ), - "A", - [], - ValueError, - r"At least one feature column must be specified.", - ), - ( - Table( - { - "A": [1, 4], - }, - ), - "A", - None, - ValueError, - r"At least one feature column must be specified.", - ), - ], - ids=[ - "feature_does_not_exist", - "target_does_not_exist", - "target_and_feature_overlap", - "features_are_empty-explicitly", - "features_are_empty_implicitly", - ], -) -def test_should_raise_error( - table: Table, - target_name: str, - feature_names: list[str] | None, - error: type[Exception], - error_msg: str, -) -> None: - with pytest.raises(error, match=error_msg): - TabularDataset._from_table(table, target_name=target_name, feature_names=feature_names) - - -@pytest.mark.parametrize( - ("table", "target_name", "feature_names"), - [ - ( - Table( - { - "A": [1, 4], - "B": [2, 5], - "C": [3, 6], - "T": [0, 1], - }, - ), - "T", - ["A", "B", "C"], - ), - ( - Table( - { - "A": [1, 4], - "B": [2, 5], - "C": [3, 6], - "T": [0, 1], - }, - ), - "T", - ["A", "C"], - ), - ( - Table( - { - "A": [1, 4], - "B": [2, 5], - "C": [3, 6], - "T": [0, 1], - }, - ), - "T", - None, - ), - ], - ids=[ - "create_tabular_dataset", - "tabular_dataset_not_all_columns_are_features", - "tabular_dataset_with_feature_names_as_None", - ], -) -def test_should_create_a_tabular_dataset(table: Table, target_name: str, feature_names: list[str] | None) -> None: - tabular_dataset = TabularDataset._from_table(table, target_name=target_name, feature_names=feature_names) - feature_names = feature_names if feature_names is not None else table.remove_columns([target_name]).column_names - assert isinstance(tabular_dataset, TabularDataset) - assert tabular_dataset._features.column_names == feature_names - assert tabular_dataset._target.name == target_name - assert tabular_dataset._features == table.keep_only_columns(feature_names) - assert tabular_dataset._target == table.get_column(target_name) diff --git a/tests/safeds/data/labeled/containers/_tabular_dataset/test_hash.py b/tests/safeds/data/labeled/containers/_tabular_dataset/test_hash.py index 918041a47..e86e5197f 100644 --- a/tests/safeds/data/labeled/containers/_tabular_dataset/test_hash.py +++ b/tests/safeds/data/labeled/containers/_tabular_dataset/test_hash.py @@ -5,14 +5,17 @@ @pytest.mark.parametrize( ("table1", "table2"), [ - (TabularDataset({"a": [], "b": []}, "b", ["a"]), TabularDataset({"a": [], "b": []}, "b", ["a"])), ( - TabularDataset({"a": [1, 2, 3], "b": [4, 5, 6]}, "b", ["a"]), - TabularDataset({"a": [1, 2, 3], "b": [4, 5, 6]}, "b", ["a"]), + TabularDataset({"a": [], "b": []}, "b"), + TabularDataset({"a": [], "b": []}, "b"), ), ( - TabularDataset({"a": [1, 2, 3], "b": [4, 5, 6]}, "b", ["a"]), - TabularDataset({"a": [1, 1, 3], "b": [4, 5, 6]}, "b", ["a"]), + TabularDataset({"a": [1, 2, 3], "b": [4, 5, 6]}, "b"), + TabularDataset({"a": [1, 2, 3], "b": [4, 5, 6]}, "b"), + ), + ( + TabularDataset({"a": [1, 2, 3], "b": [4, 5, 6]}, "b"), + TabularDataset({"a": [1, 1, 3], "b": [4, 5, 6]}, "b"), ), ], ids=[ @@ -29,20 +32,23 @@ def test_should_return_same_hash_for_equal_tabular_datasets(table1: TabularDatas ("table1", "table2"), [ ( - TabularDataset({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}, "b", ["a"]), - TabularDataset({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}, "c", ["a"]), + TabularDataset({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}, "b", ["c"]), + TabularDataset({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}, "c", ["b"]), ), ( - TabularDataset({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}, "b", ["a"]), - TabularDataset({"a": [1, 2, 3], "b": [4, 5, 6], "d": [7, 8, 9]}, "b", ["a"]), + TabularDataset({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}, "b", ["c"]), + TabularDataset({"a": [1, 2, 3], "b": [4, 5, 6], "d": [7, 8, 9]}, "b", ["d"]), ), ( - TabularDataset({"a": [1, 2, 3], "b": [4, 5, 6]}, "b", ["a"]), - TabularDataset({"a": ["1", "2", "3"], "b": [4, 5, 6]}, "b", ["a"]), + TabularDataset( + {"a": [1, 2, 3], "b": [4, 5, 6]}, + "b", + ), + TabularDataset({"a": ["1", "2", "3"], "b": [4, 5, 6]}, "b"), ), ( - TabularDataset({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}, "b", ["a"]), TabularDataset({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}, "b", ["c"]), + TabularDataset({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}, "b", ["a"]), ), ], ids=[ diff --git a/tests/safeds/data/labeled/containers/_tabular_dataset/test_init.py b/tests/safeds/data/labeled/containers/_tabular_dataset/test_init.py index 9e6116e1a..0ac34111e 100644 --- a/tests/safeds/data/labeled/containers/_tabular_dataset/test_init.py +++ b/tests/safeds/data/labeled/containers/_tabular_dataset/test_init.py @@ -5,7 +5,7 @@ @pytest.mark.parametrize( - ("data", "target_name", "feature_names", "error", "error_msg"), + ("data", "target_name", "extra_names", "error", "error_msg"), [ ( { @@ -15,7 +15,7 @@ "T": [0, 1], }, "T", - ["A", "B", "C", "D", "E"], + ["D", "E"], UnknownColumnNameError, r"Could not find column\(s\) 'D, E'", ), @@ -27,7 +27,7 @@ "T": [0, 1], }, "D", - ["A", "B", "C"], + [], UnknownColumnNameError, r"Could not find column\(s\) 'D'", ), @@ -39,9 +39,9 @@ "T": [0, 1], }, "A", - ["A", "B", "C"], + ["A"], ValueError, - r"Column 'A' cannot be both feature and target.", + r"Column 'A' cannot be both target and extra.", ), ( { @@ -50,42 +50,114 @@ "C": [3, 6], "T": [0, 1], }, - "D", - [], + "T", + ["A", "B", "C"], ValueError, - r"At least one feature column must be specified.", + r"At least one feature column must remain.", ), ( { "A": [1, 4], }, "A", - None, + [], + ValueError, + r"At least one feature column must remain.", + ), + ( + Table( + { + "A": [1, 4], + "B": [2, 5], + "C": [3, 6], + "T": [0, 1], + }, + ), + "T", + ["D", "E"], + UnknownColumnNameError, + r"Could not find column\(s\) 'D, E'", + ), + ( + Table( + { + "A": [1, 4], + "B": [2, 5], + "C": [3, 6], + "T": [0, 1], + }, + ), + "D", + [], + UnknownColumnNameError, + r"Could not find column\(s\) 'D'", + ), + ( + Table( + { + "A": [1, 4], + "B": [2, 5], + "C": [3, 6], + "T": [0, 1], + }, + ), + "A", + ["A"], ValueError, - r"At least one feature column must be specified.", + r"Column 'A' cannot be both target and extra.", + ), + ( + Table( + { + "A": [1, 4], + "B": [2, 5], + "C": [3, 6], + "T": [0, 1], + }, + ), + "T", + ["A", "B", "C"], + ValueError, + r"At least one feature column must remain.", + ), + ( + Table( + { + "A": [1, 4], + }, + ), + "A", + [], + ValueError, + r"At least one feature column must remain.", ), ], ids=[ - "feature_does_not_exist", - "target_does_not_exist", - "target_and_feature_overlap", - "features_are_empty-explicitly", - "features_are_empty_implicitly", + "dict_extra_does_not_exist", + "dict_target_does_not_exist", + "dict_target_and_extra_overlap", + "dict_features_are_empty_explicitly", + "dict_features_are_empty_implicitly", + "table_extra_does_not_exist", + "table_target_does_not_exist", + "table_target_and_extra_overlap", + "table_features_are_empty_explicitly", + "table_features_are_empty_implicitly", ], ) def test_should_raise_error( data: dict[str, list[int]], target_name: str, - feature_names: list[str] | None, + extra_names: list[str] | None, error: type[Exception], error_msg: str, ) -> None: with pytest.raises(error, match=error_msg): - TabularDataset(data, target_name=target_name, feature_names=feature_names) + TabularDataset(data, target_name=target_name, extra_names=extra_names) @pytest.mark.parametrize( - ("data", "target_name", "feature_names"), + ("data", "target_name", "extra_names"), [ ( { @@ -95,7 +167,7 @@ def test_should_raise_error( "T": [0, 1], }, "T", - ["A", "B", "C"], + [], ), ( { @@ -117,24 +189,66 @@ def test_should_raise_error( "T", None, ), + ( + Table( + { + "A": [1, 4], + "B": [2, 5], + "C": [3, 6], + "T": [0, 1], + }, + ), + "T", + [], + ), + ( + Table( + { + "A": [1, 4], + "B": [2, 5], + "C": [3, 6], + "T": [0, 1], + }, + ), + "T", + ["A", "C"], + ), + ( + Table( + { + "A": [1, 4], + "B": [2, 5], + "C": [3, 6], + "T": [0, 1], + }, + ), + "T", + None, + ), ], ids=[ - "create_tabular_dataset", - "tabular_dataset_not_all_columns_are_features", - "tabular_dataset_with_feature_names_as_None", + "dict_create_tabular_dataset", + "dict_tabular_dataset_not_all_columns_are_features", + "dict_tabular_dataset_with_extra_names_as_None", + "table_create_tabular_dataset", + "table_tabular_dataset_not_all_columns_are_features", + "table_tabular_dataset_with_extra_names_as_None", ], ) def test_should_create_a_tabular_dataset( - data: dict[str, list[int]], + data: Table | dict[str, list[int]], target_name: str, - feature_names: list[str] | None, + extra_names: list[str] | None, ) -> None: - tabular_dataset = TabularDataset(data, target_name=target_name, feature_names=feature_names) - if feature_names is None: - feature_names = list(data.keys()) - feature_names.remove(target_name) + tabular_dataset = TabularDataset(data, target_name=target_name, extra_names=extra_names) + if not isinstance(data, Table): + data = Table(data) + + if extra_names is None: + extra_names = [] + assert isinstance(tabular_dataset, TabularDataset) - assert tabular_dataset._features.column_names == feature_names + assert tabular_dataset._extras.column_names == extra_names assert tabular_dataset._target.name == target_name - assert tabular_dataset._features == Table(data).keep_only_columns(feature_names) - assert tabular_dataset._target == Table(data).get_column(target_name) + assert tabular_dataset._extras == data.keep_only_columns(extra_names) + assert tabular_dataset._target == data.get_column(target_name) diff --git a/tests/safeds/data/labeled/containers/_tabular_dataset/test_into_dataloader.py b/tests/safeds/data/labeled/containers/_tabular_dataset/test_into_dataloader.py index bc35f883c..a2a217401 100644 --- a/tests/safeds/data/labeled/containers/_tabular_dataset/test_into_dataloader.py +++ b/tests/safeds/data/labeled/containers/_tabular_dataset/test_into_dataloader.py @@ -4,7 +4,7 @@ @pytest.mark.parametrize( - ("data", "target_name", "feature_names"), + ("data", "target_name", "extra_names"), [ ( { @@ -14,7 +14,7 @@ "T": [0, 1], }, "T", - ["A", "B", "C"], + [], ), ], ids=[ @@ -24,8 +24,8 @@ def test_should_create_dataloader( data: dict[str, list[int]], target_name: str, - feature_names: list[str] | None, + extra_names: list[str] | None, ) -> None: - tabular_dataset = Table.from_dict(data).to_tabular_dataset(target_name, feature_names) + tabular_dataset = Table.from_dict(data).to_tabular_dataset(target_name, extra_names) data_loader = tabular_dataset._into_dataloader_with_classes(1, 2) assert isinstance(data_loader, DataLoader) diff --git a/tests/safeds/data/labeled/containers/_tabular_dataset/test_sizeof.py b/tests/safeds/data/labeled/containers/_tabular_dataset/test_sizeof.py index aeae315e0..a7097deec 100644 --- a/tests/safeds/data/labeled/containers/_tabular_dataset/test_sizeof.py +++ b/tests/safeds/data/labeled/containers/_tabular_dataset/test_sizeof.py @@ -14,7 +14,6 @@ "target": [1, 3, 2], }, "target", - ["feature_1", "feature_2"], ), TabularDataset( { @@ -24,10 +23,10 @@ "target": [1, 3, 2], }, "target", - ["feature_1", "feature_2"], + ["other"], ), ], - ids=["normal", "table_with_column_as_non_feature"], + ids=["normal", "table_with_extra_column"], ) def test_should_size_be_greater_than_normal_object(tabular_dataset: TabularDataset) -> None: assert sys.getsizeof(tabular_dataset) > sys.getsizeof(object()) diff --git a/tests/safeds/data/labeled/containers/_tabular_dataset/test_to_table.py b/tests/safeds/data/labeled/containers/_tabular_dataset/test_to_table.py index accf240b3..71e9d5db8 100644 --- a/tests/safeds/data/labeled/containers/_tabular_dataset/test_to_table.py +++ b/tests/safeds/data/labeled/containers/_tabular_dataset/test_to_table.py @@ -14,7 +14,6 @@ "target": [1, 3, 2], }, "target", - ["feature_1", "feature_2"], ), Table( { @@ -33,7 +32,7 @@ "target": [1, 3, 2], }, "target", - ["feature_1", "feature_2"], + ["other"], ), Table( { @@ -45,7 +44,7 @@ ), ), ], - ids=["normal", "table_with_column_as_non_feature"], + ids=["normal", "table_with_extra_column"], ) def test_should_return_table(tabular_dataset: TabularDataset, expected: Table) -> None: table = tabular_dataset.to_table() diff --git a/tests/safeds/data/tabular/containers/_time_series/test_eq.py b/tests/safeds/data/tabular/containers/_time_series/test_eq.py index bde59d432..0e39f828f 100644 --- a/tests/safeds/data/tabular/containers/_time_series/test_eq.py +++ b/tests/safeds/data/tabular/containers/_time_series/test_eq.py @@ -87,7 +87,7 @@ def test_should_return_true_if_objects_are_identical(table1: TimeSeries) -> None (TimeSeries({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}, "b", "c", ["a"]), Table()), ( TimeSeries({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}, "b", "c", ["a"]), - TabularDataset({"a": [1, 2, 3], "b": [4, 5, 6]}, "b", ["a"]), + TabularDataset({"a": [1, 2, 3], "b": [4, 5, 6]}, "b"), ), ], ids=[ diff --git a/tests/safeds/data/tabular/containers/_time_series/test_from_tagged_table.py b/tests/safeds/data/tabular/containers/_time_series/test_from_tagged_table.py deleted file mode 100644 index 0cc96a575..000000000 --- a/tests/safeds/data/tabular/containers/_time_series/test_from_tagged_table.py +++ /dev/null @@ -1,199 +0,0 @@ -import pytest -from safeds.data.labeled.containers import TabularDataset -from safeds.data.tabular.containers import Table, TimeSeries -from safeds.exceptions import UnknownColumnNameError - - -@pytest.mark.parametrize( - ("table", "target_name", "time_name", "feature_names", "error", "error_msg"), - [ - ( - Table( - { - "time": [0, 1], - "A": [1, 4], - "B": [2, 5], - "C": [3, 6], - "T": [0, 1], - }, - ), - "T", - "time", - ["A", "B", "C", "D", "E"], - UnknownColumnNameError, - r"Could not find column\(s\) 'D, E'", - ), - ( - Table( - { - "time": [0, 1], - "A": [1, 4], - "B": [2, 5], - "C": [3, 6], - "T": [0, 1], - }, - ), - "D", - "time", - ["A", "B", "C"], - UnknownColumnNameError, - r"Could not find column\(s\) 'D'", - ), - ( - Table( - { - "time": [0, 1], - "A": [1, 4], - "B": [2, 5], - "C": [3, 6], - "T": [0, 1], - }, - ), - "A", - "time", - ["A", "B", "C"], - ValueError, - r"Column 'A' cannot be both feature and target.", - ), - ( - Table( - { - "time": [0, 1], - "A": [1, 4], - "B": [2, 5], - "C": [3, 6], - "T": [0, 1], - }, - ), - "A", - "time", - [], - ValueError, - r"At least one feature column must be specified.", - ), - ( - Table( - { - "time": [0, 1], - "A": [1, 4], - "B": [2, 5], - "C": [3, 6], - "T": [0, 1], - }, - ), - "time", - "time", - ["A", "B", "C"], - ValueError, - r"Column 'time' cannot be both time column and target.", - ), - ( - Table( - { - "r": [0, 1], - "A": [1, 4], - "B": [2, 5], - "C": [3, 6], - "T": [0, 1], - }, - ), - "T", - "time", - ["A", "B", "C"], - UnknownColumnNameError, - r"Could not find column\(s\) 'time'", - ), - ], - ids=[ - "feature_does_not_exist", - "target_does_not_exist", - "target_and_feature_overlap", - "features_are_empty-explicitly", - "time_name_is_target", - "time_does_not_exist", - ], -) -def test_should_raise_error( - table: Table, - target_name: str, - time_name: str, - feature_names: list[str] | None, - error: type[Exception], - error_msg: str, -) -> None: - with pytest.raises(error, match=error_msg): - TimeSeries._from_tabular_dataset( - TabularDataset._from_table(table, target_name=target_name, feature_names=feature_names), - time_name=time_name, - ) - - -@pytest.mark.parametrize( - ("table", "target_name", "time_name", "feature_names"), - [ - ( - Table( - { - "time": [0, 1], - "A": [1, 4], - "B": [2, 5], - "C": [3, 6], - "T": [0, 1], - }, - ), - "T", - "time", - ["A", "B", "C"], - ), - ( - Table( - { - "time": [0, 1], - "A": [1, 4], - "B": [2, 5], - "C": [3, 6], - "T": [0, 1], - }, - ), - "T", - "time", - ["A", "C"], - ), - ( - Table( - { - "time": [0, 1], - "A": [1, 4], - "B": [2, 5], - "C": [3, 6], - "T": [0, 1], - }, - ), - "T", - "time", - None, - ), - ], - ids=[ - "create_tabular_dataset", - "tabular_dataset_not_all_columns_are_features", - "tabular_dataset_with_feature_names_as_None", - ], -) -def test_should_create_a_time_series( - table: Table, - target_name: str, - time_name: str, - feature_names: list[str] | None, -) -> None: - tabular_dataset = TabularDataset._from_table(table, target_name=target_name, feature_names=feature_names) - time_series = TimeSeries._from_tabular_dataset(tabular_dataset, time_name=time_name) - feature_names = ( - feature_names if feature_names is not None else table.remove_columns([target_name, time_name]).column_names - ) - assert isinstance(time_series, TimeSeries) - assert time_series._features.column_names == feature_names - assert time_series._target.name == target_name - assert time_series._features == table.keep_only_columns(feature_names) - assert time_series._target == table.get_column(target_name) - assert time_series.time == table.get_column(time_name) diff --git a/tests/safeds/ml/classical/classification/test_ada_boost.py b/tests/safeds/ml/classical/classification/test_ada_boost.py index 22bf98567..91fde488b 100644 --- a/tests/safeds/ml/classical/classification/test_ada_boost.py +++ b/tests/safeds/ml/classical/classification/test_ada_boost.py @@ -8,7 +8,7 @@ @pytest.fixture() def training_set() -> TabularDataset: table = Table({"col1": [1, 2, 3, 4], "col2": [1, 2, 3, 4]}) - return table.to_tabular_dataset(target_name="col1", feature_names=["col2"]) + return table.to_tabular_dataset(target_name="col1") class TestLearner: diff --git a/tests/safeds/ml/classical/classification/test_classifier.py b/tests/safeds/ml/classical/classification/test_classifier.py index a08ae7b5f..5d8803c8a 100644 --- a/tests/safeds/ml/classical/classification/test_classifier.py +++ b/tests/safeds/ml/classical/classification/test_classifier.py @@ -63,7 +63,7 @@ def valid_data() -> TabularDataset: "feat2": [3, 6], "target": [0, 1], }, - ).to_tabular_dataset(target_name="target", feature_names=["feat1", "feat2"]) + ).to_tabular_dataset(target_name="target", extra_names=["id"]) @pytest.mark.parametrize("classifier", classifiers(), ids=lambda x: x.__class__.__name__) @@ -93,7 +93,7 @@ def test_should_not_change_input_table(self, classifier: Classifier, request: Fi "feat2": [3, 6], "target": [0, 1], }, - ).to_tabular_dataset(target_name="target", feature_names=["feat1", "feat2"]), + ).to_tabular_dataset(target_name="target", extra_names=["id"]), NonNumericColumnError, ( r"Tried to do a numerical operation on one or multiple non-numerical columns: \n\{'feat1'\}\nYou" @@ -110,7 +110,7 @@ def test_should_not_change_input_table(self, classifier: Classifier, request: Fi "feat2": [3, 6], "target": [0, 1], }, - ).to_tabular_dataset(target_name="target", feature_names=["feat1", "feat2"]), + ).to_tabular_dataset(target_name="target", extra_names=["id"]), MissingValuesColumnError, ( r"Tried to do an operation on one or multiple columns containing missing values: \n\{'feat1'\}\nYou" @@ -127,7 +127,7 @@ def test_should_not_change_input_table(self, classifier: Classifier, request: Fi "feat2": [], "target": [], }, - ).to_tabular_dataset(target_name="target", feature_names=["feat1", "feat2"]), + ).to_tabular_dataset(target_name="target", extra_names=["id"]), DatasetMissesDataError, r"Dataset contains no rows", ), diff --git a/tests/safeds/ml/classical/classification/test_gradient_boosting.py b/tests/safeds/ml/classical/classification/test_gradient_boosting.py index 5306daa23..c48ecd15d 100644 --- a/tests/safeds/ml/classical/classification/test_gradient_boosting.py +++ b/tests/safeds/ml/classical/classification/test_gradient_boosting.py @@ -8,7 +8,7 @@ @pytest.fixture() def training_set() -> TabularDataset: table = Table({"col1": [1, 2, 3, 4], "col2": [1, 2, 3, 4]}) - return table.to_tabular_dataset(target_name="col1", feature_names=["col2"]) + return table.to_tabular_dataset(target_name="col1") class TestNumberOfTrees: diff --git a/tests/safeds/ml/classical/classification/test_k_nearest_neighbors.py b/tests/safeds/ml/classical/classification/test_k_nearest_neighbors.py index b2f17dda6..775ceb51a 100644 --- a/tests/safeds/ml/classical/classification/test_k_nearest_neighbors.py +++ b/tests/safeds/ml/classical/classification/test_k_nearest_neighbors.py @@ -8,7 +8,7 @@ @pytest.fixture() def training_set() -> TabularDataset: table = Table({"col1": [1, 2, 3, 4], "col2": [1, 2, 3, 4]}) - return table.to_tabular_dataset(target_name="col1", feature_names=["col2"]) + return table.to_tabular_dataset(target_name="col1") class TestNumberOfNeighbors: diff --git a/tests/safeds/ml/classical/classification/test_random_forest.py b/tests/safeds/ml/classical/classification/test_random_forest.py index 6edee8743..14e87e6a0 100644 --- a/tests/safeds/ml/classical/classification/test_random_forest.py +++ b/tests/safeds/ml/classical/classification/test_random_forest.py @@ -8,7 +8,7 @@ @pytest.fixture() def training_set() -> TabularDataset: table = Table({"col1": [1, 2, 3, 4], "col2": [1, 2, 3, 4]}) - return table.to_tabular_dataset(target_name="col1", feature_names=["col2"]) + return table.to_tabular_dataset(target_name="col1") class TestNumberOfTrees: diff --git a/tests/safeds/ml/classical/classification/test_support_vector_machine.py b/tests/safeds/ml/classical/classification/test_support_vector_machine.py index 21df23650..2a19fd80a 100644 --- a/tests/safeds/ml/classical/classification/test_support_vector_machine.py +++ b/tests/safeds/ml/classical/classification/test_support_vector_machine.py @@ -31,7 +31,7 @@ def kernels() -> list[SupportVectorMachineKernel]: @pytest.fixture() def training_set() -> TabularDataset: table = Table({"col1": [1, 2, 3, 4], "col2": [1, 2, 3, 4]}) - return table.to_tabular_dataset(target_name="col1", feature_names=["col2"]) + return table.to_tabular_dataset(target_name="col1") class TestC: diff --git a/tests/safeds/ml/classical/regression/test_ada_boost.py b/tests/safeds/ml/classical/regression/test_ada_boost.py index fb0b50989..44cfcbd83 100644 --- a/tests/safeds/ml/classical/regression/test_ada_boost.py +++ b/tests/safeds/ml/classical/regression/test_ada_boost.py @@ -8,7 +8,7 @@ @pytest.fixture() def training_set() -> TabularDataset: table = Table({"col1": [1, 2, 3, 4], "col2": [1, 2, 3, 4]}) - return table.to_tabular_dataset(target_name="col1", feature_names=["col2"]) + return table.to_tabular_dataset(target_name="col1") class TestLearner: diff --git a/tests/safeds/ml/classical/regression/test_elastic_net_regression.py b/tests/safeds/ml/classical/regression/test_elastic_net_regression.py index 1c8041a32..66f10699d 100644 --- a/tests/safeds/ml/classical/regression/test_elastic_net_regression.py +++ b/tests/safeds/ml/classical/regression/test_elastic_net_regression.py @@ -8,7 +8,7 @@ @pytest.fixture() def training_set() -> TabularDataset: table = Table({"col1": [1, 2, 3, 4], "col2": [1, 2, 3, 4]}) - return table.to_tabular_dataset(target_name="col1", feature_names=["col2"]) + return table.to_tabular_dataset(target_name="col1") class TestAlpha: diff --git a/tests/safeds/ml/classical/regression/test_gradient_boosting.py b/tests/safeds/ml/classical/regression/test_gradient_boosting.py index 931493dc6..f1ef8549d 100644 --- a/tests/safeds/ml/classical/regression/test_gradient_boosting.py +++ b/tests/safeds/ml/classical/regression/test_gradient_boosting.py @@ -8,7 +8,7 @@ @pytest.fixture() def training_set() -> TabularDataset: table = Table({"col1": [1, 2, 3, 4], "col2": [1, 2, 3, 4]}) - return table.to_tabular_dataset(target_name="col1", feature_names=["col2"]) + return table.to_tabular_dataset(target_name="col1") class TestNumberOfTrees: diff --git a/tests/safeds/ml/classical/regression/test_k_nearest_neighbors.py b/tests/safeds/ml/classical/regression/test_k_nearest_neighbors.py index e2578db2a..a01e27f0b 100644 --- a/tests/safeds/ml/classical/regression/test_k_nearest_neighbors.py +++ b/tests/safeds/ml/classical/regression/test_k_nearest_neighbors.py @@ -8,7 +8,7 @@ @pytest.fixture() def training_set() -> TabularDataset: table = Table({"col1": [1, 2, 3, 4], "col2": [1, 2, 3, 4]}) - return table.to_tabular_dataset(target_name="col1", feature_names=["col2"]) + return table.to_tabular_dataset(target_name="col1") class TestNumberOfNeighbors: diff --git a/tests/safeds/ml/classical/regression/test_lasso_regression.py b/tests/safeds/ml/classical/regression/test_lasso_regression.py index 1a9a7d191..90d771b16 100644 --- a/tests/safeds/ml/classical/regression/test_lasso_regression.py +++ b/tests/safeds/ml/classical/regression/test_lasso_regression.py @@ -8,7 +8,7 @@ @pytest.fixture() def training_set() -> TabularDataset: table = Table({"col1": [1, 2, 3, 4], "col2": [1, 2, 3, 4]}) - return table.to_tabular_dataset(target_name="col1", feature_names=["col2"]) + return table.to_tabular_dataset(target_name="col1") class TestAlpha: diff --git a/tests/safeds/ml/classical/regression/test_random_forest.py b/tests/safeds/ml/classical/regression/test_random_forest.py index cb35759a5..2f5f97579 100644 --- a/tests/safeds/ml/classical/regression/test_random_forest.py +++ b/tests/safeds/ml/classical/regression/test_random_forest.py @@ -8,7 +8,7 @@ @pytest.fixture() def training_set() -> TabularDataset: table = Table({"col1": [1, 2, 3, 4], "col2": [1, 2, 3, 4]}) - return table.to_tabular_dataset(target_name="col1", feature_names=["col2"]) + return table.to_tabular_dataset(target_name="col1") class TestNumberOfTrees: diff --git a/tests/safeds/ml/classical/regression/test_regressor.py b/tests/safeds/ml/classical/regression/test_regressor.py index ca0759119..7d2a2f5b4 100644 --- a/tests/safeds/ml/classical/regression/test_regressor.py +++ b/tests/safeds/ml/classical/regression/test_regressor.py @@ -74,7 +74,7 @@ def valid_data() -> TabularDataset: "feat2": [3, 6], "target": [0, 1], }, - ).to_tabular_dataset(target_name="target", feature_names=["feat1", "feat2"]) + ).to_tabular_dataset(target_name="target", extra_names=["id"]) @pytest.mark.parametrize("regressor", regressors(), ids=lambda x: x.__class__.__name__) @@ -104,7 +104,7 @@ def test_should_not_change_input_table(self, regressor: Regressor, request: Fixt "feat2": [3, 6], "target": [0, 1], }, - ).to_tabular_dataset(target_name="target", feature_names=["feat1", "feat2"]), + ).to_tabular_dataset(target_name="target", extra_names=["id"]), NonNumericColumnError, r"Tried to do a numerical operation on one or multiple non-numerical columns: \n\{'feat1'\}", ), @@ -116,7 +116,7 @@ def test_should_not_change_input_table(self, regressor: Regressor, request: Fixt "feat2": [3, 6], "target": [0, 1], }, - ).to_tabular_dataset(target_name="target", feature_names=["feat1", "feat2"]), + ).to_tabular_dataset(target_name="target", extra_names=["id"]), MissingValuesColumnError, r"Tried to do an operation on one or multiple columns containing missing values: \n\{'feat1'\}", ), @@ -128,7 +128,7 @@ def test_should_not_change_input_table(self, regressor: Regressor, request: Fixt "feat2": [], "target": [], }, - ).to_tabular_dataset(target_name="target", feature_names=["feat1", "feat2"]), + ).to_tabular_dataset(target_name="target", extra_names=["id"]), DatasetMissesDataError, r"Dataset contains no rows", ), diff --git a/tests/safeds/ml/classical/regression/test_ridge_regression.py b/tests/safeds/ml/classical/regression/test_ridge_regression.py index c30e17e31..3dd2054ce 100644 --- a/tests/safeds/ml/classical/regression/test_ridge_regression.py +++ b/tests/safeds/ml/classical/regression/test_ridge_regression.py @@ -8,7 +8,7 @@ @pytest.fixture() def training_set() -> TabularDataset: table = Table({"col1": [1, 2, 3, 4], "col2": [1, 2, 3, 4]}) - return table.to_tabular_dataset(target_name="col1", feature_names=["col2"]) + return table.to_tabular_dataset(target_name="col1") class TestAlpha: diff --git a/tests/safeds/ml/classical/regression/test_support_vector_machine.py b/tests/safeds/ml/classical/regression/test_support_vector_machine.py index 6ed483b9d..a2015964c 100644 --- a/tests/safeds/ml/classical/regression/test_support_vector_machine.py +++ b/tests/safeds/ml/classical/regression/test_support_vector_machine.py @@ -31,7 +31,7 @@ def kernels() -> list[SupportVectorMachineKernel]: @pytest.fixture() def training_set() -> TabularDataset: table = Table({"col1": [1, 2, 3, 4], "col2": [1, 2, 3, 4]}) - return table.to_tabular_dataset(target_name="col1", feature_names=["col2"]) + return table.to_tabular_dataset(target_name="col1") class TestC: