From b8949420e2c018fb686f971559ef98ed68209db4 Mon Sep 17 00:00:00 2001 From: Antonio Carlos <falcaopetri@gmail.com> Date: Sun, 17 Oct 2021 14:37:23 -0300 Subject: [PATCH 01/12] Use new transformer.get_feature_names_out function - Prefer `estimator.get_feature_names_out()` over `estimator.get_feature_names()` - Configure nox to run tests with both scikit-learn 0.23 and 1.0 --- noxfile.py | 4 +++- sklearn_pandas/dataframe_mapper.py | 12 +++++++----- tests/test_dataframe_mapper.py | 18 ++++++++++++++++++ 3 files changed, 28 insertions(+), 6 deletions(-) diff --git a/noxfile.py b/noxfile.py index ee6f1e7..6ab16a7 100644 --- a/noxfile.py +++ b/noxfile.py @@ -10,13 +10,15 @@ def lint(session): @nox.session @nox.parametrize('numpy', ['1.18.1', '1.19.4', '1.20.1']) +@nox.parametrize('sklearn', ['0.23.0', '0.24.2', '1.0']) @nox.parametrize('scipy', ['1.5.4', '1.6.0']) @nox.parametrize('pandas', ['1.1.4', '1.2.2']) -def tests(session, numpy, scipy, pandas): +def tests(session, numpy, sklearn, scipy, pandas): session.install('pytest>=5.3.5', 'setuptools>=45.2', 'wheel>=0.34.2', f'numpy=={numpy}', + f'scikit-learn=={sklearn}', f'scipy=={scipy}', f'pandas=={pandas}' ) diff --git a/sklearn_pandas/dataframe_mapper.py b/sklearn_pandas/dataframe_mapper.py index ca4568e..35a4320 100644 --- a/sklearn_pandas/dataframe_mapper.py +++ b/sklearn_pandas/dataframe_mapper.py @@ -47,6 +47,8 @@ def _get_feature_names(estimator): """ if hasattr(estimator, 'classes_'): return estimator.classes_ + elif hasattr(estimator, 'get_feature_names_out'): + return estimator.get_feature_names_out() elif hasattr(estimator, 'get_feature_names'): return estimator.get_feature_names() return None @@ -290,11 +292,11 @@ def get_names(self, columns, transformer, x, alias=None, prefix='', else: names = _get_feature_names(transformer) - if names is not None and len(names) == num_cols: - output = [f"{name}_{o}" for o in names] - # otherwise, return name concatenated with '_1', '_2', etc. 
- else: - output = [name + '_' + str(o) for o in range(num_cols)] + if names is None or len(names) != num_cols: + # return name concatenated with '_0', '_1', etc. + names = range(num_cols) + + output = [f"{name}_{o}" for o in names] else: output = [name] diff --git a/tests/test_dataframe_mapper.py b/tests/test_dataframe_mapper.py index 7e75f1b..29ff619 100644 --- a/tests/test_dataframe_mapper.py +++ b/tests/test_dataframe_mapper.py @@ -344,6 +344,24 @@ def test_onehot_df(): assert cols[3] == 'target_x0_3' +def test_onehot_2cols_df(): + """ + Check level ids from one-hot when mapping 2 columns + """ + df = pd.DataFrame({ + 'col': [0, 0, 1, 1, 2, 3, 0], + 'target': [0, 0, 1, 1, 2, 3, 0] + }) + mapper = DataFrameMapper([ + (['col', 'target'], OneHotEncoder()) + ], df_out=True) + transformed = mapper.fit_transform(df) + cols = transformed.columns + assert len(cols) == 8 + assert cols[0] == 'col_target_x0_0' + assert cols[4] == 'col_target_x1_0' + + def test_customtransform_df(): """ Check level ids from a transformer in which From bc5a5f403d5b067c75dc1f65ccf4ba0024edfcae Mon Sep 17 00:00:00 2001 From: Ritesh Agrawal <ragrawal@gmail.com> Date: Sun, 7 Aug 2022 19:42:02 -0700 Subject: [PATCH 02/12] Included input_features as one of the parameters * changed the order to use get_feature_names_out as the first option. 
Otherwise fallback on classes_ * Passing input_features --- .gitignore | 1 + .tool-versions | 2 ++ README.rst | 8 ++++- pyproject.toml | 20 +++++++++++ setup.py | 7 ++-- sklearn_pandas/__init__.py | 4 +-- sklearn_pandas/dataframe_mapper.py | 22 ++++++------ sklearn_pandas/transformers.py | 55 ------------------------------ tests/test_transformers.py | 47 ------------------------- 9 files changed, 48 insertions(+), 118 deletions(-) create mode 100644 .tool-versions create mode 100644 pyproject.toml delete mode 100644 sklearn_pandas/transformers.py delete mode 100644 tests/test_transformers.py diff --git a/.gitignore b/.gitignore index 250c89a..1e83d4e 100644 --- a/.gitignore +++ b/.gitignore @@ -6,3 +6,4 @@ dist/ .cache/ .idea/ .pytest_cache/ +.DS_Store diff --git a/.tool-versions b/.tool-versions new file mode 100644 index 0000000..6581a35 --- /dev/null +++ b/.tool-versions @@ -0,0 +1,2 @@ +python 3.9.10 +poetry 1.1.12 diff --git a/README.rst b/README.rst index 681ae7c..1f5003b 100644 --- a/README.rst +++ b/README.rst @@ -194,7 +194,7 @@ In some situations the columns are not known before hand and we would like to dy >>> t.fit(df).transform(df).shape (3, 6) >>> t.transformed_names_ - ['x_0', 'x_1', 'x_2', 'x_3', 'petal_0', 'petal_1'] + ['x_sepal length (cm)', 'x_sepal width (cm)', 'x_petal length (cm)', 'x_petal width (cm)', 'petal_0', 'petal_1'] @@ -513,6 +513,12 @@ Below example shows how to change logging level. Changelog --------- +3.0.0 (2022-08-07) +****************** +* Leveraging `get_feature_names_out` to get vectorized feature names. Note that this is a breaking change as some of the generated +features names will be different from those generated in the previous major version. 
+* Removed support for `NumericalTransformer` + 2.2.0 (2021-05-07) ****************** diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..045e210 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,20 @@ +[tool.poetry] +name = "sklearn-pandas" +version = "2.2.0" +description = "Coordinates transformation between pandas and sklearn" +authors = ["Your Name <you@example.com>"] + +[tool.poetry.dependencies] +python = "^3.7" +scikit-learn = "^1.0.0" +scipy = "^1.5.1" +pandas = "^1.1.4" +numpy = "^1.18.1" + +[tool.poetry.dev-dependencies] +pytest = "^7.1.2" +nox = "^2022.1.7" + +[build-system] +requires = ["poetry-core>=1.0.0"] +build-backend = "poetry.core.masonry.api" diff --git a/setup.py b/setup.py index fb5af98..be9198e 100644 --- a/setup.py +++ b/setup.py @@ -1,9 +1,10 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- +import re + from setuptools import setup from setuptools.command.test import test as TestCommand -import re for line in open('sklearn_pandas/__init__.py'): match = re.match("__version__ *= *'(.*)'", line) @@ -38,10 +39,10 @@ def run(self): packages=['sklearn_pandas'], keywords=['scikit', 'sklearn', 'pandas'], install_requires=[ - 'scikit-learn>=0.23.0', + 'scikit-learn>=1.0.0', 'scipy>=1.5.1', 'pandas>=1.1.4', - 'numpy>=1.18.1' + 'numpy>=1.19.4' ], tests_require=['pytest', 'mock'], cmdclass={'test': PyTest}, diff --git a/sklearn_pandas/__init__.py b/sklearn_pandas/__init__.py index 535ecf6..f0a6d64 100644 --- a/sklearn_pandas/__init__.py +++ b/sklearn_pandas/__init__.py @@ -1,8 +1,8 @@ -__version__ = '2.2.0' +__version__ = '3.0.0' import logging + logger = logging.getLogger(__name__) from .dataframe_mapper import DataFrameMapper # NOQA from .features_generator import gen_features # NOQA -from .transformers import NumericalTransformer # NOQA diff --git a/sklearn_pandas/dataframe_mapper.py b/sklearn_pandas/dataframe_mapper.py index 35a4320..1f9da6f 100644 --- a/sklearn_pandas/dataframe_mapper.py +++ 
b/sklearn_pandas/dataframe_mapper.py @@ -1,12 +1,14 @@ import contextlib from datetime import datetime -import pandas as pd + import numpy as np +import pandas as pd from scipy import sparse from sklearn.base import BaseEstimator, TransformerMixin -from .cross_validation import DataWrapper -from .pipeline import make_transformer_pipeline, _call_fit, TransformerPipeline + from . import logger +from .cross_validation import DataWrapper +from .pipeline import TransformerPipeline, _call_fit, make_transformer_pipeline string_types = text_type = str @@ -41,16 +43,16 @@ def _elapsed_secs(t1): return (datetime.now()-t1).total_seconds() -def _get_feature_names(estimator): +def _get_feature_names(estimator, input_features=None): """ Attempt to extract feature names based on a given estimator """ - if hasattr(estimator, 'classes_'): - return estimator.classes_ - elif hasattr(estimator, 'get_feature_names_out'): - return estimator.get_feature_names_out() + if hasattr(estimator, 'get_feature_names_out'): + return estimator.get_feature_names_out(input_features=input_features) elif hasattr(estimator, 'get_feature_names'): return estimator.get_feature_names() + elif hasattr(estimator, 'classes_'): + return estimator.classes_ return None @@ -286,11 +288,11 @@ def get_names(self, columns, transformer, x, alias=None, prefix='', if isinstance(transformer, TransformerPipeline): inverse_steps = transformer.steps[::-1] estimators = (estimator for name, estimator in inverse_steps) - names_steps = (_get_feature_names(e) for e in estimators) + names_steps = (_get_feature_names(e, input_features = columns) for e in estimators) # noqa names = next((n for n in names_steps if n is not None), None) # Otherwise use the only estimator present else: - names = _get_feature_names(transformer) + names = _get_feature_names(transformer, input_features=columns) if names is None or len(names) != num_cols: # return name concatenated with '_0', '_1', etc. 
diff --git a/sklearn_pandas/transformers.py b/sklearn_pandas/transformers.py deleted file mode 100644 index ae693c2..0000000 --- a/sklearn_pandas/transformers.py +++ /dev/null @@ -1,55 +0,0 @@ -import numpy as np -import pandas as pd -from sklearn.base import TransformerMixin -import warnings - - -def _get_mask(X, value): - """ - Compute the boolean mask X == missing_values. - """ - if value == "NaN" or \ - value is None or \ - (isinstance(value, float) and np.isnan(value)): - return pd.isnull(X) - else: - return X == value - - -class NumericalTransformer(TransformerMixin): - """ - Provides commonly used numerical transformers. - """ - SUPPORTED_FUNCTIONS = ['log', 'log1p'] - - def __init__(self, func): - """ - Params - - func function to apply to input columns. The function will be - applied to each value. Supported functions are defined - in SUPPORTED_FUNCTIONS variable. Throws assertion error if the - not supported. - """ - - warnings.warn(""" - NumericalTransformer will be deprecated in 3.0 version. 
- Please use Sklearn.base.TransformerMixin to write - customer transformers - """, DeprecationWarning) - - assert func in self.SUPPORTED_FUNCTIONS, \ - f"Only following func are supported: {self.SUPPORTED_FUNCTIONS}" - super(NumericalTransformer, self).__init__() - self.__func = func - - def fit(self, X, y=None): - return self - - def transform(self, X, y=None): - if self.__func == 'log1p': - return np.vectorize(np.log1p)(X) - elif self.__func == 'log': - return np.vectorize(np.log)(X) - - raise ValueError(f"Invalid function name: {self.__func}") diff --git a/tests/test_transformers.py b/tests/test_transformers.py deleted file mode 100644 index 0cd8d0b..0000000 --- a/tests/test_transformers.py +++ /dev/null @@ -1,47 +0,0 @@ -import tempfile -import pytest -import numpy as np -from pandas import DataFrame -import joblib - -from sklearn_pandas import DataFrameMapper -from sklearn_pandas import NumericalTransformer - - -@pytest.fixture -def simple_dataset(): - return DataFrame({ - 'feat1': [1, 2, 1, 3, 1], - 'feat2': [1, 2, 2, 2, 3], - 'feat3': [1, 2, 3, 4, 5], - }) - - -def test_common_numerical_transformer(simple_dataset): - """ - Test log transformation - """ - transfomer = DataFrameMapper([ - ('feat1', NumericalTransformer('log')) - ], df_out=True) - df = simple_dataset - outDF = transfomer.fit_transform(df) - assert list(outDF.columns) == ['feat1'] - assert np.array_equal(df['feat1'].apply(np.log).values, outDF.feat1.values) - - -def test_numerical_transformer_serialization(simple_dataset): - """ - Test if you can serialize transformer - """ - transfomer = DataFrameMapper([ - ('feat1', NumericalTransformer('log')) - ]) - - df = simple_dataset - transfomer.fit(df) - f = tempfile.NamedTemporaryFile(delete=True) - joblib.dump(transfomer, f.name) - transfomer2 = joblib.load(f.name) - np.array_equal(transfomer.transform(df), transfomer2.transform(df)) - f.close() From 10eebaea29a5ea158d84d9d7858062fbf0a142cd Mon Sep 17 00:00:00 2001 From: Ritesh Agrawal 
<ragrawal@gmail.com> Date: Sun, 7 Aug 2022 19:45:31 -0700 Subject: [PATCH 03/12] add author name --- README.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/README.rst b/README.rst index 1f5003b..fb92b66 100644 --- a/README.rst +++ b/README.rst @@ -675,6 +675,7 @@ The code for ``DataFrameMapper`` is based on code originally written by `Ben Ham Other contributors: * Ariel Rossanigo (@arielrossanigo) +* Antonio Carlos Falcão Petri (@falcaopetri) * Arnau Gil Amat (@arnau126) * Assaf Ben-David (@AssafBenDavid) * Brendan Herger (@bjherger) From 2a184fb5794fe3174665abe2d0131e101f0b153b Mon Sep 17 00:00:00 2001 From: Ritesh Agrawal <ragrawal@gmail.com> Date: Sun, 7 Aug 2022 20:25:58 -0700 Subject: [PATCH 04/12] fixing column names fixing column names --- README.rst | 21 --------------------- pyproject.toml | 2 +- sklearn_pandas/dataframe_mapper.py | 8 ++++---- tests/test_dataframe_mapper.py | 4 ++-- 4 files changed, 7 insertions(+), 28 deletions(-) diff --git a/README.rst b/README.rst index fb92b66..ec544d7 100644 --- a/README.rst +++ b/README.rst @@ -478,27 +478,6 @@ a sparse array whenever any of the extracted features is sparse. Example: The stacking of the sparse features is done without ever densifying them. -Using ``NumericalTransformer`` -*********************************** - -While you can use ``FunctionTransformation`` to generate arbitrary transformers, it can present serialization issues -when pickling. Use ``NumericalTransformer`` instead, which takes the function name as a string parameter and hence -can be easily serialized. - - >>> from sklearn_pandas import NumericalTransformer - >>> mapper5 = DataFrameMapper([ - ... ('children', NumericalTransformer('log')), - ... 
]) - >>> mapper5.fit_transform(data) - array([[1.38629436], - [1.79175947], - [1.09861229], - [1.09861229], - [0.69314718], - [1.09861229], - [1.60943791], - [1.38629436]]) - Changing Logging level *********************************** diff --git a/pyproject.toml b/pyproject.toml index 045e210..f93b075 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -9,7 +9,7 @@ python = "^3.7" scikit-learn = "^1.0.0" scipy = "^1.5.1" pandas = "^1.1.4" -numpy = "^1.18.1" +numpy = "^1.19.4" [tool.poetry.dev-dependencies] pytest = "^7.1.2" diff --git a/sklearn_pandas/dataframe_mapper.py b/sklearn_pandas/dataframe_mapper.py index 1f9da6f..f651f9d 100644 --- a/sklearn_pandas/dataframe_mapper.py +++ b/sklearn_pandas/dataframe_mapper.py @@ -43,7 +43,7 @@ def _elapsed_secs(t1): return (datetime.now()-t1).total_seconds() -def _get_feature_names(estimator, input_features=None): +def _get_feature_names(estimator, input_features=None, alias=None): """ Attempt to extract feature names based on a given estimator """ @@ -287,17 +287,17 @@ def get_names(self, columns, transformer, x, alias=None, prefix='', # last one if isinstance(transformer, TransformerPipeline): inverse_steps = transformer.steps[::-1] - estimators = (estimator for name, estimator in inverse_steps) + estimators = (estimator for _, estimator in inverse_steps) names_steps = (_get_feature_names(e, input_features = columns) for e in estimators) # noqa names = next((n for n in names_steps if n is not None), None) # Otherwise use the only estimator present else: - names = _get_feature_names(transformer, input_features=columns) + names = _get_feature_names(transformer, input_features=columns, alias = alias) if names is None or len(names) != num_cols: # return name concatenated with '_0', '_1', etc. 
names = range(num_cols) - + output = [f"{name}_{o}" for o in names] else: output = [name] diff --git a/tests/test_dataframe_mapper.py b/tests/test_dataframe_mapper.py index 29ff619..e09bcbe 100644 --- a/tests/test_dataframe_mapper.py +++ b/tests/test_dataframe_mapper.py @@ -340,8 +340,8 @@ def test_onehot_df(): transformed = mapper.fit_transform(df) cols = transformed.columns assert len(cols) == 4 - assert cols[0] == 'target_x0_0' - assert cols[3] == 'target_x0_3' + assert cols[0] == 'target_target_0' + assert cols[3] == 'target_target_3' def test_onehot_2cols_df(): From 1a43a5ae37d02b748bde02def2dfad04aed49779 Mon Sep 17 00:00:00 2001 From: Ritesh Agrawal <ragrawal@gmail.com> Date: Sun, 7 Aug 2022 20:36:38 -0700 Subject: [PATCH 05/12] fixed column output names --- tests/test_dataframe_mapper.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_dataframe_mapper.py b/tests/test_dataframe_mapper.py index e09bcbe..5c9f35c 100644 --- a/tests/test_dataframe_mapper.py +++ b/tests/test_dataframe_mapper.py @@ -358,8 +358,8 @@ def test_onehot_2cols_df(): transformed = mapper.fit_transform(df) cols = transformed.columns assert len(cols) == 8 - assert cols[0] == 'col_target_x0_0' - assert cols[4] == 'col_target_x1_0' + assert cols[0] == 'col_target_col_0' + assert cols[4] == 'col_target_target_0' def test_customtransform_df(): From a76b0b6641c3c70aa2f57301e6285a111d0804c4 Mon Sep 17 00:00:00 2001 From: Ritesh Agrawal <ragrawal@gmail.com> Date: Sun, 7 Aug 2022 20:42:54 -0700 Subject: [PATCH 06/12] removed poetry --- pyproject.toml | 20 -------------------- 1 file changed, 20 deletions(-) delete mode 100644 pyproject.toml diff --git a/pyproject.toml b/pyproject.toml deleted file mode 100644 index f93b075..0000000 --- a/pyproject.toml +++ /dev/null @@ -1,20 +0,0 @@ -[tool.poetry] -name = "sklearn-pandas" -version = "2.2.0" -description = "Coordinates transformation between pandas and sklearn" -authors = ["Your Name <you@example.com>"] - 
-[tool.poetry.dependencies] -python = "^3.7" -scikit-learn = "^1.0.0" -scipy = "^1.5.1" -pandas = "^1.1.4" -numpy = "^1.19.4" - -[tool.poetry.dev-dependencies] -pytest = "^7.1.2" -nox = "^2022.1.7" - -[build-system] -requires = ["poetry-core>=1.0.0"] -build-backend = "poetry.core.masonry.api" From c6457e0ce81120e1d2ecc50b0e8ac6cd16f18c2b Mon Sep 17 00:00:00 2001 From: Ritesh Agrawal <ragrawal@gmail.com> Date: Sun, 7 Aug 2022 20:48:19 -0700 Subject: [PATCH 07/12] removed tool versions --- .tool-versions | 2 -- 1 file changed, 2 deletions(-) delete mode 100644 .tool-versions diff --git a/.tool-versions b/.tool-versions deleted file mode 100644 index 6581a35..0000000 --- a/.tool-versions +++ /dev/null @@ -1,2 +0,0 @@ -python 3.9.10 -poetry 1.1.12 From 71e6c627867fc6536c8b0eb34e3a14a7d2cae4f5 Mon Sep 17 00:00:00 2001 From: Ritesh Agrawal <ragrawal@gmail.com> Date: Sun, 7 Aug 2022 20:52:43 -0700 Subject: [PATCH 08/12] fixed lint --- sklearn_pandas/dataframe_mapper.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sklearn_pandas/dataframe_mapper.py b/sklearn_pandas/dataframe_mapper.py index f651f9d..6b9f44d 100644 --- a/sklearn_pandas/dataframe_mapper.py +++ b/sklearn_pandas/dataframe_mapper.py @@ -292,12 +292,12 @@ def get_names(self, columns, transformer, x, alias=None, prefix='', names = next((n for n in names_steps if n is not None), None) # Otherwise use the only estimator present else: - names = _get_feature_names(transformer, input_features=columns, alias = alias) + names = _get_feature_names(transformer, input_features=columns) if names is None or len(names) != num_cols: # return name concatenated with '_0', '_1', etc. 
names = range(num_cols) - + output = [f"{name}_{o}" for o in names] else: output = [name] From 8f0c2615790cfa8a6dd38ce7baaf52449059b076 Mon Sep 17 00:00:00 2001 From: Ritesh Agrawal <ragrawal@gmail.com> Date: Sun, 7 Aug 2022 21:01:35 -0700 Subject: [PATCH 09/12] fixed lint issues --- sklearn_pandas/dataframe_mapper.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sklearn_pandas/dataframe_mapper.py b/sklearn_pandas/dataframe_mapper.py index 6b9f44d..8c768cc 100644 --- a/sklearn_pandas/dataframe_mapper.py +++ b/sklearn_pandas/dataframe_mapper.py @@ -43,7 +43,7 @@ def _elapsed_secs(t1): return (datetime.now()-t1).total_seconds() -def _get_feature_names(estimator, input_features=None, alias=None): +def _get_feature_names(estimator, input_features=None): """ Attempt to extract feature names based on a given estimator """ @@ -292,7 +292,7 @@ def get_names(self, columns, transformer, x, alias=None, prefix='', names = next((n for n in names_steps if n is not None), None) # Otherwise use the only estimator present else: - names = _get_feature_names(transformer, input_features=columns) + names = _get_feature_names(transformer, input_features = columns) if names is None or len(names) != num_cols: # return name concatenated with '_0', '_1', etc. 
From 5343faaad0b9089d31f429e5224c006416685226 Mon Sep 17 00:00:00 2001 From: Ritesh Agrawal <ragrawal@gmail.com> Date: Sun, 7 Aug 2022 21:08:19 -0700 Subject: [PATCH 10/12] fixed lint issues --- sklearn_pandas/dataframe_mapper.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn_pandas/dataframe_mapper.py b/sklearn_pandas/dataframe_mapper.py index 8c768cc..5ca7b61 100644 --- a/sklearn_pandas/dataframe_mapper.py +++ b/sklearn_pandas/dataframe_mapper.py @@ -292,7 +292,7 @@ def get_names(self, columns, transformer, x, alias=None, prefix='', names = next((n for n in names_steps if n is not None), None) # Otherwise use the only estimator present else: - names = _get_feature_names(transformer, input_features = columns) + names = _get_feature_names(transformer, input_features=columns) if names is None or len(names) != num_cols: # return name concatenated with '_0', '_1', etc. From f38baa3c88bc0217ddb6df32b0e2d46ee1c53289 Mon Sep 17 00:00:00 2001 From: Ritesh Agrawal <ragrawal@gmail.com> Date: Sun, 7 Aug 2022 21:29:48 -0700 Subject: [PATCH 11/12] reduced number of versions --- noxfile.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/noxfile.py b/noxfile.py index 6ab16a7..bb1f701 100644 --- a/noxfile.py +++ b/noxfile.py @@ -9,10 +9,10 @@ def lint(session): session.run('flake8', 'sklearn_pandas/', 'tests') @nox.session -@nox.parametrize('numpy', ['1.18.1', '1.19.4', '1.20.1']) -@nox.parametrize('sklearn', ['0.23.0', '0.24.2', '1.0']) -@nox.parametrize('scipy', ['1.5.4', '1.6.0']) -@nox.parametrize('pandas', ['1.1.4', '1.2.2']) +@nox.parametrize('numpy', ['1.18.1', '1.20.1']) +@nox.parametrize('sklearn', ['1.0', '1.1.2']) +@nox.parametrize('scipy', ['1.6.0']) +@nox.parametrize('pandas', ['1.2.2']) def tests(session, numpy, sklearn, scipy, pandas): session.install('pytest>=5.3.5', 'setuptools>=45.2', From 65ae37657d1efa5df08981374cfaba6b056822f4 Mon Sep 17 00:00:00 2001 From: Ritesh Agrawal <ragrawal@gmail.com> Date: Sun, 7 
Aug 2022 21:41:35 -0700 Subject: [PATCH 12/12] set minimum version for scikit learn to 1.1.0 --- noxfile.py | 2 +- setup.py | 2 +- tests/test_dataframe_mapper.py | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/noxfile.py b/noxfile.py index bb1f701..1c1210a 100644 --- a/noxfile.py +++ b/noxfile.py @@ -10,7 +10,7 @@ def lint(session): @nox.session @nox.parametrize('numpy', ['1.18.1', '1.20.1']) -@nox.parametrize('sklearn', ['1.0', '1.1.2']) +@nox.parametrize('sklearn', ['1.1.0', '1.1.2']) @nox.parametrize('scipy', ['1.6.0']) @nox.parametrize('pandas', ['1.2.2']) def tests(session, numpy, sklearn, scipy, pandas): diff --git a/setup.py b/setup.py index be9198e..9cbb69c 100644 --- a/setup.py +++ b/setup.py @@ -39,7 +39,7 @@ def run(self): packages=['sklearn_pandas'], keywords=['scikit', 'sklearn', 'pandas'], install_requires=[ - 'scikit-learn>=1.0.0', + 'scikit-learn>=1.1.0', 'scipy>=1.5.1', 'pandas>=1.1.4', 'numpy>=1.19.4' diff --git a/tests/test_dataframe_mapper.py b/tests/test_dataframe_mapper.py index 04391e7..92c7515 100644 --- a/tests/test_dataframe_mapper.py +++ b/tests/test_dataframe_mapper.py @@ -468,8 +468,8 @@ def test_pca(complex_dataframe): transformed = mapper.fit_transform(df) cols = transformed.columns assert len(cols) == 2 - assert cols[0] == 'feat1_feat2_0' - assert cols[1] == 'feat1_feat2_1' + assert cols[0] == 'feat1_feat2_pca0' + assert cols[1] == 'feat1_feat2_pca1' def test_fit_transform(simple_dataframe):