From e2a7703956d8777499ca24cf319d82c22a0d12cd Mon Sep 17 00:00:00 2001 From: HappyHorse Date: Mon, 24 Apr 2023 12:30:59 -0400 Subject: [PATCH 1/9] cast int --- pandas/core/generic.py | 2 +- web/pandas_web.py | 11 ++++++++++- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 9a1ba12482570..1cfd75fdbf78a 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -658,7 +658,7 @@ def size(self) -> int: """ # error: Incompatible return value type (got "signedinteger[_64Bit]", # expected "int") [return-value] - return np.prod(self.shape) # type: ignore[return-value] + return int(np.prod(self.shape)) # type: ignore[return-value] def set_axis( self, diff --git a/web/pandas_web.py b/web/pandas_web.py index 5e902f1b1919b..5704cff78d772 100755 --- a/web/pandas_web.py +++ b/web/pandas_web.py @@ -43,6 +43,9 @@ import requests import yaml +from packaging import version +from itertools import groupby + api_token = os.environ.get("GITHUB_TOKEN") if api_token is not None: GITHUB_API_HEADERS = {"Authorization": f"Bearer {api_token}"} @@ -205,6 +208,11 @@ def maintainers_add_info(context): @staticmethod def home_add_releases(context): context["releases"] = [] + non_obsolete_releases = [] + + # create a set of tuples, + # if already inside, don't add + github_repo_url = context["main"]["github_repo_url"] resp = requests.get( @@ -223,9 +231,10 @@ def home_add_releases(context): with open(pathlib.Path(context["target_path"]) / "releases.json", "w") as f: json.dump(releases, f, default=datetime.datetime.isoformat) - for release in releases: + for release in non_obsolete_releases: if release["prerelease"]: continue + published = datetime.datetime.strptime( release["published_at"], "%Y-%m-%dT%H:%M:%SZ" ) From cb196f85c9f3013d69e6947ad5b7954a13ca284e Mon Sep 17 00:00:00 2001 From: HappyHorse Date: Mon, 24 Apr 2023 12:43:02 -0400 Subject: [PATCH 2/9] added masnan changes --- web/pandas_web.py | 27 
+++++++++++++++++---------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/web/pandas_web.py b/web/pandas_web.py index 5704cff78d772..c3e596eb456d1 100755 --- a/web/pandas_web.py +++ b/web/pandas_web.py @@ -208,12 +208,6 @@ def maintainers_add_info(context): @staticmethod def home_add_releases(context): context["releases"] = [] - non_obsolete_releases = [] - - # create a set of tuples, - # if already inside, don't add - - github_repo_url = context["main"]["github_repo_url"] resp = requests.get( f"https://api.github.com/repos/{github_repo_url}/releases", @@ -227,14 +221,28 @@ def home_add_releases(context): else: resp.raise_for_status() releases = resp.json() - with open(pathlib.Path(context["target_path"]) / "releases.json", "w") as f: json.dump(releases, f, default=datetime.datetime.isoformat) - for release in non_obsolete_releases: + # Sort the releases in descending order + sorted_releases = sorted(releases, key=lambda release:version.parse(release["tag_name"]), reverse=True) + sorted_releases = sorted( + releases, + key=lambda release: version.parse(release["tag_name"]), + reverse=True, + ) + + # Gathers minor versions + latest_releases = [] + minor_versions = set() + for release in sorted_releases: + minor_version = ".".join(release["tag_name"].split(".")[:2]) + if minor_version not in minor_versions: + latest_releases.append(release) + minor_versions.add(minor_version) + for release in latest_releases: if release["prerelease"]: continue - published = datetime.datetime.strptime( release["published_at"], "%Y-%m-%dT%H:%M:%SZ" ) @@ -250,7 +258,6 @@ def home_add_releases(context): ), } ) - return context @staticmethod From 7a95ffc19c1c8473a9e249bb8a0367e471dcf4e8 Mon Sep 17 00:00:00 2001 From: HappyHorse Date: Wed, 26 Apr 2023 17:16:01 -0400 Subject: [PATCH 3/9] install pre-commit --- web/pandas_web.py | 24 ++++-------------------- 1 file changed, 4 insertions(+), 20 deletions(-) diff --git a/web/pandas_web.py b/web/pandas_web.py index 
c3e596eb456d1..5e902f1b1919b 100755 --- a/web/pandas_web.py +++ b/web/pandas_web.py @@ -43,9 +43,6 @@ import requests import yaml -from packaging import version -from itertools import groupby - api_token = os.environ.get("GITHUB_TOKEN") if api_token is not None: GITHUB_API_HEADERS = {"Authorization": f"Bearer {api_token}"} @@ -208,6 +205,7 @@ def maintainers_add_info(context): @staticmethod def home_add_releases(context): context["releases"] = [] + github_repo_url = context["main"]["github_repo_url"] resp = requests.get( f"https://api.github.com/repos/{github_repo_url}/releases", @@ -221,26 +219,11 @@ def home_add_releases(context): else: resp.raise_for_status() releases = resp.json() + with open(pathlib.Path(context["target_path"]) / "releases.json", "w") as f: json.dump(releases, f, default=datetime.datetime.isoformat) - # Sort the releases in descending order - sorted_releases = sorted(releases, key=lambda release:version.parse(release["tag_name"]), reverse=True) - sorted_releases = sorted( - releases, - key=lambda release: version.parse(release["tag_name"]), - reverse=True, - ) - - # Gathers minor versions - latest_releases = [] - minor_versions = set() - for release in sorted_releases: - minor_version = ".".join(release["tag_name"].split(".")[:2]) - if minor_version not in minor_versions: - latest_releases.append(release) - minor_versions.add(minor_version) - for release in latest_releases: + for release in releases: if release["prerelease"]: continue published = datetime.datetime.strptime( @@ -258,6 +241,7 @@ def home_add_releases(context): ), } ) + return context @staticmethod From bce4731d0f144c7c4573f87d13d112ab95e7ad19 Mon Sep 17 00:00:00 2001 From: HappyHorse Date: Wed, 26 Apr 2023 17:44:21 -0400 Subject: [PATCH 4/9] added pytest testing for size int casting --- pandas/_testing/size_tests.py | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) create mode 100644 pandas/_testing/size_tests.py diff --git a/pandas/_testing/size_tests.py 
b/pandas/_testing/size_tests.py new file mode 100644 index 0000000000000..5bb5fcffc800d --- /dev/null +++ b/pandas/_testing/size_tests.py @@ -0,0 +1,32 @@ +import numpy as np + +import pandas as pd + + +def test_size(): + # test for Series object + s = pd.Series({"a": 1, "b": 2, "c": 3}) + assert s.size == 3 + assert isinstance(s.size, int) + + # test for DataFrame object + df = pd.DataFrame({"col1": [1, 2], "col2": [3, 4]}) + assert df.size == 4 + assert isinstance(df.size, int) + + # test for empty DataFrame object + empty_df = pd.DataFrame() + assert empty_df.size == 0 + assert isinstance(empty_df.size, int) + + # test for DataFrame with missing values + df_with_missing = pd.DataFrame({"col1": [1, np.nan], "col2": [3, 4]}) + assert df_with_missing.size == 4 + assert isinstance(df_with_missing.size, int) + + # test for MultiIndex DataFrame + multi_df = pd.DataFrame( + {"col1": [1, 2], "col2": [3, 4]}, index=[["a", "b"], [1, 2]] + ) + assert multi_df.size == 4 + assert isinstance(multi_df.size, int) From 1670b572069fb2502325c3735828d7b01ebc3586 Mon Sep 17 00:00:00 2001 From: HappyHorse Date: Fri, 28 Apr 2023 00:37:54 -0400 Subject: [PATCH 5/9] new changes, removed pd --- pandas/tests/generic/test_generic.py | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/pandas/tests/generic/test_generic.py b/pandas/tests/generic/test_generic.py index acc1a8c2e1d05..6257a92d7df07 100644 --- a/pandas/tests/generic/test_generic.py +++ b/pandas/tests/generic/test_generic.py @@ -227,6 +227,34 @@ def test_size_compat(self, frame_or_series): assert o.size == np.prod(o.shape) assert o.size == 10 ** len(o.axes) + def test_size(self): + # test for Series object + s = Series({"a": 1, "b": 2, "c": 3}) + assert s.size == 3 + assert isinstance(s.size, int) + + # test for DataFrame object + df = DataFrame({"col1": [1, 2], "col2": [3, 4]}) + assert df.size == 4 + assert isinstance(df.size, int) + + # test for empty DataFrame object + empty_df = DataFrame() + 
assert empty_df.size == 0 + assert isinstance(empty_df.size, int) + + # test for DataFrame with missing values + df_with_missing = DataFrame({"col1": [1, np.nan], "col2": [3, 4]}) + assert df_with_missing.size == 4 + assert isinstance(df_with_missing.size, int) + + # test for MultiIndex DataFrame + multi_df = DataFrame( + {"col1": [1, 2], "col2": [3, 4]}, index=[["a", "b"], [1, 2]] + ) + assert multi_df.size == 4 + assert isinstance(multi_df.size, int) + def test_split_compat(self, frame_or_series): # xref GH8846 o = construct(frame_or_series, shape=10) From cda60c134997ca4dcab2abc713cf78232a4464d7 Mon Sep 17 00:00:00 2001 From: HappyHorse Date: Mon, 1 May 2023 14:23:20 -0400 Subject: [PATCH 6/9] reformat --- pandas/_testing/size_tests.py | 32 ------------------------ pandas/tests/frame/methods/test_size.py | 20 +++++++++++++++ pandas/tests/generic/test_generic.py | 28 --------------------- pandas/tests/series/methods/test_size.py | 21 ++++++++++++++++ 4 files changed, 41 insertions(+), 60 deletions(-) delete mode 100644 pandas/_testing/size_tests.py create mode 100644 pandas/tests/frame/methods/test_size.py create mode 100644 pandas/tests/series/methods/test_size.py diff --git a/pandas/_testing/size_tests.py b/pandas/_testing/size_tests.py deleted file mode 100644 index 5bb5fcffc800d..0000000000000 --- a/pandas/_testing/size_tests.py +++ /dev/null @@ -1,32 +0,0 @@ -import numpy as np - -import pandas as pd - - -def test_size(): - # test for Series object - s = pd.Series({"a": 1, "b": 2, "c": 3}) - assert s.size == 3 - assert isinstance(s.size, int) - - # test for DataFrame object - df = pd.DataFrame({"col1": [1, 2], "col2": [3, 4]}) - assert df.size == 4 - assert isinstance(df.size, int) - - # test for empty DataFrame object - empty_df = pd.DataFrame() - assert empty_df.size == 0 - assert isinstance(empty_df.size, int) - - # test for DataFrame with missing values - df_with_missing = pd.DataFrame({"col1": [1, np.nan], "col2": [3, 4]}) - assert df_with_missing.size 
== 4 - assert isinstance(df_with_missing.size, int) - - # test for MultiIndex DataFrame - multi_df = pd.DataFrame( - {"col1": [1, 2], "col2": [3, 4]}, index=[["a", "b"], [1, 2]] - ) - assert multi_df.size == 4 - assert isinstance(multi_df.size, int) diff --git a/pandas/tests/frame/methods/test_size.py b/pandas/tests/frame/methods/test_size.py new file mode 100644 index 0000000000000..9649ee958ec38 --- /dev/null +++ b/pandas/tests/frame/methods/test_size.py @@ -0,0 +1,20 @@ +import numpy as np +import pytest + +from pandas import DataFrame + + +@pytest.mark.parametrize( + "data, index, expected", + [ + ({"col1": [1], "col2": [3]}, None, 2), + ({}, None, 0), + ({"col1": [1, np.nan], "col2": [3, 4]}, None, 4), + ({"col1": [1, 2], "col2": [3, 4]}, [["a", "b"], [1, 2]], 4), + ({"col1": [1, 2, 3, 4], "col2": [3, 4, 5, 6]}, ["x", "y", "a", "b"], 8), + ], +) +def test_size(data, index, expected): + df = DataFrame(data, index=index) + assert df.size == expected + assert isinstance(df.size, int) diff --git a/pandas/tests/generic/test_generic.py b/pandas/tests/generic/test_generic.py index 6257a92d7df07..acc1a8c2e1d05 100644 --- a/pandas/tests/generic/test_generic.py +++ b/pandas/tests/generic/test_generic.py @@ -227,34 +227,6 @@ def test_size_compat(self, frame_or_series): assert o.size == np.prod(o.shape) assert o.size == 10 ** len(o.axes) - def test_size(self): - # test for Series object - s = Series({"a": 1, "b": 2, "c": 3}) - assert s.size == 3 - assert isinstance(s.size, int) - - # test for DataFrame object - df = DataFrame({"col1": [1, 2], "col2": [3, 4]}) - assert df.size == 4 - assert isinstance(df.size, int) - - # test for empty DataFrame object - empty_df = DataFrame() - assert empty_df.size == 0 - assert isinstance(empty_df.size, int) - - # test for DataFrame with missing values - df_with_missing = DataFrame({"col1": [1, np.nan], "col2": [3, 4]}) - assert df_with_missing.size == 4 - assert isinstance(df_with_missing.size, int) - - # test for MultiIndex DataFrame - 
multi_df = DataFrame( - {"col1": [1, 2], "col2": [3, 4]}, index=[["a", "b"], [1, 2]] - ) - assert multi_df.size == 4 - assert isinstance(multi_df.size, int) - def test_split_compat(self, frame_or_series): # xref GH8846 o = construct(frame_or_series, shape=10) diff --git a/pandas/tests/series/methods/test_size.py b/pandas/tests/series/methods/test_size.py new file mode 100644 index 0000000000000..79306969bed8e --- /dev/null +++ b/pandas/tests/series/methods/test_size.py @@ -0,0 +1,21 @@ +import pytest + +from pandas import Series + + +@pytest.mark.parametrize( + "data, index, expected", + [ + ([1, 2, 3], None, 3), + ({"a": 1, "b": 2, "c": 3}, None, 3), + ([1, 2, 3], ["x", "y", "z"], 3), + ([1, 2, 3, 4, 5], ["x", "y", "z", "w", "n"], 5), + ([1, 2, 3], None, 3), + ([1, 2, 3], ["x", "y", "z"], 3), + ([1, 2, 3, 4], ["x", "y", "z", "w"], 4), + ], +) +def test_series(data, index, expected): + s = Series(data, index=index) + assert s.size == expected + assert isinstance(s.size, int) From e94a338513fbd076edd2606fc54b3c0bf5bbe121 Mon Sep 17 00:00:00 2001 From: HappyHorse Date: Wed, 3 May 2023 13:51:34 -0400 Subject: [PATCH 7/9] removed git ignore, added comments to indicate new tests --- pandas/tests/series/methods/test_size.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/pandas/tests/series/methods/test_size.py b/pandas/tests/series/methods/test_size.py index 79306969bed8e..cb4f0c1d53627 100644 --- a/pandas/tests/series/methods/test_size.py +++ b/pandas/tests/series/methods/test_size.py @@ -15,7 +15,9 @@ ([1, 2, 3, 4], ["x", "y", "z", "w"], 4), ], ) + +# GH#52897 def test_series(data, index, expected): - s = Series(data, index=index) - assert s.size == expected - assert isinstance(s.size, int) + ser = Series(data, index=index) + assert ser.size == expected + assert isinstance(ser.size, int) From d179d9ec9f0d37d38df4254f215d70971cfd175a Mon Sep 17 00:00:00 2001 From: HappyHorse Date: Thu, 4 May 2023 20:48:54 -0400 Subject: [PATCH 8/9] removed 
ignore --- pandas/core/generic.py | 5 ++--- pandas/tests/frame/methods/test_size.py | 1 + pandas/tests/series/methods/test_size.py | 3 +-- 3 files changed, 4 insertions(+), 5 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 1cfd75fdbf78a..3a74604a4051b 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -656,9 +656,8 @@ def size(self) -> int: >>> df.size 4 """ - # error: Incompatible return value type (got "signedinteger[_64Bit]", - # expected "int") [return-value] - return int(np.prod(self.shape)) # type: ignore[return-value] + + return int(np.prod(self.shape)) def set_axis( self, diff --git a/pandas/tests/frame/methods/test_size.py b/pandas/tests/frame/methods/test_size.py index 9649ee958ec38..0c8b6473c85ea 100644 --- a/pandas/tests/frame/methods/test_size.py +++ b/pandas/tests/frame/methods/test_size.py @@ -15,6 +15,7 @@ ], ) def test_size(data, index, expected): + # GH#52897 df = DataFrame(data, index=index) assert df.size == expected assert isinstance(df.size, int) diff --git a/pandas/tests/series/methods/test_size.py b/pandas/tests/series/methods/test_size.py index cb4f0c1d53627..20a454996fa44 100644 --- a/pandas/tests/series/methods/test_size.py +++ b/pandas/tests/series/methods/test_size.py @@ -15,9 +15,8 @@ ([1, 2, 3, 4], ["x", "y", "z", "w"], 4), ], ) - -# GH#52897 def test_series(data, index, expected): + # GH#52897 ser = Series(data, index=index) assert ser.size == expected assert isinstance(ser.size, int) From 0bac984d301879c1fea313ee61f7676e3296ce05 Mon Sep 17 00:00:00 2001 From: HappyHorse Date: Thu, 4 May 2023 23:30:33 -0400 Subject: [PATCH 9/9] entry sorted alphabetical --- doc/source/whatsnew/v2.1.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index c3355757350b9..85bbd4f37d8d8 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -301,8 +301,8 @@ Timezones Numeric ^^^^^^^ - Bug in 
:meth:`DataFrame.corrwith` raising ``NotImplementedError`` for pyarrow-backed dtypes (:issue:`52314`) +- Bug in :meth:`DataFrame.size` and :meth:`Series.size` returning a 64-bit NumPy integer instead of a Python ``int`` (:issue:`52897`) - Bug in :meth:`Series.corr` and :meth:`Series.cov` raising ``AttributeError`` for masked dtypes (:issue:`51422`) -- Conversion ^^^^^^^^^^