Skip to content

Commit cad0d87

Browse files
authored
Validate dir for pd.errors and pd.util (#57140)
* Ensure pandas.errors only imports its __all__
* Make public util API accessible
* Fix import
* Type input
1 parent 29a3682 commit cad0d87

File tree

9 files changed

+114
-78
lines changed

9 files changed

+114
-78
lines changed

doc/source/reference/index.rst

+4-3
Original file line numberDiff line numberDiff line change
@@ -24,13 +24,14 @@ The following subpackages are public.
2424
`pandas-stubs <https://github.com/pandas-dev/pandas-stubs>`_ package
2525
which has classes in addition to those that occur in pandas for type-hinting.
2626

27-
In addition, public functions in ``pandas.io`` and ``pandas.tseries`` submodules
28-
are mentioned in the documentation.
27+
In addition, public functions in the ``pandas.io``, ``pandas.tseries``, and ``pandas.util`` submodules
28+
are explicitly mentioned in the documentation. Further APIs in these modules are not guaranteed
29+
to be stable.
2930

3031

3132
.. warning::
3233

33-
The ``pandas.core``, ``pandas.compat``, and ``pandas.util`` top-level modules are PRIVATE. Stable functionality in such modules is not guaranteed.
34+
The ``pandas.core`` and ``pandas.compat`` top-level modules are PRIVATE. Stable functionality in such modules is not guaranteed.
3435

3536
.. If you update this toctree, also update the manual toctree in the
3637
.. main index.rst.template

pandas/core/frame.py

+2
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,8 @@
6060
from pandas.errors import (
6161
ChainedAssignmentError,
6262
InvalidIndexError,
63+
)
64+
from pandas.errors.cow import (
6365
_chained_assignment_method_msg,
6466
_chained_assignment_msg,
6567
_chained_assignment_warning_method_msg,

pandas/core/generic.py

+2
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,8 @@
101101
InvalidIndexError,
102102
SettingWithCopyError,
103103
SettingWithCopyWarning,
104+
)
105+
from pandas.errors.cow import (
104106
_chained_assignment_method_msg,
105107
_chained_assignment_warning_method_msg,
106108
_check_cacher,

pandas/core/indexing.py

+2
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,8 @@
2727
IndexingError,
2828
InvalidIndexError,
2929
LossySetitemError,
30+
)
31+
from pandas.errors.cow import (
3032
_chained_assignment_msg,
3133
_chained_assignment_warning_msg,
3234
_check_cacher,

pandas/core/series.py

+2
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,8 @@
4545
from pandas.errors import (
4646
ChainedAssignmentError,
4747
InvalidIndexError,
48+
)
49+
from pandas.errors.cow import (
4850
_chained_assignment_method_msg,
4951
_chained_assignment_msg,
5052
_chained_assignment_warning_method_msg,

pandas/errors/__init__.py

+1-75
Original file line numberDiff line numberDiff line change
@@ -475,81 +475,6 @@ class ChainedAssignmentError(Warning):
475475
"""
476476

477477

478-
_chained_assignment_msg = (
479-
"A value is trying to be set on a copy of a DataFrame or Series "
480-
"through chained assignment.\n"
481-
"When using the Copy-on-Write mode, such chained assignment never works "
482-
"to update the original DataFrame or Series, because the intermediate "
483-
"object on which we are setting values always behaves as a copy.\n\n"
484-
"Try using '.loc[row_indexer, col_indexer] = value' instead, to perform "
485-
"the assignment in a single step.\n\n"
486-
"See the caveats in the documentation: "
487-
"https://pandas.pydata.org/pandas-docs/stable/user_guide/"
488-
"indexing.html#returning-a-view-versus-a-copy"
489-
)
490-
491-
492-
_chained_assignment_method_msg = (
493-
"A value is trying to be set on a copy of a DataFrame or Series "
494-
"through chained assignment using an inplace method.\n"
495-
"When using the Copy-on-Write mode, such inplace method never works "
496-
"to update the original DataFrame or Series, because the intermediate "
497-
"object on which we are setting values always behaves as a copy.\n\n"
498-
"For example, when doing 'df[col].method(value, inplace=True)', try "
499-
"using 'df.method({col: value}, inplace=True)' instead, to perform "
500-
"the operation inplace on the original object.\n\n"
501-
)
502-
503-
504-
_chained_assignment_warning_msg = (
505-
"ChainedAssignmentError: behaviour will change in pandas 3.0!\n"
506-
"You are setting values through chained assignment. Currently this works "
507-
"in certain cases, but when using Copy-on-Write (which will become the "
508-
"default behaviour in pandas 3.0) this will never work to update the "
509-
"original DataFrame or Series, because the intermediate object on which "
510-
"we are setting values will behave as a copy.\n"
511-
"A typical example is when you are setting values in a column of a "
512-
"DataFrame, like:\n\n"
513-
'df["col"][row_indexer] = value\n\n'
514-
'Use `df.loc[row_indexer, "col"] = values` instead, to perform the '
515-
"assignment in a single step and ensure this keeps updating the original `df`.\n\n"
516-
"See the caveats in the documentation: "
517-
"https://pandas.pydata.org/pandas-docs/stable/user_guide/"
518-
"indexing.html#returning-a-view-versus-a-copy\n"
519-
)
520-
521-
522-
_chained_assignment_warning_method_msg = (
523-
"A value is trying to be set on a copy of a DataFrame or Series "
524-
"through chained assignment using an inplace method.\n"
525-
"The behavior will change in pandas 3.0. This inplace method will "
526-
"never work because the intermediate object on which we are setting "
527-
"values always behaves as a copy.\n\n"
528-
"For example, when doing 'df[col].method(value, inplace=True)', try "
529-
"using 'df.method({col: value}, inplace=True)' or "
530-
"df[col] = df[col].method(value) instead, to perform "
531-
"the operation inplace on the original object.\n\n"
532-
)
533-
534-
535-
def _check_cacher(obj) -> bool:
536-
# This is a mess, selection paths that return a view set the _cacher attribute
537-
# on the Series; most of them also set _item_cache which adds 1 to our relevant
538-
# reference count, but iloc does not, so we have to check if we are actually
539-
# in the item cache
540-
if hasattr(obj, "_cacher"):
541-
parent = obj._cacher[1]()
542-
# parent could be dead
543-
if parent is None:
544-
return False
545-
if hasattr(parent, "_item_cache"):
546-
if obj._cacher[0] in parent._item_cache:
547-
# Check if we are actually the item from item_cache, iloc creates a
548-
# new object
549-
return obj is parent._item_cache[obj._cacher[0]]
550-
return False
551-
552-
553478
class NumExprClobberingError(NameError):
554479
"""
555480
Exception raised when trying to use a built-in numexpr name as a variable name.
@@ -831,6 +756,7 @@ class InvalidComparison(Exception):
831756
"AbstractMethodError",
832757
"AttributeConflictWarning",
833758
"CategoricalConversionWarning",
759+
"ChainedAssignmentError",
834760
"ClosedFileError",
835761
"CSSWarning",
836762
"DatabaseError",

pandas/errors/cow.py

+74
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
from typing import Any
2+
3+
# Message text for a chained assignment made while Copy-on-Write mode is in
# use; it points the reader at a single-step ``.loc`` assignment instead.
_chained_assignment_msg = (
    "A value is trying to be set on a copy of a DataFrame or Series "
    "through chained assignment.\n"
    "When using the Copy-on-Write mode, such chained assignment never works "
    "to update the original DataFrame or Series, because the intermediate "
    "object on which we are setting values always behaves as a copy.\n\n"
    "Try using '.loc[row_indexer, col_indexer] = value' instead, to perform "
    "the assignment in a single step.\n\n"
    "See the caveats in the documentation: "
    "https://pandas.pydata.org/pandas-docs/stable/user_guide/"
    "indexing.html#returning-a-view-versus-a-copy"
)


# Same situation as above, but for an inplace *method* called on the
# intermediate object (``df[col].method(value, inplace=True)``).
_chained_assignment_method_msg = (
    "A value is trying to be set on a copy of a DataFrame or Series "
    "through chained assignment using an inplace method.\n"
    "When using the Copy-on-Write mode, such inplace method never works "
    "to update the original DataFrame or Series, because the intermediate "
    "object on which we are setting values always behaves as a copy.\n\n"
    "For example, when doing 'df[col].method(value, inplace=True)', try "
    "using 'df.method({col: value}, inplace=True)' instead, to perform "
    "the operation inplace on the original object.\n\n"
)


# Forward-looking variant: announces that chained assignment will stop
# updating the original object once Copy-on-Write is the default (pandas 3.0).
_chained_assignment_warning_msg = (
    "ChainedAssignmentError: behaviour will change in pandas 3.0!\n"
    "You are setting values through chained assignment. Currently this works "
    "in certain cases, but when using Copy-on-Write (which will become the "
    "default behaviour in pandas 3.0) this will never work to update the "
    "original DataFrame or Series, because the intermediate object on which "
    "we are setting values will behave as a copy.\n"
    "A typical example is when you are setting values in a column of a "
    "DataFrame, like:\n\n"
    'df["col"][row_indexer] = value\n\n'
    'Use `df.loc[row_indexer, "col"] = values` instead, to perform the '
    "assignment in a single step and ensure this keeps updating the original `df`.\n\n"
    "See the caveats in the documentation: "
    "https://pandas.pydata.org/pandas-docs/stable/user_guide/"
    "indexing.html#returning-a-view-versus-a-copy\n"
)

# Forward-looking variant for inplace methods called on an intermediate object.
_chained_assignment_warning_method_msg = (
    "A value is trying to be set on a copy of a DataFrame or Series "
    "through chained assignment using an inplace method.\n"
    "The behavior will change in pandas 3.0. This inplace method will "
    "never work because the intermediate object on which we are setting "
    "values always behaves as a copy.\n\n"
    "For example, when doing 'df[col].method(value, inplace=True)', try "
    "using 'df.method({col: value}, inplace=True)' or "
    "df[col] = df[col].method(value) instead, to perform "
    "the operation inplace on the original object.\n\n"
)
57+
58+
59+
def _check_cacher(obj: Any) -> bool:
    """
    Report whether ``obj`` is the exact object held in its parent's item cache.

    Parameters
    ----------
    obj : Any
        Object to inspect. It is only examined further when it has a
        ``_cacher`` attribute, which is read as a pair: ``_cacher[0]`` is the
        key into the parent's ``_item_cache`` and ``_cacher[1]`` is a callable
        returning the parent, or ``None`` when the parent is gone
        (presumably a weak reference -- confirm against the code that sets it).

    Returns
    -------
    bool
        ``True`` only if the parent is alive, has an ``_item_cache`` that
        contains the key, and the cached entry is ``obj`` itself (identity,
        not equality). ``False`` in every other case.
    """
    # This is a mess, selection paths that return a view set the _cacher attribute
    # on the Series; most of them also set _item_cache which adds 1 to our relevant
    # reference count, but iloc does not, so we have to check if we are actually
    # in the item cache
    if not hasattr(obj, "_cacher"):
        return False

    parent = obj._cacher[1]()
    # parent could be dead
    if parent is None:
        return False

    if hasattr(parent, "_item_cache") and obj._cacher[0] in parent._item_cache:
        # Check if we are actually the item from item_cache, iloc creates a
        # new object
        return obj is parent._item_cache[obj._cacher[0]]
    return False

pandas/tests/api/test_api.py

+23
Original file line numberDiff line numberDiff line change
@@ -357,6 +357,29 @@ def test_api_extensions(self):
357357
self.check(api_extensions, self.allowed_api_extensions)
358358

359359

360+
class TestErrors(Base):
    """API check for the ``pandas.errors`` namespace."""

    def test_errors(self):
        """Check ``pd.errors`` against its own ``__all__``."""
        # "ctypes" and "cow" are passed as ignored names; how ``check`` treats
        # them is defined on ``Base``, outside this view.
        self.check(pd.errors, pd.errors.__all__, ignored=["ctypes", "cow"])
363+
364+
365+
class TestUtil(Base):
    """API check for the ``pandas.util`` namespace."""

    def test_util(self):
        """Check ``pd.util`` against the two expected public hashing functions."""
        # The ignored names are passed through to ``check``; its exact
        # semantics are defined on ``Base``, outside this view.
        self.check(
            pd.util,
            ["hash_array", "hash_pandas_object"],
            ignored=[
                "_decorators",
                "_test_decorators",
                "_exceptions",
                "_validators",
                "capitalize_first_letter",
                "version",
                "_print_versions",
                "_tester",
            ],
        )
381+
382+
360383
class TestTesting(Base):
361384
funcs = [
362385
"assert_frame_equal",

pandas/util/__init__.py

+4
Original file line numberDiff line numberDiff line change
@@ -25,5 +25,9 @@ def __getattr__(key: str):
2525
raise AttributeError(f"module 'pandas.util' has no attribute '{key}'")
2626

2727

28+
def __dir__() -> list[str]:
    """
    Return the names reported by ``dir()`` for this module.

    Returns
    -------
    list of str
        Every name currently in the module globals, followed by
        ``"hash_array"`` and ``"hash_pandas_object"``.
    """
    # The two hashing names are appended explicitly so that ``dir()`` lists
    # them (presumably they are resolved by the module-level ``__getattr__``
    # rather than stored in globals -- its body is outside this view).
    return [*globals(), "hash_array", "hash_pandas_object"]
30+
31+
2832
def capitalize_first_letter(s: str) -> str:
    """
    Return ``s`` with its first character upper-cased and the rest untouched.

    Unlike ``str.capitalize``, the remaining characters are not lower-cased.
    Slicing (rather than indexing) makes the empty string a no-op.
    """
    return s[:1].upper() + s[1:]

0 commit comments

Comments
 (0)