Skip to content

Commit eb965cf

Browse files
authored
CLN,TYP: Use substitutions in info docstrings (#33673)
* add subs in info.py * substitute docstring in frame.py * Add type annotations * reinstate data parameter * revert removed line * indent docstring * fix validation of docstrings * fix indentation of max_columns
1 parent cf61be6 commit eb965cf

File tree

2 files changed

+128
-99
lines changed

2 files changed

+128
-99
lines changed

pandas/core/frame.py

+108-1
Original file line numberDiff line numberDiff line change
@@ -2331,9 +2331,116 @@ def to_html(
23312331
)
23322332

23332333
# ----------------------------------------------------------------------
2334+
@Substitution(
2335+
klass="DataFrame",
2336+
type_sub=" and columns",
2337+
max_cols_sub=(
2338+
"""max_cols : int, optional
2339+
When to switch from the verbose to the truncated output. If the
2340+
DataFrame has more than `max_cols` columns, the truncated output
2341+
is used. By default, the setting in
2342+
``pandas.options.display.max_info_columns`` is used.
2343+
"""
2344+
),
2345+
examples_sub=(
2346+
"""
2347+
>>> int_values = [1, 2, 3, 4, 5]
2348+
>>> text_values = ['alpha', 'beta', 'gamma', 'delta', 'epsilon']
2349+
>>> float_values = [0.0, 0.25, 0.5, 0.75, 1.0]
2350+
>>> df = pd.DataFrame({"int_col": int_values, "text_col": text_values,
2351+
... "float_col": float_values})
2352+
>>> df
2353+
int_col text_col float_col
2354+
0 1 alpha 0.00
2355+
1 2 beta 0.25
2356+
2 3 gamma 0.50
2357+
3 4 delta 0.75
2358+
4 5 epsilon 1.00
2359+
2360+
Prints information of all columns:
2361+
2362+
>>> df.info(verbose=True)
2363+
<class 'pandas.core.frame.DataFrame'>
2364+
RangeIndex: 5 entries, 0 to 4
2365+
Data columns (total 3 columns):
2366+
# Column Non-Null Count Dtype
2367+
--- ------ -------------- -----
2368+
0 int_col 5 non-null int64
2369+
1 text_col 5 non-null object
2370+
2 float_col 5 non-null float64
2371+
dtypes: float64(1), int64(1), object(1)
2372+
memory usage: 248.0+ bytes
2373+
2374+
Prints a summary of columns count and its dtypes but not per column
2375+
information:
2376+
2377+
>>> df.info(verbose=False)
2378+
<class 'pandas.core.frame.DataFrame'>
2379+
RangeIndex: 5 entries, 0 to 4
2380+
Columns: 3 entries, int_col to float_col
2381+
dtypes: float64(1), int64(1), object(1)
2382+
memory usage: 248.0+ bytes
2383+
2384+
Pipe output of DataFrame.info to buffer instead of sys.stdout, get
2385+
buffer content and write it to a text file:
2386+
2387+
>>> import io
2388+
>>> buffer = io.StringIO()
2389+
>>> df.info(buf=buffer)
2390+
>>> s = buffer.getvalue()
2391+
>>> with open("df_info.txt", "w",
2392+
... encoding="utf-8") as f: # doctest: +SKIP
2393+
... f.write(s)
2394+
260
2395+
2396+
The `memory_usage` parameter allows deep introspection mode, especially
2397+
useful for big DataFrames and fine-tune memory optimization:
2398+
2399+
>>> random_strings_array = np.random.choice(['a', 'b', 'c'], 10 ** 6)
2400+
>>> df = pd.DataFrame({
2401+
... 'column_1': np.random.choice(['a', 'b', 'c'], 10 ** 6),
2402+
... 'column_2': np.random.choice(['a', 'b', 'c'], 10 ** 6),
2403+
... 'column_3': np.random.choice(['a', 'b', 'c'], 10 ** 6)
2404+
... })
2405+
>>> df.info()
2406+
<class 'pandas.core.frame.DataFrame'>
2407+
RangeIndex: 1000000 entries, 0 to 999999
2408+
Data columns (total 3 columns):
2409+
# Column Non-Null Count Dtype
2410+
--- ------ -------------- -----
2411+
0 column_1 1000000 non-null object
2412+
1 column_2 1000000 non-null object
2413+
2 column_3 1000000 non-null object
2414+
dtypes: object(3)
2415+
memory usage: 22.9+ MB
2416+
2417+
>>> df.info(memory_usage='deep')
2418+
<class 'pandas.core.frame.DataFrame'>
2419+
RangeIndex: 1000000 entries, 0 to 999999
2420+
Data columns (total 3 columns):
2421+
# Column Non-Null Count Dtype
2422+
--- ------ -------------- -----
2423+
0 column_1 1000000 non-null object
2424+
1 column_2 1000000 non-null object
2425+
2 column_3 1000000 non-null object
2426+
dtypes: object(3)
2427+
memory usage: 188.8 MB"""
2428+
),
2429+
see_also_sub=(
2430+
"""
2431+
DataFrame.describe: Generate descriptive statistics of DataFrame
2432+
columns.
2433+
DataFrame.memory_usage: Memory usage of DataFrame columns."""
2434+
),
2435+
)
23342436
@doc(info)
23352437
def info(
2336-
self, verbose=None, buf=None, max_cols=None, memory_usage=None, null_counts=None
2438+
self,
2439+
verbose: Optional[bool] = None,
2440+
buf: Optional[IO[str]] = None,
2441+
max_cols: Optional[int] = None,
2442+
memory_usage: Optional[Union[bool, str]] = None,
2443+
null_counts: Optional[bool] = None,
23372444
) -> None:
23382445
return info(self, verbose, buf, max_cols, memory_usage, null_counts)
23392446

pandas/io/formats/info.py

+20-98
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,10 @@
11
import sys
2+
from typing import IO, Optional, Union
23

34
from pandas._config import get_option
45

6+
from pandas._typing import FrameOrSeries
7+
58
from pandas.io.formats import format as fmt
69
from pandas.io.formats.printing import pprint_thing
710

@@ -11,32 +14,33 @@ def _put_str(s, space):
1114

1215

1316
def info(
14-
data, verbose=None, buf=None, max_cols=None, memory_usage=None, null_counts=None
17+
data: FrameOrSeries,
18+
verbose: Optional[bool] = None,
19+
buf: Optional[IO[str]] = None,
20+
max_cols: Optional[int] = None,
21+
memory_usage: Optional[Union[bool, str]] = None,
22+
null_counts: Optional[bool] = None,
1523
) -> None:
1624
"""
17-
Print a concise summary of a DataFrame.
25+
Print a concise summary of a %(klass)s.
1826
19-
This method prints information about a DataFrame including
20-
the index dtype and column dtypes, non-null values and memory usage.
27+
This method prints information about a %(klass)s including
28+
the index dtype%(type_sub)s, non-null values and memory usage.
2129
2230
Parameters
2331
----------
24-
data : DataFrame
25-
DataFrame to print information about.
32+
data : %(klass)s
33+
%(klass)s to print information about.
2634
verbose : bool, optional
2735
Whether to print the full summary. By default, the setting in
2836
``pandas.options.display.max_info_columns`` is followed.
2937
buf : writable buffer, defaults to sys.stdout
3038
Where to send the output. By default, the output is printed to
3139
sys.stdout. Pass a writable buffer if you need to further process
3240
the output.
33-
max_cols : int, optional
34-
When to switch from the verbose to the truncated output. If the
35-
DataFrame has more than `max_cols` columns, the truncated output
36-
is used. By default, the setting in
37-
``pandas.options.display.max_info_columns`` is used.
41+
%(max_cols_sub)s
3842
memory_usage : bool, str, optional
39-
Specifies whether total memory usage of the DataFrame
43+
Specifies whether total memory usage of the %(klass)s
4044
elements (including the index) should be displayed. By default,
4145
this follows the ``pandas.options.display.memory_usage`` setting.
4246
@@ -50,105 +54,23 @@ def info(
5054
at the cost of computational resources.
5155
null_counts : bool, optional
5256
Whether to show the non-null counts. By default, this is shown
53-
only if the frame is smaller than
57+
only if the %(klass)s is smaller than
5458
``pandas.options.display.max_info_rows`` and
5559
``pandas.options.display.max_info_columns``. A value of True always
5660
shows the counts, and False never shows the counts.
5761
5862
Returns
5963
-------
6064
None
61-
This method prints a summary of a DataFrame and returns None.
65+
This method prints a summary of a %(klass)s and returns None.
6266
6367
See Also
6468
--------
65-
DataFrame.describe: Generate descriptive statistics of DataFrame
66-
columns.
67-
DataFrame.memory_usage: Memory usage of DataFrame columns.
69+
%(see_also_sub)s
6870
6971
Examples
7072
--------
71-
>>> int_values = [1, 2, 3, 4, 5]
72-
>>> text_values = ['alpha', 'beta', 'gamma', 'delta', 'epsilon']
73-
>>> float_values = [0.0, 0.25, 0.5, 0.75, 1.0]
74-
>>> df = pd.DataFrame({"int_col": int_values, "text_col": text_values,
75-
... "float_col": float_values})
76-
>>> df
77-
int_col text_col float_col
78-
0 1 alpha 0.00
79-
1 2 beta 0.25
80-
2 3 gamma 0.50
81-
3 4 delta 0.75
82-
4 5 epsilon 1.00
83-
84-
Prints information of all columns:
85-
86-
>>> df.info(verbose=True)
87-
<class 'pandas.core.frame.DataFrame'>
88-
RangeIndex: 5 entries, 0 to 4
89-
Data columns (total 3 columns):
90-
# Column Non-Null Count Dtype
91-
--- ------ -------------- -----
92-
0 int_col 5 non-null int64
93-
1 text_col 5 non-null object
94-
2 float_col 5 non-null float64
95-
dtypes: float64(1), int64(1), object(1)
96-
memory usage: 248.0+ bytes
97-
98-
Prints a summary of columns count and its dtypes but not per column
99-
information:
100-
101-
>>> df.info(verbose=False)
102-
<class 'pandas.core.frame.DataFrame'>
103-
RangeIndex: 5 entries, 0 to 4
104-
Columns: 3 entries, int_col to float_col
105-
dtypes: float64(1), int64(1), object(1)
106-
memory usage: 248.0+ bytes
107-
108-
Pipe output of DataFrame.info to buffer instead of sys.stdout, get
109-
buffer content and writes to a text file:
110-
111-
>>> import io
112-
>>> buffer = io.StringIO()
113-
>>> df.info(buf=buffer)
114-
>>> s = buffer.getvalue()
115-
>>> with open("df_info.txt", "w",
116-
... encoding="utf-8") as f: # doctest: +SKIP
117-
... f.write(s)
118-
260
119-
120-
The `memory_usage` parameter allows deep introspection mode, specially
121-
useful for big DataFrames and fine-tune memory optimization:
122-
123-
>>> random_strings_array = np.random.choice(['a', 'b', 'c'], 10 ** 6)
124-
>>> df = pd.DataFrame({
125-
... 'column_1': np.random.choice(['a', 'b', 'c'], 10 ** 6),
126-
... 'column_2': np.random.choice(['a', 'b', 'c'], 10 ** 6),
127-
... 'column_3': np.random.choice(['a', 'b', 'c'], 10 ** 6)
128-
... })
129-
>>> df.info()
130-
<class 'pandas.core.frame.DataFrame'>
131-
RangeIndex: 1000000 entries, 0 to 999999
132-
Data columns (total 3 columns):
133-
# Column Non-Null Count Dtype
134-
--- ------ -------------- -----
135-
0 column_1 1000000 non-null object
136-
1 column_2 1000000 non-null object
137-
2 column_3 1000000 non-null object
138-
dtypes: object(3)
139-
memory usage: 22.9+ MB
140-
141-
>>> df.info(memory_usage='deep')
142-
<class 'pandas.core.frame.DataFrame'>
143-
RangeIndex: 1000000 entries, 0 to 999999
144-
Data columns (total 3 columns):
145-
# Column Non-Null Count Dtype
146-
--- ------ -------------- -----
147-
0 column_1 1000000 non-null object
148-
1 column_2 1000000 non-null object
149-
2 column_3 1000000 non-null object
150-
dtypes: object(3)
151-
memory usage: 188.8 MB
73+
%(examples_sub)s
15274
"""
15375
if buf is None: # pragma: no cover
15476
buf = sys.stdout

0 commit comments

Comments
 (0)