Skip to content

Commit 0999051

Browse files
authored
Add examples (#267)
* remove unused typevars, make dtype type alias * Rename `get_column_names` to `column_names` and make property (#254) * get_column_names -> column_names property * get_column_names -> column_names * add namespace protocol * fixup * add examples * add plotting example * fixup; * redirect examples * type check; * exclude examples from conf.py * post-merge fixup * add other example * fixup example; * noop
1 parent 56f56a7 commit 0999051

File tree

10 files changed

+175
-40
lines changed

10 files changed

+175
-40
lines changed

Diff for: .github/workflows/mypy.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -29,4 +29,4 @@ jobs:
2929
- name: install-reqs
3030
run: python -m pip install --upgrade mypy==1.4.0
3131
- name: run mypy
32-
run: cd spec/API_specification && mypy dataframe_api
32+
run: cd spec/API_specification && mypy dataframe_api && mypy examples

Diff for: spec/API_specification/dataframe_api/_types.py

+118-4
Original file line numberDiff line numberDiff line change
@@ -3,18 +3,22 @@
33
"""
44
from __future__ import annotations
55

6-
from dataclasses import dataclass
76
from typing import (
7+
TYPE_CHECKING,
88
Any,
99
List,
1010
Literal,
11+
Mapping,
1112
Optional,
13+
Protocol,
1214
Sequence,
1315
Tuple,
1416
Union,
15-
TYPE_CHECKING,
1617
)
17-
from enum import Enum
18+
19+
if TYPE_CHECKING:
20+
from .dataframe_object import DataFrame as DataFrameType
21+
from .column_object import Column as ColumnType
1822

1923
if TYPE_CHECKING:
2024
from .dtypes import (
@@ -41,6 +45,117 @@
4145
NullType = Any
4246

4347

48+
class Namespace(Protocol):
49+
__dataframe_api_version__: str
50+
51+
@staticmethod
52+
def DataFrame() -> DataFrameType:
53+
...
54+
55+
@staticmethod
56+
def Column() -> ColumnType:
57+
...
58+
59+
@staticmethod
60+
def Int64() -> Int64:...
61+
@staticmethod
62+
def Int16() -> Int16:...
63+
64+
@staticmethod
65+
def Int32() -> Int32:
66+
...
67+
68+
69+
@staticmethod
70+
def Int8() -> Int8:
71+
...
72+
73+
@staticmethod
74+
def UInt64() -> UInt64:
75+
...
76+
77+
@staticmethod
78+
def UInt32() -> UInt32:
79+
...
80+
81+
@staticmethod
82+
def UInt16() -> UInt16:
83+
...
84+
85+
@staticmethod
86+
def UInt8() -> UInt8:
87+
...
88+
89+
@staticmethod
90+
def Float64() -> Float64:
91+
...
92+
93+
@staticmethod
94+
def Float32() -> Float32:
95+
...
96+
97+
@staticmethod
98+
def Bool() -> Bool:
99+
...
100+
101+
@staticmethod
102+
def concat(dataframes: Sequence[DataFrameType]) -> DataFrameType:
103+
...
104+
105+
@staticmethod
106+
def column_from_sequence(
107+
sequence: Sequence[Any],
108+
*,
109+
dtype: Any,
110+
name: str = "",
111+
api_version: str | None = None,
112+
) -> ColumnType:
113+
...
114+
115+
@staticmethod
116+
def dataframe_from_dict(
117+
data: Mapping[str, ColumnType], *, api_version: str | None = None
118+
) -> DataFrameType:
119+
...
120+
121+
@staticmethod
122+
def column_from_1d_array(
123+
array: Any, *, dtype: Any, name: str = "", api_version: str | None = None
124+
) -> ColumnType:
125+
...
126+
127+
@staticmethod
128+
def dataframe_from_2d_array(
129+
array: Any,
130+
*,
131+
names: Sequence[str],
132+
dtypes: Mapping[str, Any],
133+
api_version: str | None = None,
134+
) -> DataFrameType:
135+
...
136+
137+
@staticmethod
138+
def is_null(value: object, /) -> bool:
139+
...
140+
141+
@staticmethod
142+
def is_dtype(dtype: Any, kind: str | tuple[str, ...]) -> bool:
143+
...
144+
145+
146+
class SupportsDataFrameAPI(Protocol):
147+
def __dataframe_consortium_standard__(
148+
self, *, api_version: str | None = None
149+
) -> DataFrameType:
150+
...
151+
152+
class SupportsColumnAPI(Protocol):
153+
def __column_consortium_standard__(
154+
self, *, api_version: str | None = None
155+
) -> ColumnType:
156+
...
157+
158+
44159
__all__ = [
45160
"Any",
46161
"DataFrame",
@@ -58,5 +173,4 @@
58173
"device",
59174
"DType",
60175
"ellipsis",
61-
"Enum",
62176
]

Diff for: spec/API_specification/dataframe_api/column_object.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
from typing import Any,NoReturn, TYPE_CHECKING, Literal, Generic
44

55
if TYPE_CHECKING:
6-
from ._types import NullType, Scalar, DType
6+
from ._types import NullType, Scalar, DType, Namespace
77

88

99
__all__ = ['Column']
@@ -19,7 +19,7 @@ class Column:
1919
2020
"""
2121

22-
def __column_namespace__(self) -> Any:
22+
def __column_namespace__(self) -> Namespace:
2323
"""
2424
Returns an object that has all the Dataframe Standard API functions on it.
2525

Diff for: spec/API_specification/dataframe_api/dataframe_object.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
if TYPE_CHECKING:
77
from .column_object import Column
88
from .groupby_object import GroupBy
9-
from ._types import NullType, Scalar, DType
9+
from ._types import NullType, Scalar, Namespace, DType
1010

1111

1212
__all__ = ["DataFrame"]
@@ -36,7 +36,7 @@ class DataFrame:
3636
**Methods and Attributes**
3737
3838
"""
39-
def __dataframe_namespace__(self) -> Any:
39+
def __dataframe_namespace__(self) -> Namespace:
4040
"""
4141
Returns an object that has all the top-level dataframe API functions on it.
4242
+15
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
from typing import Any
2+
3+
from dataframe_api._types import SupportsDataFrameAPI
4+
5+
def my_dataframe_agnostic_function(df_non_standard: SupportsDataFrameAPI) -> Any:
6+
df = df_non_standard.__dataframe_consortium_standard__(api_version='2023.09-beta')
7+
8+
for column_name in df.column_names:
9+
if column_name == 'species':
10+
continue
11+
new_column = df.get_column_by_name(column_name)
12+
new_column = (new_column - new_column.mean()) / new_column.std()
13+
df = df.assign(new_column.rename(f'{column_name}_scaled'))
14+
15+
return df.dataframe

Diff for: spec/API_specification/examples/02_plotting.py

+24
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
from typing import Callable, Any
2+
3+
my_plotting_function: Callable[[Any, Any], Any]
4+
5+
from dataframe_api._types import SupportsColumnAPI
6+
7+
def group_by_and_plot(
8+
x_any: SupportsColumnAPI,
9+
y_any: SupportsColumnAPI,
10+
color_any: SupportsColumnAPI,
11+
) -> None:
12+
x = x_any.__column_consortium_standard__()
13+
y = y_any.__column_consortium_standard__()
14+
color = color_any.__column_consortium_standard__()
15+
16+
namespace = x.__column_namespace__()
17+
18+
df = namespace.dataframe_from_dict({"x": x, "y": y, "color": color})
19+
20+
agg = df.group_by("color").mean()
21+
x = agg.get_column_by_name("x").to_array_object(namespace.Float64())
22+
y = agg.get_column_by_name("y").to_array_object(namespace.Float64())
23+
24+
my_plotting_function(x, y)

Diff for: spec/API_specification/examples/README.md

+5
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
# Examples
2+
3+
Here are some examples of how to use the DataFrame API.
4+
5+
These should work for any library which has an implementation of the Standard.

Diff for: spec/API_specification/examples/__init__.py

Whitespace-only changes.

Diff for: spec/conf.py

+7-1
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,7 @@
8585
('py:class', 'Bool'),
8686
('py:class', 'optional'),
8787
('py:class', 'NullType'),
88+
('py:class', 'Namespace'),
8889
]
8990
# NOTE: this alias handling isn't used yet - added in anticipation of future
9091
# need based on dataframe API aliases.
@@ -112,7 +113,12 @@
112113
# List of patterns, relative to source directory, that match files and
113114
# directories to ignore when looking for source files.
114115
# This pattern also affects html_static_path and html_extra_path.
115-
exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']
116+
exclude_patterns = [
117+
'_build',
118+
'Thumbs.db',
119+
'.DS_Store',
120+
'API_specification/examples/*',
121+
]
116122

117123
# MyST options
118124
myst_heading_anchors = 3

Diff for: spec/purpose_and_scope.md

+1-30
Original file line numberDiff line numberDiff line change
@@ -275,36 +275,7 @@ latest version of the dataframe API specification. If the given
275275
version is invalid or not implemented for the given module, an
276276
error should be raised. Default: ``None``.
277277

278-
Example:
279-
280-
```python
281-
import pandas as pd
282-
import polars as pl
283-
284-
285-
df_pandas = pd.read_parquet('iris.parquet')
286-
df_polars = pl.scan_parquet('iris.parquet')
287-
288-
def my_dataframe_agnostic_function(df):
289-
df = df.__dataframe_consortium_standard__(api_version='2023.09-beta')
290-
291-
mask = df.get_column_by_name('species') != 'setosa'
292-
df = df.filter(mask)
293-
294-
for column_name in df.column_names:
295-
if column_name == 'species':
296-
continue
297-
new_column = df.get_column_by_name(column_name)
298-
new_column = (new_column - new_column.mean()) / new_column.std()
299-
df = df.assign(new_column.rename(f'{column_name}_scaled'))
300-
301-
return df.dataframe
302-
303-
# Then, either of the following will work as expected:
304-
my_dataframe_agnostic_function(df_pandas)
305-
my_dataframe_agnostic_function(df_polars)
306-
my_dataframe_agnostic_function(df_any_other_library_with_a_standard_compliant_namespace)
307-
```
278+
For some examples, please check https://github.com/data-apis/dataframe-api/tree/main/spec/API_specification/examples.
308279

309280
### Checking a dataframe object for Compliance
310281

0 commit comments

Comments
 (0)