Skip to content

Add examples #267

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 15 commits into from
Sep 28, 2023
2 changes: 1 addition & 1 deletion .github/workflows/mypy.yml
Original file line number Diff line number Diff line change
Expand Up @@ -29,4 +29,4 @@ jobs:
- name: install-reqs
run: python -m pip install --upgrade mypy==1.4.0
- name: run mypy
run: cd spec/API_specification && mypy dataframe_api
run: cd spec/API_specification && mypy dataframe_api && mypy examples
122 changes: 118 additions & 4 deletions spec/API_specification/dataframe_api/_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,18 +3,22 @@
"""
from __future__ import annotations

from dataclasses import dataclass
from typing import (
TYPE_CHECKING,
Any,
List,
Literal,
Mapping,
Optional,
Protocol,
Sequence,
Tuple,
Union,
TYPE_CHECKING,
)
from enum import Enum

if TYPE_CHECKING:
from .dataframe_object import DataFrame as DataFrameType
from .column_object import Column as ColumnType

if TYPE_CHECKING:
from .dtypes import (
Expand All @@ -41,6 +45,117 @@
NullType = Any


class Namespace(Protocol):
    """Structural type of a dataframe library's namespace object.

    This is the annotated return type of ``DataFrame.__dataframe_namespace__``
    and ``Column.__column_namespace__``, i.e. the object that carries the
    top-level dataframe API functions. Every member below is a signature-only
    stub; implementations are supplied by the conforming library.
    """

    # NOTE(review): presumably the version of the dataframe API that the
    # namespace implements -- confirm against the specification text.
    __dataframe_api_version__: str

    # -- Core classes ------------------------------------------------------

    @staticmethod
    def DataFrame() -> DataFrameType:
        """Stub for the namespace's ``DataFrame`` entry point."""
        ...

    @staticmethod
    def Column() -> ColumnType:
        """Stub for the namespace's ``Column`` entry point."""
        ...

    # -- Dtype constructors --------------------------------------------------

    @staticmethod
    def Int64() -> Int64:
        """Return an ``Int64`` dtype object."""
        ...

    @staticmethod
    def Int16() -> Int16:
        """Return an ``Int16`` dtype object."""
        ...

    @staticmethod
    def Int32() -> Int32:
        """Return an ``Int32`` dtype object."""
        ...

    @staticmethod
    def Int8() -> Int8:
        """Return an ``Int8`` dtype object."""
        ...

    @staticmethod
    def UInt64() -> UInt64:
        """Return a ``UInt64`` dtype object."""
        ...

    @staticmethod
    def UInt32() -> UInt32:
        """Return a ``UInt32`` dtype object."""
        ...

    @staticmethod
    def UInt16() -> UInt16:
        """Return a ``UInt16`` dtype object."""
        ...

    @staticmethod
    def UInt8() -> UInt8:
        """Return a ``UInt8`` dtype object."""
        ...

    @staticmethod
    def Float64() -> Float64:
        """Return a ``Float64`` dtype object."""
        ...

    @staticmethod
    def Float32() -> Float32:
        """Return a ``Float32`` dtype object."""
        ...

    @staticmethod
    def Bool() -> Bool:
        """Return a ``Bool`` dtype object."""
        ...

    # -- Top-level functions -------------------------------------------------

    @staticmethod
    def concat(dataframes: Sequence[DataFrameType]) -> DataFrameType:
        """Combine a sequence of dataframes into a single dataframe."""
        ...

    @staticmethod
    def column_from_sequence(
        sequence: Sequence[Any],
        *,
        dtype: Any,
        name: str = "",
        api_version: str | None = None,
    ) -> ColumnType:
        """Build a column from a sequence of values.

        ``dtype`` is a required keyword; ``name`` defaults to the empty
        string and ``api_version`` to ``None``.
        """
        ...

    @staticmethod
    def dataframe_from_dict(
        data: Mapping[str, ColumnType],
        *,
        api_version: str | None = None,
    ) -> DataFrameType:
        """Build a dataframe from a mapping of string keys to columns."""
        ...

    @staticmethod
    def column_from_1d_array(
        array: Any,
        *,
        dtype: Any,
        name: str = "",
        api_version: str | None = None,
    ) -> ColumnType:
        """Build a column from a one-dimensional array object.

        ``dtype`` is a required keyword; ``name`` defaults to the empty
        string and ``api_version`` to ``None``.
        """
        ...

    @staticmethod
    def dataframe_from_2d_array(
        array: Any,
        *,
        names: Sequence[str],
        dtypes: Mapping[str, Any],
        api_version: str | None = None,
    ) -> DataFrameType:
        """Build a dataframe from a two-dimensional array object.

        ``names`` and ``dtypes`` are required keywords; ``dtypes`` is keyed
        by string.
        """
        ...

    @staticmethod
    def is_null(value: object, /) -> bool:
        """Report whether ``value`` (positional-only) is a null value."""
        ...

    @staticmethod
    def is_dtype(dtype: Any, kind: str | tuple[str, ...]) -> bool:
        """Report whether ``dtype`` matches ``kind``.

        ``kind`` is either one string or a tuple of strings.
        """
        ...


class SupportsDataFrameAPI(Protocol):
    """Structural type for objects that expose a Standard dataframe.

    Any object with a ``__dataframe_consortium_standard__`` method of the
    signature below satisfies this protocol.
    """

    def __dataframe_consortium_standard__(
        self,
        *,
        api_version: str | None = None,
    ) -> DataFrameType:
        """Return the Standard dataframe for this object.

        ``api_version`` is keyword-only and defaults to ``None``.
        """
        ...

class SupportsColumnAPI(Protocol):
    """Structural type for objects that expose a Standard column.

    Any object with a ``__column_consortium_standard__`` method of the
    signature below satisfies this protocol.
    """

    def __column_consortium_standard__(
        self,
        *,
        api_version: str | None = None,
    ) -> ColumnType:
        """Return the Standard column for this object.

        ``api_version`` is keyword-only and defaults to ``None``.
        """
        ...


__all__ = [
"Any",
"DataFrame",
Expand All @@ -58,5 +173,4 @@
"device",
"DType",
"ellipsis",
"Enum",
]
4 changes: 2 additions & 2 deletions spec/API_specification/dataframe_api/column_object.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from typing import Any,NoReturn, TYPE_CHECKING, Literal, Generic

if TYPE_CHECKING:
from ._types import NullType, Scalar, DType
from ._types import NullType, Scalar, DType, Namespace


__all__ = ['Column']
Expand All @@ -19,7 +19,7 @@ class Column:

"""

def __column_namespace__(self) -> Any:
def __column_namespace__(self) -> Namespace:
"""
Returns an object that has all the Dataframe Standard API functions on it.

Expand Down
4 changes: 2 additions & 2 deletions spec/API_specification/dataframe_api/dataframe_object.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
if TYPE_CHECKING:
from .column_object import Column
from .groupby_object import GroupBy
from ._types import NullType, Scalar, DType
from ._types import NullType, Scalar, Namespace, DType


__all__ = ["DataFrame"]
Expand Down Expand Up @@ -36,7 +36,7 @@ class DataFrame:
**Methods and Attributes**

"""
def __dataframe_namespace__(self) -> Any:
def __dataframe_namespace__(self) -> Namespace:
"""
Returns an object that has all the top-level dataframe API functions on it.

Expand Down
15 changes: 15 additions & 0 deletions spec/API_specification/examples/01_standardise_columns.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
from typing import Any

from dataframe_api._types import SupportsDataFrameAPI

def my_dataframe_agnostic_function(df_non_standard: SupportsDataFrameAPI) -> Any:
    """Add a standardised ``<name>_scaled`` column for every non-species column.

    The input is first converted to its Standard dataframe (API version
    ``2023.09-beta``). Each column except ``species`` is centred on its mean
    and divided by its standard deviation, then assigned back under the
    original name suffixed with ``_scaled``.

    Returns the ``dataframe`` attribute of the resulting Standard dataframe.
    """
    df = df_non_standard.__dataframe_consortium_standard__(api_version='2023.09-beta')

    for column_name in df.column_names:
        if column_name != 'species':
            column = df.get_column_by_name(column_name)
            centred = column - column.mean()
            scaled = centred / column.std()
            df = df.assign(scaled.rename(f'{column_name}_scaled'))

    return df.dataframe
24 changes: 24 additions & 0 deletions spec/API_specification/examples/02_plotting.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
from typing import Callable, Any

my_plotting_function: Callable[[Any, Any], Any]

from dataframe_api._types import SupportsColumnAPI

def group_by_and_plot(
    x_any: SupportsColumnAPI,
    y_any: SupportsColumnAPI,
    color_any: SupportsColumnAPI,
) -> None:
    """Plot the per-colour means of ``x`` against the per-colour means of ``y``.

    The three inputs are converted to Standard columns and gathered into one
    dataframe via the namespace obtained from the ``x`` column. That
    dataframe is grouped by ``"color"`` and averaged, and the resulting
    ``"x"`` and ``"y"`` columns are exported as ``Float64`` array objects and
    handed to ``my_plotting_function``.
    """
    x_col = x_any.__column_consortium_standard__()
    y_col = y_any.__column_consortium_standard__()
    color_col = color_any.__column_consortium_standard__()

    # The namespace comes from the first column and is used for all three.
    namespace = x_col.__column_namespace__()
    df = namespace.dataframe_from_dict({"x": x_col, "y": y_col, "color": color_col})

    means = df.group_by("color").mean()
    x_means = means.get_column_by_name("x").to_array_object(namespace.Float64())
    y_means = means.get_column_by_name("y").to_array_object(namespace.Float64())

    my_plotting_function(x_means, y_means)
5 changes: 5 additions & 0 deletions spec/API_specification/examples/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Examples

Here are some examples of how to use the DataFrame API.

These should work for any library which has an implementation of the Standard.
Empty file.
8 changes: 7 additions & 1 deletion spec/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,7 @@
('py:class', 'Bool'),
('py:class', 'optional'),
('py:class', 'NullType'),
('py:class', 'Namespace'),
]
# NOTE: this alias handling isn't used yet - added in anticipation of future
# need based on dataframe API aliases.
Expand Down Expand Up @@ -112,7 +113,12 @@
# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
# This pattern also affects html_static_path and html_extra_path.
exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']
exclude_patterns = [
'_build',
'Thumbs.db',
'.DS_Store',
'API_specification/examples/*',
]

# MyST options
myst_heading_anchors = 3
Expand Down
31 changes: 1 addition & 30 deletions spec/purpose_and_scope.md
Original file line number Diff line number Diff line change
Expand Up @@ -275,36 +275,7 @@ latest version of the dataframe API specification. If the given
version is invalid or not implemented for the given module, an
error should be raised. Default: ``None``.

Example:

```python
import pandas as pd
import polars as pl


df_pandas = pd.read_parquet('iris.parquet')
df_polars = pl.scan_parquet('iris.parquet')

def my_dataframe_agnostic_function(df):
df = df.__dataframe_consortium_standard__(api_version='2023.09-beta')

mask = df.get_column_by_name('species') != 'setosa'
df = df.filter(mask)

for column_name in df.column_names:
if column_name == 'species':
continue
new_column = df.get_column_by_name(column_name)
new_column = (new_column - new_column.mean()) / new_column.std()
df = df.assign(new_column.rename(f'{column_name}_scaled'))

return df.dataframe

# Then, either of the following will work as expected:
my_dataframe_agnostic_function(df_pandas)
my_dataframe_agnostic_function(df_polars)
my_dataframe_agnostic_function(df_any_other_library_with_a_standard_compliant_namespace)
```
For some examples, please check https://github.com/data-apis/dataframe-api/tree/main/spec/API_specification/examples.

### Checking a dataframe object for Compliance

Expand Down