diff --git a/spec/API_specification/dataframe_api/_types.py b/spec/API_specification/dataframe_api/_types.py index 2b43e115..d7d15dd6 100644 --- a/spec/API_specification/dataframe_api/_types.py +++ b/spec/API_specification/dataframe_api/_types.py @@ -3,35 +3,41 @@ """ from __future__ import annotations -from dataclasses import dataclass from typing import ( + TYPE_CHECKING, Any, List, Literal, + Mapping, Optional, + Protocol, Sequence, Tuple, Union, - TYPE_CHECKING, ) -from enum import Enum + +if TYPE_CHECKING: + from .column_object import Column as ColumnType + from .dataframe_object import DataFrame as DataFrameType if TYPE_CHECKING: from .dtypes import ( Bool, - Float64, Float32, - Int64, - Int32, - Int16, + Float64, Int8, - UInt64, - UInt32, - UInt16, + Int16, + Int32, + Int64, UInt8, + UInt16, + UInt32, + UInt64, ) - DType = Union[Bool, Float64, Float32, Int64, Int32, Int16, Int8, UInt64, UInt32, UInt16, UInt8] + DType = Union[ + Bool, Float64, Float32, Int64, Int32, Int16, Int8, UInt64, UInt32, UInt16, UInt8 + ] # Type alias: Mypy needs Any, but for readability we need to make clear this # is a Python scalar (i.e., an instance of `bool`, `int`, `float`, `str`, etc.) @@ -41,6 +47,113 @@ NullType = Any +class Namespace(Protocol): + __dataframe_api_version__: str + + @staticmethod + def DataFrame() -> DataFrameType: + ... + + @staticmethod + def Column() -> ColumnType: + ... + + @staticmethod + def Int64() -> Int64: + ... + + @staticmethod + def Int32() -> Int32: + ... + + @staticmethod + def Int16() -> Int16: + ... + + @staticmethod + def Int8() -> Int8: + ... + + @staticmethod + def UInt64() -> UInt64: + ... + + @staticmethod + def UInt32() -> UInt32: + ... + + @staticmethod + def UInt16() -> UInt16: + ... + + @staticmethod + def UInt8() -> UInt8: + ... + + @staticmethod + def Float64() -> Float64: + ... + + @staticmethod + def Float32() -> Float32: + ... + + @staticmethod + def Bool() -> Bool: + ... + + @staticmethod + def concat(dataframes: Sequence[DataFrameType]) -> DataFrameType: + ... + + @staticmethod + def column_from_sequence( + sequence: Sequence[Any], + *, + dtype: Any, + name: str = "", + api_version: str | None = None, + ) -> ColumnType: + ... + + @staticmethod + def dataframe_from_dict( + data: Mapping[str, ColumnType], *, api_version: str | None = None + ) -> DataFrameType: + ... + + @staticmethod + def column_from_1d_array( + array: Any, *, dtype: Any, name: str = "", api_version: str | None = None + ) -> ColumnType: + ... + + @staticmethod + def dataframe_from_2d_array( + array: Any, + *, + names: Sequence[str], + dtypes: Mapping[str, Any], + api_version: str | None = None, + ) -> DataFrameType: + ... + + @staticmethod + def is_null(value: object, /) -> bool: + ... + + @staticmethod + def is_dtype(dtype: Any, kind: str | tuple[str, ...]) -> bool: + ... + + +class SupportsDataFrameAPI(Protocol): + def __dataframe_consortium_standard__( + self, *, api_version: str | None = None + ) -> DataFrameType: + ... + + __all__ = [ "Any", "DataFrame", @@ -58,5 +171,4 @@ "device", "DType", "ellipsis", - "Enum", ] diff --git a/spec/API_specification/dataframe_api/dataframe_object.py b/spec/API_specification/dataframe_api/dataframe_object.py index 54e380d8..e8a9a21e 100644 --- a/spec/API_specification/dataframe_api/dataframe_object.py +++ b/spec/API_specification/dataframe_api/dataframe_object.py @@ -6,7 +6,7 @@ if TYPE_CHECKING: from .column_object import Column from .groupby_object import GroupBy - from ._types import NullType, Scalar, DType + from ._types import NullType, Scalar, Namespace, DType __all__ = ["DataFrame"] @@ -36,7 +36,7 @@ class DataFrame: **Methods and Attributes** """ - def __dataframe_namespace__(self) -> Any: + def __dataframe_namespace__(self) -> Namespace: """ Returns an object that has all the top-level dataframe API functions on it. diff --git a/spec/conf.py b/spec/conf.py index c3aabb4d..94782518 100644 --- a/spec/conf.py +++ b/spec/conf.py @@ -85,6 +85,7 @@ ('py:class', 'Bool'), ('py:class', 'optional'), ('py:class', 'NullType'), + ('py:class', 'Namespace'), ] # NOTE: this alias handling isn't used yet - added in anticipation of future # need based on dataframe API aliases.