narwhals-dev · MarcoGorelli · Feb 10, 2025 · Jan 25, 2025 · Jan 25, 2025 · Jan 25, 2025
diff --git a/narwhals/_arrow/utils.py b/narwhals/_arrow/utils.py
@@ -141,8 +141,13 @@ def narwhals_to_native_dtype(dtype: DType | type[DType], version: Version) -> pa
             ]
         )
     if isinstance_or_issubclass(dtype, dtypes.Array):  # pragma: no cover
-        msg = "Converting to Array dtype is not supported yet"
-        return NotImplementedError(msg)
+        inner = narwhals_to_native_dtype(
+            dtype.inner,  # type: ignore[union-attr]
+            version=version,
+        )
+        list_size = dtype.size  # type: ignore[union-attr]
+        return pa.list_(inner, list_size=list_size)
+
     msg = f"Unknown dtype: {dtype}"  # pragma: no cover
     raise AssertionError(msg)
 
@@ -220,7 +225,7 @@ def broadcast_and_extract_dataframe_comparand(
 
     if isinstance(other, ArrowSeries):
         len_other = len(other)
-        if len_other == 1:
+        if len_other == 1 and length != 1:
             import numpy as np  # ignore-banned-import
 
             value = other._native_series[0]

diff --git a/narwhals/_duckdb/utils.py b/narwhals/_duckdb/utils.py
@@ -133,10 +133,13 @@ def native_to_narwhals_dtype(duckdb_dtype: str, version: Version) -> DType:
         )
     if match_ := re.match(r"(.*)\[\]$", duckdb_dtype):
         return dtypes.List(native_to_narwhals_dtype(match_.group(1), version))
-    if match_ := re.match(r"(\w+)\[(\d+)\]", duckdb_dtype):
+    if match_ := re.match(r"(\w+)((?:\[\d+\])+)", duckdb_dtype):
+        duckdb_inner_type = match_.group(1)
+        duckdb_shape = match_.group(2)
+        shape = tuple(int(value) for value in re.findall(r"\[(\d+)\]", duckdb_shape))
         return dtypes.Array(
-            native_to_narwhals_dtype(match_.group(1), version),
-            int(match_.group(2)),
+            inner=native_to_narwhals_dtype(duckdb_inner_type, version),
+            shape=shape,
         )
     if duckdb_dtype.startswith("DECIMAL("):
         return dtypes.Decimal()
@@ -193,8 +196,11 @@ def narwhals_to_native_dtype(dtype: DType | type[DType], version: Version) -> st
         )
         return f"STRUCT({inner})"
     if isinstance_or_issubclass(dtype, dtypes.Array):  # pragma: no cover
-        msg = "todo"
-        raise NotImplementedError(msg)
+        duckdb_shape_fmt = "".join(f"[{item}]" for item in dtype.shape)  # type: ignore[union-attr]
+        while isinstance(dtype.inner, dtypes.Array):  # type: ignore[union-attr]
+            dtype = dtype.inner  # type: ignore[union-attr]
+        inner = narwhals_to_native_dtype(dtype.inner, version)  # type: ignore[union-attr]
+        return f"{inner}{duckdb_shape_fmt}"
     msg = f"Unknown dtype: {dtype}"  # pragma: no cover
     raise AssertionError(msg)
 

diff --git a/narwhals/_pandas_like/utils.py b/narwhals/_pandas_like/utils.py
@@ -160,7 +160,7 @@ def broadcast_and_extract_dataframe_comparand(index: Any, other: Any) -> Any:
     if isinstance(other, PandasLikeSeries):
         len_other = other.len()
 
-        if len_other == 1:
+        if len_other == 1 and len(index) != 1:
             # broadcast
             s = other._native_series
             return s.__class__(s.iloc[0], index=index, dtype=s.dtype, name=s.name)
@@ -387,9 +387,7 @@ def rename(
 
 
 @lru_cache(maxsize=16)
-def non_object_native_to_narwhals_dtype(
-    dtype: str, version: Version, _implementation: Implementation
-) -> DType:
+def non_object_native_to_narwhals_dtype(dtype: str, version: Version) -> DType:
     dtypes = import_dtypes_module(version)
     if dtype in {"int64", "Int64", "Int64[pyarrow]", "int64[pyarrow]"}:
         return dtypes.Int64()
@@ -465,7 +463,7 @@ def native_to_narwhals_dtype(
             return arrow_native_to_narwhals_dtype(native_dtype.to_arrow(), version)
         return arrow_native_to_narwhals_dtype(native_dtype.pyarrow_dtype, version)
     if dtype != "object":
-        return non_object_native_to_narwhals_dtype(dtype, version, implementation)
+        return non_object_native_to_narwhals_dtype(dtype, version)
     if implementation is Implementation.DASK:
         # Dask columns are lazy, so we can't inspect values.
         # The most useful assumption is probably String
@@ -649,68 +647,25 @@ def narwhals_to_native_dtype(  # noqa: PLR0915
     if isinstance_or_issubclass(dtype, dtypes.Enum):
         msg = "Converting to Enum is not (yet) supported"
         raise NotImplementedError(msg)
-    if isinstance_or_issubclass(dtype, dtypes.List):
-        from narwhals._arrow.utils import (
-            narwhals_to_native_dtype as arrow_narwhals_to_native_dtype,
-        )
-
-        if implementation is Implementation.PANDAS and backend_version >= (2, 2):
-            try:
-                import pandas as pd
-                import pyarrow as pa  # ignore-banned-import
-            except ImportError as exc:  # pragma: no cover
-                msg = f"Unable to convert to {dtype} to to the following exception: {exc.msg}"
-                raise ImportError(msg) from exc
-
-            return pd.ArrowDtype(
-                pa.list_(
-                    value_type=arrow_narwhals_to_native_dtype(
-                        dtype.inner,  # type: ignore[union-attr]
-                        version=version,
-                    )
-                )
-            )
-        else:  # pragma: no cover
-            msg = (
-                "Converting to List dtype is not supported for implementation "
-                f"{implementation} and version {version}."
-            )
-            return NotImplementedError(msg)
-    if isinstance_or_issubclass(dtype, dtypes.Struct):
+    if isinstance_or_issubclass(dtype, (dtypes.Struct, dtypes.Array, dtypes.List)):
         if implementation is Implementation.PANDAS and backend_version >= (2, 2):
             try:
                 import pandas as pd
-                import pyarrow as pa  # ignore-banned-import
+                import pyarrow as pa  # ignore-banned-import  # noqa: F401
             except ImportError as exc:  # pragma: no cover
                 msg = f"Unable to convert to {dtype} to to the following exception: {exc.msg}"
                 raise ImportError(msg) from exc
             from narwhals._arrow.utils import (
                 narwhals_to_native_dtype as arrow_narwhals_to_native_dtype,
             )
 
-            return pd.ArrowDtype(
-                pa.struct(
-                    [
-                        (
-                            field.name,
-                            arrow_narwhals_to_native_dtype(
-                                field.dtype,
-                                version=version,
-                            ),
-                        )
-                        for field in dtype.fields  # type: ignore[union-attr]
-                    ]
-                )
-            )
+            return pd.ArrowDtype(arrow_narwhals_to_native_dtype(dtype, version=version))
         else:  # pragma: no cover
             msg = (
-                "Converting to Struct dtype is not supported for implementation "
+                f"Converting to {dtype} dtype is not supported for implementation "
                 f"{implementation} and version {version}."
             )
-            return NotImplementedError(msg)
-    if isinstance_or_issubclass(dtype, dtypes.Array):  # pragma: no cover
-        msg = "Converting to Array dtype is not supported yet"
-        return NotImplementedError(msg)
+            raise NotImplementedError(msg)
     msg = f"Unknown dtype: {dtype}"  # pragma: no cover
     raise AssertionError(msg)
 

diff --git a/narwhals/_polars/utils.py b/narwhals/_polars/utils.py
@@ -132,16 +132,11 @@ def native_to_narwhals_dtype(
             native_to_narwhals_dtype(dtype.inner, version, backend_version)  # type: ignore[attr-defined]
         )
     if dtype == pl.Array:
-        if backend_version < (0, 20, 30):  # pragma: no cover
-            return dtypes.Array(
-                native_to_narwhals_dtype(dtype.inner, version, backend_version),  # type: ignore[attr-defined]
-                dtype.width,  # type: ignore[attr-defined]
-            )
-        else:
-            return dtypes.Array(
-                native_to_narwhals_dtype(dtype.inner, version, backend_version),  # type: ignore[attr-defined]
-                dtype.size,  # type: ignore[attr-defined]
-            )
+        outer_shape = dtype.width if backend_version < (0, 20, 30) else dtype.size  # type: ignore[attr-defined]
+        return dtypes.Array(
+            inner=native_to_narwhals_dtype(dtype.inner, version, backend_version),  # type: ignore[attr-defined]
+            shape=outer_shape,
+        )
     if dtype == pl.Decimal:
         return dtypes.Decimal()
     return dtypes.Unknown()
@@ -205,8 +200,10 @@ def narwhals_to_native_dtype(dtype: DType | type[DType], version: Version) -> pl
             ]
         )
     if dtype == dtypes.Array:  # pragma: no cover
-        msg = "Converting to Array dtype is not supported yet"
-        raise NotImplementedError(msg)
+        return pl.Array(
+            inner=narwhals_to_native_dtype(dtype.inner, version),  # type: ignore[union-attr]
+            shape=dtype.size,  # type: ignore[union-attr]
+        )
     return pl.Unknown()  # pragma: no cover
 
 

diff --git a/narwhals/_spark_like/utils.py b/narwhals/_spark_like/utils.py
@@ -41,26 +41,24 @@ def native_to_narwhals_dtype(
         return dtypes.Int16()
     if isinstance(dtype, pyspark_types.ByteType):
         return dtypes.Int8()
-    string_types = [
-        pyspark_types.StringType,
-        pyspark_types.VarcharType,
-        pyspark_types.CharType,
-    ]
-    if any(isinstance(dtype, t) for t in string_types):
+    if isinstance(
+        dtype,
+        (pyspark_types.StringType, pyspark_types.VarcharType, pyspark_types.CharType),
+    ):
         return dtypes.String()
     if isinstance(dtype, pyspark_types.BooleanType):
         return dtypes.Boolean()
     if isinstance(dtype, pyspark_types.DateType):
         return dtypes.Date()
-    datetime_types = [
-        pyspark_types.TimestampType,
-        pyspark_types.TimestampNTZType,
-    ]
-    if any(isinstance(dtype, t) for t in datetime_types):
+    if isinstance(dtype, (pyspark_types.TimestampType, pyspark_types.TimestampNTZType)):
         return dtypes.Datetime()
     if isinstance(dtype, pyspark_types.DecimalType):  # pragma: no cover
         # TODO(unassigned): cover this in dtypes_test.py
         return dtypes.Decimal()
+    if isinstance(dtype, pyspark_types.ArrayType):  # pragma: no cover
+        return dtypes.List(
+            inner=native_to_narwhals_dtype(dtype.elementType, version=version)
+        )
     return dtypes.Unknown()
 
 
@@ -97,8 +95,12 @@ def narwhals_to_native_dtype(
         msg = "Converting to Struct dtype is not supported yet"
         raise NotImplementedError(msg)
     if isinstance_or_issubclass(dtype, dtypes.Array):  # pragma: no cover
-        msg = "Converting to Array dtype is not supported yet"
-        raise NotImplementedError(msg)
+        inner = narwhals_to_native_dtype(
+            dtype.inner,  # type: ignore[union-attr]
+            version=version,
+        )
+        return pyspark_types.ArrayType(elementType=inner)
+
     if isinstance_or_issubclass(
         dtype, (dtypes.UInt64, dtypes.UInt32, dtypes.UInt16, dtypes.UInt8)
     ):  # pragma: no cover

diff --git a/narwhals/dtypes.py b/narwhals/dtypes.py
@@ -731,14 +731,37 @@ class Array(DType):
         Array(Int32, 2)
     """
 
+    inner: DType | type[DType]
+    size: int
+    shape: tuple[int, ...]
+
     def __init__(
-        self: Self, inner: DType | type[DType], width: int | None = None
+        self: Self,
+        inner: DType | type[DType],
+        shape: int | tuple[int, ...] | None = None,
     ) -> None:
-        self.inner = inner
-        if width is None:
-            error = "`width` must be specified when initializing an `Array`"
-            raise TypeError(error)
-        self.width = width
+        inner_shape: tuple[int, ...] = inner.shape if isinstance(inner, Array) else ()
+
+        if shape is None:  # pragma: no cover
+            msg = "Array constructor is missing the required argument `shape`"
+            raise TypeError(msg)
+
+        if isinstance(shape, int):
+            self.inner = inner
+            self.size = shape
+            self.shape = (shape, *inner_shape)
+
+        elif isinstance(shape, tuple):
+            if len(shape) > 1:
+                inner = Array(inner, shape[1:])
+
+            self.inner = inner
+            self.size = shape[0]
+            self.shape = shape + inner_shape
+
+        else:
+            msg = f"invalid input for shape: {shape!r}"
+            raise TypeError(msg)
 
     def __eq__(self: Self, other: DType | type[DType]) -> bool:  # type: ignore[override]
         # This equality check allows comparison of type classes and type instances.
@@ -751,16 +774,24 @@ def __eq__(self: Self, other: DType | type[DType]) -> bool:  # type: ignore[over
         if type(other) is type and issubclass(other, self.__class__):
             return True
         elif isinstance(other, self.__class__):
-            return self.inner == other.inner
+            if self.shape != other.shape:
+                return False
+            else:
+                return self.inner == other.inner
         else:
             return False
 
     def __hash__(self: Self) -> int:
-        return hash((self.__class__, self.inner, self.width))
+        return hash((self.__class__, self.inner, self.shape))
+
+    def __repr__(self) -> str:
+        # Get leaf type
+        dtype = self.inner
+        while isinstance(dtype, Array):
+            dtype = dtype.inner
 
-    def __repr__(self: Self) -> str:
         class_name = self.__class__.__name__
-        return f"{class_name}({self.inner!r}, {self.width})"
+        return f"{class_name}({dtype!r}, shape={self.shape})"
 
 
 class Date(TemporalType):

diff --git a/tests/dtypes_test.py b/tests/dtypes_test.py
@@ -3,6 +3,7 @@
 from datetime import datetime
 from datetime import timedelta
 from datetime import timezone
+from typing import TYPE_CHECKING
 from typing import Literal
 
 import numpy as np
@@ -15,6 +16,9 @@
 from tests.utils import PANDAS_VERSION
 from tests.utils import POLARS_VERSION
 
+if TYPE_CHECKING:
+    from tests.utils import Constructor
+
 
 @pytest.mark.parametrize("time_unit", ["us", "ns", "ms"])
 @pytest.mark.parametrize("time_zone", ["Europe/Rome", timezone.utc, None])
@@ -75,15 +79,15 @@ def test_array_valid() -> None:
     assert dtype == nw.Array
     assert dtype != nw.Array(nw.Float32, 2)
     assert dtype != nw.Duration
-    assert repr(dtype) == "Array(<class 'narwhals.dtypes.Int64'>, 2)"
+    assert repr(dtype) == "Array(<class 'narwhals.dtypes.Int64'>, shape=(2,))"
     dtype = nw.Array(nw.Array(nw.Int64, 2), 2)
     assert dtype == nw.Array(nw.Array(nw.Int64, 2), 2)
     assert dtype == nw.Array
     assert dtype != nw.Array(nw.Array(nw.Float32, 2), 2)
     assert dtype in {nw.Array(nw.Array(nw.Int64, 2), 2)}
 
     with pytest.raises(
-        TypeError, match="`width` must be specified when initializing an `Array`"
+        TypeError, match="Array constructor is missing the required argument `shape`"
     ):
         dtype = nw.Array(nw.Int64)
 
@@ -133,13 +137,23 @@ def test_polars_2d_array() -> None:
     df = pl.DataFrame(
         {"a": [[[1, 2], [3, 4], [5, 6]]]}, schema={"a": pl.Array(pl.Int64, (3, 2))}
     )
-    assert nw.from_native(df).collect_schema()["a"] == nw.Array(nw.Array(nw.Int64, 2), 3)
+    assert nw.from_native(df).collect_schema()["a"] == nw.Array(nw.Int64(), (3, 2))
     assert nw.from_native(df.to_arrow()).collect_schema()["a"] == nw.Array(
-        nw.Array(nw.Int64, 2), 3
+        nw.Array(nw.Int64(), 2), 3
     )
     assert nw.from_native(
         df.to_pandas(use_pyarrow_extension_array=True)
-    ).collect_schema()["a"] == nw.Array(nw.Array(nw.Int64, 2), 3)
+    ).collect_schema()["a"] == nw.Array(nw.Array(nw.Int64(), 2), 3)
+
+
+def test_2d_array(constructor: Constructor, request: pytest.FixtureRequest) -> None:
+    if any(x in str(constructor) for x in ("dask", "modin", "cudf", "pyspark")):
+        request.applymarker(pytest.mark.xfail)
+    data = {"a": [[[1, 2], [3, 4], [5, 6]]]}
+    df = nw.from_native(constructor(data)).with_columns(
+        a=nw.col("a").cast(nw.Array(nw.Int64(), (3, 2)))
+    )
+    assert df.collect_schema()["a"] == nw.Array(nw.Int64(), (3, 2))
 
 
 def test_second_time_unit() -> None: