Skip to content

Commit f375858

Browse files
committed
feat: add to_dicts
fixes #9185
1 parent dc23b9f commit f375858

File tree

3 files changed

+92
-1
lines changed

3 files changed

+92
-1
lines changed

Diff for: ibis/backends/__init__.py

+38
Original file line numberDiff line numberDiff line change
@@ -586,6 +586,44 @@ def to_delta(
586586
with expr.to_pyarrow_batches(params=params) as batch_reader:
587587
write_deltalake(path, batch_reader, **kwargs)
588588

589+
@util.experimental
def to_dicts(
    self, expr: ir.Table, *, chunk_size: int = 1_000_000
) -> Iterable[dict[str, Any]]:
    """Iterate through each row as a `dict` of column_name -> value.

    Rows are produced lazily: the expression is executed through
    `to_pyarrow_batches` and each record batch is converted to Python
    dictionaries only when the iterator reaches it.

    Parameters
    ----------
    expr
        The ibis expression to materialize as an iterable of row dictionaries.
    chunk_size
        We materialize the results in chunks of this size, to keep memory
        usage under control. Larger values will probably be faster but
        consume more memory.

    Returns
    -------
    Iterable[dict[str, Any]]
        An iterator of dictionaries, each representing a row in the table.

    Examples
    --------
    >>> t = ibis.memtable({"i": [1, 2, 3], "s": ["a", "b", "c"]})
    >>> list(t.to_dicts())
    [{'i': 1, 's': 'a'}, {'i': 2, 's': 'b'}, {'i': 3, 's': 'c'}]

    Single columns are returned as dictionaries with a single key:

    >>> column = t.i
    >>> list(column.to_dicts())
    [{'i': 1}, {'i': 2}, {'i': 3}]

    See Also
    --------
    [`Column.to_list`](./expression-generic.qmd#ibis.expr.types.generic.Column.to_list)
    """
    # Use the batch reader as a context manager, mirroring `to_delta`, so the
    # underlying result set is closed when iteration finishes *or* when the
    # caller abandons the generator early (generator close triggers __exit__).
    with self.to_pyarrow_batches(expr, chunk_size=chunk_size) as batch_reader:
        for batch in batch_reader:
            yield from batch.to_pylist()
626+
589627
@util.experimental
590628
def to_json(
591629
self,

Diff for: ibis/backends/tests/test_export.py

+18
Original file line numberDiff line numberDiff line change
@@ -350,6 +350,24 @@ def test_table_to_csv(tmp_path, backend, awards_players):
350350
backend.assert_frame_equal(awards_players.to_pandas(), df)
351351

352352

353+
@pytest.mark.parametrize("chunk_size", [1, 1000])
def test_to_dicts(con, chunk_size):
    """Check `to_dicts` on a table, an empty table and a single column.

    Runs once with a chunk size smaller than the row count and once with a
    larger one, so both the multi-batch and single-batch paths are covered.
    """
    table_name = "to_dicts_table"
    source = ibis.memtable({"i": [1, 2, 3], "s": ["a", "b", "c"]})
    t = con.create_table(table_name, source, overwrite=True)

    try:
        # Row order is not guaranteed without an explicit sort, and the
        # assertions below compare ordered lists.
        ordered = t.order_by("i")

        result = list(ordered.to_dicts(chunk_size=chunk_size))
        expected = [{"i": 1, "s": "a"}, {"i": 2, "s": "b"}, {"i": 3, "s": "c"}]
        assert result == expected

        result = list(t.limit(0).to_dicts(chunk_size=chunk_size))
        expected = []
        assert result == expected

        result = list(ordered.i.to_dicts(chunk_size=chunk_size))
        expected = [{"i": 1}, {"i": 2}, {"i": 3}]
        assert result == expected
    finally:
        # Do not leave the persistent table behind for other tests.
        con.drop_table(table_name, force=True)
369+
370+
353371
@pytest.mark.notimpl(
354372
[
355373
"athena",

Diff for: ibis/expr/types/core.py

+36-1
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020
from ibis.util import experimental
2121

2222
if TYPE_CHECKING:
23-
from collections.abc import Iterator, Mapping
23+
from collections.abc import Iterable, Iterator, Mapping
2424
from pathlib import Path
2525

2626
import pandas as pd
@@ -771,6 +771,41 @@ def to_delta(
771771
"""
772772
self._find_backend(use_default=True).to_delta(self, path, **kwargs)
773773

774+
@experimental
def to_dicts(self, *, chunk_size: int = 1_000_000) -> Iterable[dict[str, Any]]:
    """Iterate through each row as a `dict` of column_name -> value.

    The work is delegated to the `to_dicts` method of the backend resolved
    for this expression (falling back to the default backend).

    Parameters
    ----------
    chunk_size
        We materialize the results in chunks of this size, to keep memory
        usage under control. Larger values will probably be faster but
        consume more memory.

    Returns
    -------
    Iterable[dict[str, Any]]
        An iterator of dictionaries, each representing a row in the table.

    Examples
    --------
    >>> t = ibis.memtable({"i": [1, 2, 3], "s": ["a", "b", "c"]})
    >>> list(t.to_dicts())
    [{'i': 1, 's': 'a'}, {'i': 2, 's': 'b'}, {'i': 3, 's': 'c'}]

    Single columns are returned as dictionaries with a single key:

    >>> column = t.i
    >>> list(column.to_dicts())
    [{'i': 1}, {'i': 2}, {'i': 3}]

    See Also
    --------
    [`Column.to_list`](./expression-generic.qmd#ibis.expr.types.generic.Column.to_list)
    """
    backend = self._find_backend(use_default=True)
    return backend.to_dicts(self, chunk_size=chunk_size)
808+
774809
@experimental
775810
def to_json(
776811
self,

0 commit comments

Comments
 (0)