Skip to content

Commit a576fc9

Browse files
castedice and Fokko authored
Arrow: Support Large Binary when using to_arrow (#409)
* Arrow: Support Large Binary
* Merge with binary

---------

Co-authored-by: Fokko Driesprong <[email protected]>
1 parent 2e67308 commit a576fc9

File tree

4 files changed

+5
-5
lines changed

4 files changed

+5
-5
lines changed

pyiceberg/io/pyarrow.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -533,7 +533,7 @@ def visit_uuid(self, _: UUIDType) -> pa.DataType:
533533
return pa.binary(16)
534534

535535
def visit_binary(self, _: BinaryType) -> pa.DataType:
536-
return pa.binary()
536+
return pa.large_binary()
537537

538538

539539
def _convert_scalar(value: Any, iceberg_type: IcebergType) -> pa.scalar:
@@ -882,7 +882,7 @@ def primitive(self, primitive: pa.DataType) -> PrimitiveType:
882882
return TimestamptzType()
883883
elif primitive.tz is None:
884884
return TimestampType()
885-
elif pa.types.is_binary(primitive):
885+
elif pa.types.is_binary(primitive) or pa.types.is_large_binary(primitive):
886886
return BinaryType()
887887
elif pa.types.is_fixed_size_binary(primitive):
888888
primitive = cast(pa.FixedSizeBinaryType, primitive)

tests/integration/test_writes.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -140,7 +140,7 @@ def pa_schema() -> pa.Schema:
140140
# ("time", pa.time64("us")),
141141
# Not natively supported by Arrow
142142
# ("uuid", pa.fixed(16)),
143-
("binary", pa.binary()),
143+
("binary", pa.large_binary()),
144144
("fixed", pa.binary(16)),
145145
])
146146

tests/io/test_pyarrow.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -467,7 +467,7 @@ def test_string_type_to_pyarrow() -> None:
467467

468468
def test_binary_type_to_pyarrow() -> None:
469469
iceberg_type = BinaryType()
470-
assert visit(iceberg_type, _ConvertToArrowSchema()) == pa.binary()
470+
assert visit(iceberg_type, _ConvertToArrowSchema()) == pa.large_binary()
471471

472472

473473
def test_struct_type_to_pyarrow(table_schema_simple: Schema) -> None:

tests/io/test_pyarrow_visitor.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -215,7 +215,7 @@ def test_pyarrow_string_to_iceberg() -> None:
215215

216216

217217
def test_pyarrow_variable_binary_to_iceberg() -> None:
218-
pyarrow_type = pa.binary()
218+
pyarrow_type = pa.large_binary()
219219
converted_iceberg_type = visit_pyarrow(pyarrow_type, _ConvertToIceberg())
220220
assert converted_iceberg_type == BinaryType()
221221
assert visit(converted_iceberg_type, _ConvertToArrowSchema()) == pyarrow_type

0 commit comments

Comments
 (0)