Skip to content

Commit 66330ee

Browse files
committed
BUG: Fix factorize so that the null_encoding parameter is applied correctly while maintaining backwards compatibility
1 parent 166405d commit 66330ee

File tree

1 file changed

+9
-3
lines changed

1 file changed

+9
-3
lines changed

pandas/core/arrays/arrow/array.py

+9-3
Original file line numberDiff line numberDiff line change
@@ -1207,9 +1207,15 @@ def factorize(
12071207
# https://github.com/apache/arrow/issues/15226#issuecomment-1376578323
12081208
data = data.cast(pa.int64())
12091209

1210-
if pa.types.is_dictionary(data.type) and null_encoding == "encode":
1211-
data = data.cast(data.type.value_type)
1212-
encoded = data.dictionary_encode(null_encoding=null_encoding)
1210+
if pa.types.is_dictionary(data.type):
1211+
if null_encoding == "encode":
1212+
# dictionary_encode is a no-op when given an already dictionary-encoded array
1213+
data = data.cast(data.type.value_type)
1214+
encoded = data.dictionary_encode(null_encoding=null_encoding)
1215+
else:
1216+
encoded = data
1217+
else:
1218+
encoded = data.dictionary_encode(null_encoding=null_encoding)
12131219
if encoded.length() == 0:
12141220
indices = np.array([], dtype=np.intp)
12151221
uniques = type(self)(pa.chunked_array([], type=encoded.type.value_type))

0 commit comments

Comments
 (0)