File tree 1 file changed +9
-3
lines changed
1 file changed +9
-3
lines changed Original file line number Diff line number Diff line change @@ -1207,9 +1207,15 @@ def factorize(
1207
1207
# https://github.com/apache/arrow/issues/15226#issuecomment-1376578323
1208
1208
data = data .cast (pa .int64 ())
1209
1209
1210
- if pa .types .is_dictionary (data .type ) and null_encoding == "encode" :
1211
- data = data .cast (data .type .value_type )
1212
- encoded = data .dictionary_encode (null_encoding = null_encoding )
1210
+ if pa .types .is_dictionary (data .type ):
1211
+ if null_encoding == "encode" :
1212
+ # dictionary_encode() is a no-op on an already dictionary-encoded array, so cast back to the value type first; re-encoding can then apply null_encoding="encode"
1213
+ data = data .cast (data .type .value_type )
1214
+ encoded = data .dictionary_encode (null_encoding = null_encoding )
1215
+ else :
1216
+ encoded = data
1217
+ else :
1218
+ encoded = data .dictionary_encode (null_encoding = null_encoding )
1213
1219
if encoded .length () == 0 :
1214
1220
indices = np .array ([], dtype = np .intp )
1215
1221
uniques = type (self )(pa .chunked_array ([], type = encoded .type .value_type ))
You can’t perform that action at this time.
0 commit comments