Skip to content

Commit 0199dad

Browse files
LilithHafnerKristofferC
authored and
KristofferC
committed
Fix sorting bugs (esp MissingOptimization) that come up when using SortingAlgorithms.TimSort (#50171)
(cherry picked from commit ba251e8)
1 parent 0cfea7d commit 0199dad

File tree

2 files changed

+55
-14
lines changed

2 files changed

+55
-14
lines changed

base/sort.jl

+15-14
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ export # not exported by Base
4343
SMALL_ALGORITHM,
4444
SMALL_THRESHOLD
4545

46+
abstract type Algorithm end
4647

4748
## functions requiring only ordering ##
4849

@@ -420,7 +421,7 @@ for (sym, exp, type) in [
420421
(:mn, :(throw(ArgumentError("mn is needed but has not been computed"))), :(eltype(v))),
421422
(:mx, :(throw(ArgumentError("mx is needed but has not been computed"))), :(eltype(v))),
422423
(:scratch, nothing, :(Union{Nothing, Vector})), # could have different eltype
423-
(:allow_legacy_dispatch, true, Bool)]
424+
(:legacy_dispatch_entry, nothing, Union{Nothing, Algorithm})]
424425
usym = Symbol(:_, sym)
425426
@eval function $usym(v, o, kw)
426427
# using missing instead of nothing because scratch could === nothing.
@@ -483,8 +484,6 @@ internal or recursive calls.
483484
"""
484485
function _sort! end
485486

486-
abstract type Algorithm end
487-
488487

489488
"""
490489
MissingOptimization(next) <: Algorithm
@@ -508,12 +507,12 @@ struct WithoutMissingVector{T, U} <: AbstractVector{T}
508507
new{nonmissingtype(eltype(data)), typeof(data)}(data)
509508
end
510509
end
511-
Base.@propagate_inbounds function Base.getindex(v::WithoutMissingVector, i)
510+
Base.@propagate_inbounds function Base.getindex(v::WithoutMissingVector, i::Integer)
512511
out = v.data[i]
513512
@assert !(out isa Missing)
514513
out::eltype(v)
515514
end
516-
Base.@propagate_inbounds function Base.setindex!(v::WithoutMissingVector, x, i)
515+
Base.@propagate_inbounds function Base.setindex!(v::WithoutMissingVector, x, i::Integer)
517516
v.data[i] = x
518517
v
519518
end
@@ -574,8 +573,10 @@ function _sort!(v::AbstractVector, a::MissingOptimization, o::Ordering, kw)
574573
# we can assume v is equal to eachindex(o.data) which allows a copying partition
575574
# without allocations.
576575
lo_i, hi_i = lo, hi
577-
for (i,x) in zip(eachindex(o.data), o.data)
578-
if ismissing(x) == (o.order == Reverse) # should i go at the beginning?
576+
cv = eachindex(o.data) # equal to copy(v)
577+
for i in lo:hi
578+
x = o.data[cv[i]]
579+
if ismissing(x) == (o.order == Reverse) # should x go at the beginning/end?
579580
v[lo_i] = i
580581
lo_i += 1
581582
else
@@ -2119,25 +2120,25 @@ end
21192120
# Support 3-, 5-, and 6-argument versions of sort! for calling into the internals in the old way
21202121
sort!(v::AbstractVector, a::Algorithm, o::Ordering) = sort!(v, firstindex(v), lastindex(v), a, o)
21212122
function sort!(v::AbstractVector, lo::Integer, hi::Integer, a::Algorithm, o::Ordering)
2122-
_sort!(v, a, o, (; lo, hi, allow_legacy_dispatch=false))
2123+
_sort!(v, a, o, (; lo, hi, legacy_dispatch_entry=a))
21232124
v
21242125
end
21252126
sort!(v::AbstractVector, lo::Integer, hi::Integer, a::Algorithm, o::Ordering, _) = sort!(v, lo, hi, a, o)
21262127
function sort!(v::AbstractVector, lo::Integer, hi::Integer, a::Algorithm, o::Ordering, scratch::Vector)
2127-
_sort!(v, a, o, (; lo, hi, scratch, allow_legacy_dispatch=false))
2128+
_sort!(v, a, o, (; lo, hi, scratch, legacy_dispatch_entry=a))
21282129
v
21292130
end
21302131

21312132
# Support dispatch on custom algorithms in the old way
21322133
# sort!(::AbstractVector, ::Integer, ::Integer, ::MyCustomAlgorithm, ::Ordering) = ...
21332134
function _sort!(v::AbstractVector, a::Algorithm, o::Ordering, kw)
2134-
@getkw lo hi scratch allow_legacy_dispatch
2135-
if allow_legacy_dispatch
2135+
@getkw lo hi scratch legacy_dispatch_entry
2136+
if legacy_dispatch_entry === a
2137+
# This error prevents infinite recursion for unknown algorithms
2138+
throw(ArgumentError("Base.Sort._sort!(::$(typeof(v)), ::$(typeof(a)), ::$(typeof(o)), ::Any) is not defined"))
2139+
else
21362140
sort!(v, lo, hi, a, o)
21372141
scratch
2138-
else
2139-
# This error prevents infinite recursion for unknown algorithms
2140-
throw(ArgumentError("Base.Sort._sort!(::$(typeof(v)), ::$(typeof(a)), ::$(typeof(o))) is not defined"))
21412142
end
21422143
end
21432144

test/sorting.jl

+40
Original file line numberDiff line numberDiff line change
@@ -955,6 +955,46 @@ end
955955
test_allocs()
956956
end
957957

958+
@testset "MissingOptimization fastpath for Perm ordering when lo:hi ≠ eachindex(v)" begin
959+
v = [rand() < .5 ? missing : rand() for _ in 1:100]
960+
ix = collect(1:100)
961+
sort!(ix, 1, 10, Base.Sort.DEFAULT_STABLE, Base.Order.Perm(Base.Order.Forward, v))
962+
@test issorted(v[ix[1:10]])
963+
end
964+
965+
struct NonScalarIndexingOfWithoutMissingVectorAlg <: Base.Sort.Algorithm end
966+
function Base.Sort._sort!(v::AbstractVector, ::NonScalarIndexingOfWithoutMissingVectorAlg, o::Base.Order.Ordering, kw)
967+
Base.Sort.@getkw lo hi
968+
first_half = v[lo:lo+(hi-lo)÷2]
969+
second_half = v[lo+(hi-lo)÷2+1:hi]
970+
whole = v[lo:hi]
971+
all(vcat(first_half, second_half) .=== whole) || error()
972+
out = Base.Sort._sort!(whole, Base.Sort.DEFAULT_STABLE, o, (;kw..., lo=1, hi=length(whole)))
973+
v[lo:hi] .= whole
974+
out
975+
end
976+
977+
@testset "Non-scaler indexing of WithoutMissingVector" begin
978+
@testset "Unit test" begin
979+
wmv = Base.Sort.WithoutMissingVector(Union{Missing, Int}[1, 7, 2, 9])
980+
@test wmv[[1, 3]] == [1, 2]
981+
@test wmv[1:3] == [1, 7, 2]
982+
end
983+
@testset "End to end" begin
984+
alg = Base.Sort.InitialOptimizations(NonScalarIndexingOfWithoutMissingVectorAlg())
985+
@test issorted(sort(rand(100); alg))
986+
@test issorted(sort([rand() < .5 ? missing : randstring() for _ in 1:100]; alg))
987+
end
988+
end
989+
990+
struct DispatchLoopTestAlg <: Base.Sort.Algorithm end
991+
function Base.sort!(v::AbstractVector, lo::Integer, hi::Integer, ::DispatchLoopTestAlg, order::Base.Order.Ordering)
992+
sort!(view(v, lo:hi); order)
993+
end
994+
@testset "Support dispatch from the old style to the new style and back" begin
995+
@test issorted(sort!(rand(100), Base.Sort.InitialOptimizations(DispatchLoopTestAlg()), Base.Order.Forward))
996+
end
997+
958998
# This testset is at the end of the file because it is slow.
959999
@testset "searchsorted" begin
9601000
numTypes = [ Int8, Int16, Int32, Int64, Int128,

0 commit comments

Comments
 (0)