From 8463532dde9d83d600ec05ca8afd6d903023f87d Mon Sep 17 00:00:00 2001 From: Keno Fischer Date: Sat, 28 Jul 2018 18:31:00 -0400 Subject: [PATCH] Implement sortslices, deprecate sortrows/sortcols As discussed on triage, `sortslices` is the higher dimensional extension of `sortrows`/`sortcols`. The dimensions being specified are the dimensions (and for higher dimensions the order of the dimensions) to slice along. See the help text for an example of the higher dimensional behavior. Deprecate sortrows/sortcols in favor of sortslices. --- NEWS.md | 2 + base/deprecated.jl | 3 + base/exports.jl | 3 +- base/multidimensional.jl | 156 +++++++++++++++++++++++++++++++ base/sort.jl | 71 -------------- doc/src/base/sort.md | 3 +- test/arrayops.jl | 24 ++++- test/offsetarray.jl | 4 +- test/testhelpers/OffsetArrays.jl | 2 +- 9 files changed, 185 insertions(+), 83 deletions(-) diff --git a/NEWS.md b/NEWS.md index a31e54753a7d5..6bfafb7a60fe7 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1335,6 +1335,8 @@ Deprecated or removed * `realmin`/`realmax` are deprecated in favor of `floatmin`/`floatmax` ([#28302]). + * `sortrows`/`sortcols` have been deprecated in favor of the more general `sortslices`. + Command-line option changes --------------------------- diff --git a/base/deprecated.jl b/base/deprecated.jl index 853eb0bb5b71e..1cb84682da286 100644 --- a/base/deprecated.jl +++ b/base/deprecated.jl @@ -1775,6 +1775,9 @@ end @deprecate realmin floatmin @deprecate realmax floatmax +@deprecate sortrows(A::AbstractMatrix; kws...) sortslices(A; dims=1, kws...) +@deprecate sortcols(A::AbstractMatrix; kws...) sortslices(A; dims=2, kws...) 
+ # END 0.7 deprecations # BEGIN 1.0 deprecations diff --git a/base/exports.jl b/base/exports.jl index 97553a8773334..b684eaf9eca49 100644 --- a/base/exports.jl +++ b/base/exports.jl @@ -419,10 +419,9 @@ export selectdim, sort!, sort, - sortcols, sortperm, sortperm!, - sortrows, + sortslices, dropdims, step, stride, diff --git a/base/multidimensional.jl b/base/multidimensional.jl index 5fe078a47ca41..d69be1e315bdd 100644 --- a/base/multidimensional.jl +++ b/base/multidimensional.jl @@ -1496,3 +1496,159 @@ end function Base.showarg(io::IO, r::Iterators.Pairs{<:CartesianIndex, <:Any, <:Any, T}, toplevel) where T<:AbstractVector print(io, "pairs(IndexCartesian(), ::$T)") end + +## sortslices + +""" + sortslices(A; dims, alg::Algorithm=DEFAULT_UNSTABLE, lt=isless, by=identity, rev::Bool=false, order::Ordering=Forward) + +Sort slices of an array `A`. The required keyword argument `dims` must +be either an integer or a tuple of integers. It specifies the +dimension(s) over which the slices are sorted. + +E.g., if `A` is a matrix, `dims=1` will sort rows, `dims=2` will sort columns. +Note that the default comparison function on one dimensional slices sorts +lexicographically. + +For the remaining keyword arguments, see the documentation of [`sort!`](@ref). 
+ +# Examples +```jldoctest +julia> sortslices([7 3 5; -1 6 4; 9 -2 8], dims=1) # Sort rows +3×3 Array{Int64,2}: + -1 6 4 + 7 3 5 + 9 -2 8 + +julia> sortslices([7 3 5; -1 6 4; 9 -2 8], dims=1, lt=(x,y)->isless(x[2],y[2])) +3×3 Array{Int64,2}: + 9 -2 8 + 7 3 5 + -1 6 4 + +julia> sortslices([7 3 5; -1 6 4; 9 -2 8], dims=1, rev=true) +3×3 Array{Int64,2}: + 9 -2 8 + 7 3 5 + -1 6 4 + +julia> sortslices([7 3 5; 6 -1 -4; 9 -2 8], dims=2) # Sort columns +3×3 Array{Int64,2}: + 3 5 7 + -1 -4 6 + -2 8 9 + +julia> sortslices([7 3 5; 6 -1 -4; 9 -2 8], dims=2, alg=InsertionSort, lt=(x,y)->isless(x[2],y[2])) +3×3 Array{Int64,2}: + 5 3 7 + -4 -1 6 + 8 -2 9 + +julia> sortslices([7 3 5; 6 -1 -4; 9 -2 8], dims=2, rev=true) +3×3 Array{Int64,2}: + 7 5 3 + 6 -4 -1 + 9 8 -2 +``` + +# Higher dimensions + +`sortslices` extends naturally to higher dimensions. E.g., if `A` is a +2x2x2 array, `sortslices(A, dims=3)` will sort slices within the 3rd dimension, +passing the 2x2 slices `A[:, :, 1]` and `A[:, :, 2]` to the comparison function. +Note that while there is no default order on higher-dimensional slices, you may +use the `by` or `lt` keyword argument to specify such an order. + +If `dims` is a tuple, the order of the dimensions in `dims` is +relevant and specifies the linear order of the slices. E.g., if `A` is three +dimensional and `dims` is `(1, 2)`, the orderings of the first two dimensions +are re-arranged such that the slices (of the remaining third dimension) are sorted. +If `dims` is `(2, 1)` instead, the same slices will be taken, +but the result order will be row-major instead. 
+ +# Higher dimensional examples +``` +julia> A = permutedims(reshape([4 3; 2 1; 'A' 'B'; 'C' 'D'], (2, 2, 2)), (1, 3, 2)) +2×2×2 Array{Any,3}: +[:, :, 1] = + 4 3 + 2 1 + +[:, :, 2] = + 'A' 'B' + 'C' 'D' + +julia> sortslices(A, dims=(1,2)) +2×2×2 Array{Any,3}: +[:, :, 1] = + 1 3 + 2 4 + +[:, :, 2] = + 'D' 'B' + 'C' 'A' + +julia> sortslices(A, dims=(2,1)) +2×2×2 Array{Any,3}: +[:, :, 1] = + 1 2 + 3 4 + +[:, :, 2] = + 'D' 'C' + 'B' 'A' + +julia> sortslices(reshape([5; 4; 3; 2; 1], (1,1,5)), dims=3, by=x->x[1,1]) +1×1×5 Array{Int64,3}: +[:, :, 1] = + 1 + +[:, :, 2] = + 2 + +[:, :, 3] = + 3 + +[:, :, 4] = + 4 + +[:, :, 5] = + 5 +``` +""" +function sortslices(A::AbstractArray; dims::Union{Integer, Tuple{Vararg{Integer}}}, kws...) + _sortslices(A, Val{dims}(); kws...) +end + +# Works around inference's lack of ability to recognize partial constness +struct DimSelector{dims, T} + A::T +end +DimSelector{dims}(x::T) where {dims, T} = DimSelector{dims, T}(x) +(ds::DimSelector{dims, T})(i) where {dims, T} = i in dims ? axes(ds.A, i) : (:,) + +_negdims(n, dims) = filter(i->!(i in dims), 1:n) + +function compute_itspace(A, ::Val{dims}) where {dims} + negdims = _negdims(ndims(A), dims) + axs = Iterators.product(ntuple(DimSelector{dims}(A), ndims(A))...) + vec(permutedims(collect(axs), (dims..., negdims...))) +end + +function _sortslices(A::AbstractArray, d::Val{dims}; kws...) where dims + itspace = compute_itspace(A, d) + vecs = map(its->view(A, its...), itspace) + p = sortperm(vecs; kws...) + if ndims(A) == 2 && isa(dims, Integer) && isa(A, Array) + # At the moment, the performance of the generic version is subpar + # (about 5x slower). Hardcode a fast-path until we're able to + # optimize this. + return dims == 1 ? A[p, :] : A[:, p] + else + B = similar(A) + for (x, its) in zip(p, itspace) + B[its...] 
= vecs[x] + end + B + end +end diff --git a/base/sort.jl b/base/sort.jl index c655f5d355e2b..eb5c47573ce99 100644 --- a/base/sort.jl +++ b/base/sort.jl @@ -37,8 +37,6 @@ export # also exported by Base partialsort!, partialsortperm, partialsortperm!, - sortrows, - sortcols, # algorithms: InsertionSort, QuickSort, @@ -933,75 +931,6 @@ end Av end - -""" - sortrows(A; alg::Algorithm=DEFAULT_UNSTABLE, lt=isless, by=identity, rev::Bool=false, order::Ordering=Forward) - -Sort the rows of matrix `A` lexicographically. -See [`sort!`](@ref) for a description of possible -keyword arguments. - -# Examples -```jldoctest -julia> sortrows([7 3 5; -1 6 4; 9 -2 8]) -3×3 Array{Int64,2}: - -1 6 4 - 7 3 5 - 9 -2 8 - -julia> sortrows([7 3 5; -1 6 4; 9 -2 8], lt=(x,y)->isless(x[2],y[2])) -3×3 Array{Int64,2}: - 9 -2 8 - 7 3 5 - -1 6 4 - -julia> sortrows([7 3 5; -1 6 4; 9 -2 8], rev=true) -3×3 Array{Int64,2}: - 9 -2 8 - 7 3 5 - -1 6 4 -``` -""" -function sortrows(A::AbstractMatrix; kws...) - rows = [view(A, i, :) for i in axes(A,1)] - p = sortperm(rows; kws...) - A[p,:] -end - -""" - sortcols(A; alg::Algorithm=DEFAULT_UNSTABLE, lt=isless, by=identity, rev::Bool=false, order::Ordering=Forward) - -Sort the columns of matrix `A` lexicographically. -See [`sort!`](@ref) for a description of possible -keyword arguments. - -# Examples -```jldoctest -julia> sortcols([7 3 5; 6 -1 -4; 9 -2 8]) -3×3 Array{Int64,2}: - 3 5 7 - -1 -4 6 - -2 8 9 - -julia> sortcols([7 3 5; 6 -1 -4; 9 -2 8], alg=InsertionSort, lt=(x,y)->isless(x[2],y[2])) -3×3 Array{Int64,2}: - 5 3 7 - -4 -1 6 - 8 -2 9 - -julia> sortcols([7 3 5; 6 -1 -4; 9 -2 8], rev=true) -3×3 Array{Int64,2}: - 7 5 3 - 6 -4 -1 - 9 8 -2 -``` -""" -function sortcols(A::AbstractMatrix; kws...) - cols = [view(A, :, i) for i in axes(A,2)] - p = sortperm(cols; kws...) 
- A[:,p] -end - ## fast clever sorting for floats ## module Float diff --git a/doc/src/base/sort.md b/doc/src/base/sort.md index ffb4725b6bcf6..a3c3b7c3df338 100644 --- a/doc/src/base/sort.md +++ b/doc/src/base/sort.md @@ -111,8 +111,7 @@ Base.sort! Base.sort Base.sortperm Base.Sort.sortperm! -Base.Sort.sortrows -Base.Sort.sortcols +Base.Sort.sortslices ``` ## Order-Related Functions diff --git a/test/arrayops.jl b/test/arrayops.jl index 9306a6e592bf3..c01f10886304a 100644 --- a/test/arrayops.jl +++ b/test/arrayops.jl @@ -659,7 +659,7 @@ let A, B, C, D # 10 repeats of each row B = A[shuffle!(repeat(1:10, 10)), :] C = unique(B, dims=1) - @test sortrows(C) == sortrows(A) + @test sortslices(C, dims=1) == sortslices(A, dims=1) @test unique(B, dims=2) == B @test unique(B', dims=2)' == C @@ -1173,11 +1173,11 @@ end @testset "sort on arrays" begin local a = rand(3,3) - asr = sortrows(a) + asr = sortslices(a, dims=1) @test isless(asr[1,:],asr[2,:]) @test isless(asr[2,:],asr[3,:]) - asc = sortcols(a) + asc = sortslices(a, dims=2) @test isless(asc[:,1],asc[:,2]) @test isless(asc[:,2],asc[:,3]) @@ -1187,11 +1187,11 @@ end @test m == zeros(3, 4) @test o == fill(1, 3, 4) - asr = sortrows(a, rev=true) + asr = sortslices(a, dims=1, rev=true) @test isless(asr[2,:],asr[1,:]) @test isless(asr[3,:],asr[2,:]) - asc = sortcols(a, rev=true) + asc = sortslices(a, dims=2, rev=true) @test isless(asc[:,2],asc[:,1]) @test isless(asc[:,3],asc[:,2]) @@ -1223,6 +1223,20 @@ end @test all(bs[:,:,1] .<= bs[:,:,2]) end +@testset "higher dimensional sortslices" begin + A = permutedims(reshape([4 3; 2 1; 'A' 'B'; 'C' 'D'], (2, 2, 2)), (1, 3, 2)) + @test sortslices(A, dims=(1, 2)) == + permutedims(reshape([1 3; 2 4; 'D' 'B'; 'C' 'A'], (2, 2, 2)), (1, 3, 2)) + @test sortslices(A, dims=(2, 1)) == + permutedims(reshape([1 2; 3 4; 'D' 'C'; 'B' 'A'], (2, 2, 2)), (1, 3, 2)) + B = reshape(1:8, (2,2,2)) + @test sortslices(B, dims=(3,1))[:, :, 1] == [ + 1 3; + 5 7 + ] + @test sortslices(B, dims=(1,3)) == B 
+end + @testset "fill" begin @test fill!(Float64[1.0], -0.0)[1] === -0.0 A = fill(1.,3,3) diff --git a/test/offsetarray.jl b/test/offsetarray.jl index 2881261d73c86..b6078f41bab8d 100644 --- a/test/offsetarray.jl +++ b/test/offsetarray.jl @@ -434,8 +434,8 @@ amin, amax = extrema(parent(A)) @test unique(A, dims=2) == OffsetArray(parent(A), first(axes(A, 1)) - 1, 0) v = OffsetArray(rand(8), (-2,)) @test sort(v) == OffsetArray(sort(parent(v)), v.offsets) -@test sortrows(A) == OffsetArray(sortrows(parent(A)), A.offsets) -@test sortcols(A) == OffsetArray(sortcols(parent(A)), A.offsets) +@test sortslices(A, dims=1) == OffsetArray(sortslices(parent(A), dims=1), A.offsets) +@test sortslices(A, dims=2) == OffsetArray(sortslices(parent(A), dims=2), A.offsets) @test sort(A, dims=1) == OffsetArray(sort(parent(A), dims=1), A.offsets) @test sort(A, dims=2) == OffsetArray(sort(parent(A), dims=2), A.offsets) diff --git a/test/testhelpers/OffsetArrays.jl b/test/testhelpers/OffsetArrays.jl index 15ce7ac7337e4..eff9e8b4a72be 100644 --- a/test/testhelpers/OffsetArrays.jl +++ b/test/testhelpers/OffsetArrays.jl @@ -45,7 +45,7 @@ Base.eachindex(::IndexLinear, A::OffsetVector) = axes(A, 1) _indices(::Tuple{}, ::Tuple{}) = () Base.axes1(A::OffsetArray{T,0}) where {T} = Base.Slice(1:1) # we only need to specialize this one -const OffsetAxis = Union{Integer, UnitRange, Base.Slice{<:UnitRange}} +const OffsetAxis = Union{Integer, UnitRange, Base.Slice{<:UnitRange}, Base.OneTo} function Base.similar(A::OffsetArray, T::Type, dims::Dims) B = similar(parent(A), T, dims) end