diff --git a/src/ranking.jl b/src/ranking.jl index 90ed8d39d..05a5b4657 100644 --- a/src/ranking.jl +++ b/src/ranking.jl @@ -6,65 +6,70 @@ # The implementations here follow this wikipedia page. # - function _check_randparams(rks, x, p) n = length(rks) length(x) == length(p) == n || raise_dimerror() return n end +# ranking helper function: calls sortperm(x) and then ranking method f! +function _rank(f!, x::AbstractArray, R::Type=Int; sortkwargs...) + rks = similar(x, R) + ord = reshape(sortperm(vec(x); sortkwargs...), size(x)) + return f!(rks, x, ord) +end +# ranking helper function for arrays with missing values +function _rank(f!, x::AbstractArray{>: Missing}, R::Type=Int; sortkwargs...) + inds = findall(!ismissing, vec(x)) + isempty(inds) && return missings(R, size(x)) + xv = disallowmissing(view(vec(x), inds)) + ordv = sortperm(xv; sortkwargs...) + rks = missings(R, size(x)) + f!(view(rks, inds), xv, ordv) + return rks +end # Ordinal ranking ("1234 ranking") -- use the literal order resulted from sort -function ordinalrank!(rks::AbstractArray, x::AbstractArray, p::IntegerArray) - n = _check_randparams(rks, x, p) - - if n > 0 - i = 1 - while i <= n - rks[p[i]] = i - i += 1 - end +function _ordinalrank!(rks::AbstractArray, x::AbstractArray, p::IntegerArray) + _check_randparams(rks, x, p) + @inbounds for i in eachindex(p) + rks[p[i]] = i end - return rks end """ - ordinalrank(x; lt = isless, rev::Bool = false) + ordinalrank(x; lt=isless, by=identity, rev::Bool=false, ...) Return the [ordinal ranking](https://en.wikipedia.org/wiki/Ranking#Ordinal_ranking_.28.221234.22_ranking.29) -("1234" ranking) of an array. The `lt` keyword allows providing a custom "less -than" function; use `rev=true` to reverse the sorting order. -All items in `x` are given distinct, successive ranks based on their -position in `sort(x; lt = lt, rev = rev)`. +("1234" ranking) of an array. Supports the same keyword arguments as the `sort` function. +All items in `x` are given distinct, successive ranks based on their position +in the sorted vector. Missing values are assigned rank `missing`. """ -ordinalrank(x::AbstractArray; lt = isless, rev::Bool = false) = - ordinalrank!(Array{Int}(undef, size(x)), x, sortperm(x; lt = lt, rev = rev)) +ordinalrank(x::AbstractArray; sortkwargs...) = + _rank(_ordinalrank!, x; sortkwargs...) # Competition ranking ("1224" ranking) -- resolve tied ranks using min -function competerank!(rks::AbstractArray, x::AbstractArray, p::IntegerArray) +function _competerank!(rks::AbstractArray, x::AbstractArray, p::IntegerArray) n = _check_randparams(rks, x, p) - if n > 0 + @inbounds if n > 0 p1 = p[1] v = x[p1] rks[p1] = k = 1 - i = 2 - while i <= n + for i in 2:n pi = p[i] xi = x[pi] - if xi == v - rks[pi] = k - else - rks[pi] = k = i + if xi != v v = xi + k = i end - i += 1 + rks[pi] = k end end @@ -73,39 +78,35 @@ end """ - competerank(x; lt = isless, rev::Bool = false) + competerank(x; lt=isless, by=identity, rev::Bool=false, ...) Return the [standard competition ranking](http://en.wikipedia.org/wiki/Ranking#Standard_competition_ranking_.28.221224.22_ranking.29) -("1224" ranking) of an array. The `lt` keyword allows providing a custom "less -than" function; use `rev=true` to reverse the sorting order. -Items that compare equal are given the same rank, then a gap is left -in the rankings the size of the number of tied items - 1. +("1224" ranking) of an array. Supports the same keyword arguments as the `sort` function. +Equal (*"tied"*) items are given the same rank, and the next rank comes after a gap +that is equal to the number of tied items - 1. Missing values are assigned rank `missing`. """ -competerank(x::AbstractArray; lt = isless, rev::Bool = false) = - competerank!(Array{Int}(undef, size(x)), x, sortperm(x; lt = lt, rev = rev)) +competerank(x::AbstractArray; sortkwargs...) = + _rank(_competerank!, x; sortkwargs...) # Dense ranking ("1223" ranking) -- resolve tied ranks using min -function denserank!(rks::AbstractArray, x::AbstractArray, p::IntegerArray) +function _denserank!(rks::AbstractArray, x::AbstractArray, p::IntegerArray) n = _check_randparams(rks, x, p) - if n > 0 + @inbounds if n > 0 p1 = p[1] v = x[p1] rks[p1] = k = 1 - i = 2 - while i <= n + for i in 2:n pi = p[i] xi = x[pi] - if xi == v - rks[pi] = k - else - rks[pi] = (k += 1) + if xi != v v = xi + k += 1 end - i += 1 + rks[pi] = k end end @@ -114,29 +115,27 @@ end """ - denserank(x) + denserank(x; lt=isless, by=identity, rev::Bool=false, ...) Return the [dense ranking](http://en.wikipedia.org/wiki/Ranking#Dense_ranking_.28.221223.22_ranking.29) -("1223" ranking) of an array. The `lt` keyword allows providing a custom "less -than" function; use `rev=true` to reverse the sorting order. Items that -compare equal receive the same ranking, and the next subsequent rank is +("1223" ranking) of an array. Supports the same keyword arguments as the `sort` function. +Equal items receive the same rank, and the next subsequent rank is assigned with no gap. Missing values are assigned rank `missing`. """ -denserank(x::AbstractArray; lt = isless, rev::Bool = false) = - denserank!(Array{Int}(undef, size(x)), x, sortperm(x; lt = lt, rev = rev)) +denserank(x::AbstractArray; sortkwargs...) = + _rank(_denserank!, x; sortkwargs...) # Tied ranking ("1 2.5 2.5 4" ranking) -- resolve tied ranks using average -function tiedrank!(rks::AbstractArray, x::AbstractArray, p::IntegerArray) +function _tiedrank!(rks::AbstractArray, x::AbstractArray, p::IntegerArray) n = _check_randparams(rks, x, p) - if n > 0 + @inbounds if n > 0 v = x[p[1]] s = 1 # starting index of current range - e = 2 # pass-by-end index of current range - while e <= n + for e in 2:n # e is pass-by-end index of current range cx = x[p[e]] if cx != v # fill average rank to s : e-1 @@ -148,10 +147,9 @@ function tiedrank!(rks::AbstractArray, x::AbstractArray, p::IntegerArray) s = e v = cx end - e += 1 end - # the last range (e == n+1) + # the last range ar = (s + n) / 2 for i = s : n rks[p[i]] = ar @@ -161,33 +159,15 @@ function tiedrank!(rks::AbstractArray, x::AbstractArray, p::IntegerArray) return rks end -# order (aka. rank), resolving ties using the mean rank """ - tiedrank(x) + tiedrank(x; lt=isless, by=identity, rev::Bool=false, ...) Return the [tied ranking](http://en.wikipedia.org/wiki/Ranking#Fractional_ranking_.28.221_2.5_2.5_4.22_ranking.29), also called fractional or "1 2.5 2.5 4" ranking, -of an array. The `lt` keyword allows providing a custom "less -than" function; use `rev=true` to reverse the sorting order. -Items that compare equal receive the mean of the -rankings they would have been assigned under ordinal ranking. +of an array. Supports the same keyword arguments as the `sort` function. +Equal (*"tied"*) items receive the mean of the ranks they would +have been assigned under the ordinal ranking (see [`ordinalrank`](@ref)). Missing values are assigned rank `missing`. """ -tiedrank(x::AbstractArray; lt = isless, rev::Bool = false) = - tiedrank!(Array{Float64}(undef, size(x)), x, sortperm(x; lt = lt, rev = rev)) - -for (f, f!, S) in zip([:ordinalrank, :competerank, :denserank, :tiedrank], - [:ordinalrank!, :competerank!, :denserank!, :tiedrank!], - [Int, Int, Int, Float64]) - @eval begin - function $f(x::AbstractArray{>: Missing}; lt = isless, rev::Bool = false) - inds = findall(!ismissing, x) - isempty(inds) && return missings($S, size(x)) - xv = disallowmissing(view(x, inds)) - sp = sortperm(xv; lt = lt, rev = rev) - rks = missings($S, length(x)) - $(f!)(view(rks, inds), xv, sp) - rks - end - end -end \ No newline at end of file +tiedrank(x::AbstractArray; sortkwargs...) = + _rank(_tiedrank!, x, Float64; sortkwargs...)