From ef2ba05329bba9d6b50a9626b4033b0b61b280ed Mon Sep 17 00:00:00 2001 From: Alexey Stukalov Date: Sat, 8 Aug 2020 11:23:55 +0200 Subject: [PATCH 1/7] ordinalrank!(): use eachindex() --- src/ranking.jl | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/ranking.jl b/src/ranking.jl index 90ed8d39d..cb61fad08 100644 --- a/src/ranking.jl +++ b/src/ranking.jl @@ -20,10 +20,8 @@ function ordinalrank!(rks::AbstractArray, x::AbstractArray, p::IntegerArray) n = _check_randparams(rks, x, p) if n > 0 - i = 1 - while i <= n + @inbounds for i in eachindex(p) rks[p[i]] = i - i += 1 end end From 3848eff6a017a7f1b7b9daaccabc291d400b7019 Mon Sep 17 00:00:00 2001 From: Alexey Stukalov Date: Sun, 9 Aug 2020 15:19:14 +0200 Subject: [PATCH 2/7] ranking: use _rank() helper, @inbounds _rank() helper provides: 1) correct support for n-dim, n>1, input arrays 2) minimizes code duplication 3) passthrough of sortperm() args 4) macro-less support for missing values --- src/ranking.jl | 87 +++++++++++++++++++++++++------------------------- 1 file changed, 43 insertions(+), 44 deletions(-) diff --git a/src/ranking.jl b/src/ranking.jl index cb61fad08..a2aae1312 100644 --- a/src/ranking.jl +++ b/src/ranking.jl @@ -6,48 +6,63 @@ # The implementations here follow this wikipedia page. # - function _check_randparams(rks, x, p) n = length(rks) length(x) == length(p) == n || raise_dimerror() return n end +# ranking helper function: calls sortperm(x) and then ranking method f! +function _rank(f!, x::AbstractArray, R::Type=Int; sortkwargs...) + rks = similar(x, R) + ord = reshape(sortperm(vec(x); sortkwargs...), size(x)) + return f!(rks, x, ord) +end +# ranking helper function for arrays with missing values +function _rank(f!, x::AbstractArray{>: Missing}, R::Type=Int; sortkwargs...) + inds = findall(!ismissing, vec(x)) + isempty(inds) && return missings(R, size(x)) + T = nonmissingtype(eltype(x)) + xv = Vector{T}(undef, length(inds)) + @inbounds for (i, ind) in enumerate(inds) + xv[i] = x[ind] + end + rks = missings(R, size(x)) + ordv = sortperm(xv; sortkwargs...) + f!(view(rks, inds), xv, ordv) + return rks +end # Ordinal ranking ("1234 ranking") -- use the literal order resulted from sort -function ordinalrank!(rks::AbstractArray, x::AbstractArray, p::IntegerArray) - n = _check_randparams(rks, x, p) - - if n > 0 - @inbounds for i in eachindex(p) - rks[p[i]] = i - end +function _ordinalrank!(rks::AbstractArray, x::AbstractArray, p::IntegerArray) + _check_randparams(rks, x, p) + @inbounds for i in eachindex(p) + rks[p[i]] = i end - return rks end """ - ordinalrank(x; lt = isless, rev::Bool = false) + ordinalrank(x; sortkwargs...) Return the [ordinal ranking](https://en.wikipedia.org/wiki/Ranking#Ordinal_ranking_.28.221234.22_ranking.29) ("1234" ranking) of an array. The `lt` keyword allows providing a custom "less than" function; use `rev=true` to reverse the sorting order. All items in `x` are given distinct, successive ranks based on their -position in `sort(x; lt = lt, rev = rev)`. +position in `sort(x; sortkwargs...)`. Missing values are assigned rank `missing`. """ -ordinalrank(x::AbstractArray; lt = isless, rev::Bool = false) = - ordinalrank!(Array{Int}(undef, size(x)), x, sortperm(x; lt = lt, rev = rev)) +ordinalrank(x::AbstractArray; sortkwargs...) = + _rank(_ordinalrank!, x; sortkwargs...) # Competition ranking ("1224" ranking) -- resolve tied ranks using min -function competerank!(rks::AbstractArray, x::AbstractArray, p::IntegerArray) +function _competerank!(rks::AbstractArray, x::AbstractArray, p::IntegerArray) n = _check_randparams(rks, x, p) - if n > 0 + @inbounds if n > 0 p1 = p[1] v = x[p1] rks[p1] = k = 1 @@ -71,7 +86,7 @@ end """ - competerank(x; lt = isless, rev::Bool = false) + competerank(x; sortkwargs...) Return the [standard competition ranking](http://en.wikipedia.org/wiki/Ranking#Standard_competition_ranking_.28.221224.22_ranking.29) ("1224" ranking) of an array. The `lt` keyword allows providing a custom "less @@ -80,15 +95,15 @@ Items that compare equal are given the same rank, then a gap is left in the rankings the size of the number of tied items - 1. Missing values are assigned rank `missing`. """ -competerank(x::AbstractArray; lt = isless, rev::Bool = false) = - competerank!(Array{Int}(undef, size(x)), x, sortperm(x; lt = lt, rev = rev)) +competerank(x::AbstractArray; sortkwargs...) = + _rank(_competerank!, x; sortkwargs...) # Dense ranking ("1223" ranking) -- resolve tied ranks using min -function denserank!(rks::AbstractArray, x::AbstractArray, p::IntegerArray) +function _denserank!(rks::AbstractArray, x::AbstractArray, p::IntegerArray) n = _check_randparams(rks, x, p) - if n > 0 + @inbounds if n > 0 p1 = p[1] v = x[p1] rks[p1] = k = 1 @@ -112,7 +127,7 @@ end """ - denserank(x) + denserank(x; sortkwargs...) Return the [dense ranking](http://en.wikipedia.org/wiki/Ranking#Dense_ranking_.28.221223.22_ranking.29) ("1223" ranking) of an array. The `lt` keyword allows providing a custom "less @@ -121,15 +136,15 @@ compare equal receive the same ranking, and the next subsequent rank is assigned with no gap. Missing values are assigned rank `missing`. """ -denserank(x::AbstractArray; lt = isless, rev::Bool = false) = - denserank!(Array{Int}(undef, size(x)), x, sortperm(x; lt = lt, rev = rev)) +denserank(x::AbstractArray; sortkwargs...) = + _rank(_denserank!, x; sortkwargs...) # Tied ranking ("1 2.5 2.5 4" ranking) -- resolve tied ranks using average -function tiedrank!(rks::AbstractArray, x::AbstractArray, p::IntegerArray) +function _tiedrank!(rks::AbstractArray, x::AbstractArray, p::IntegerArray) n = _check_randparams(rks, x, p) - if n > 0 + @inbounds if n > 0 v = x[p[1]] s = 1 # starting index of current range @@ -161,7 +176,7 @@ end # order (aka. rank), resolving ties using the mean rank """ - tiedrank(x) + tiedrank(x; sortkwargs...) Return the [tied ranking](http://en.wikipedia.org/wiki/Ranking#Fractional_ranking_.28.221_2.5_2.5_4.22_ranking.29), also called fractional or "1 2.5 2.5 4" ranking, @@ -171,21 +186,5 @@ Items that compare equal receive the mean of the rankings they would have been assigned under ordinal ranking. Missing values are assigned rank `missing`. """ -tiedrank(x::AbstractArray; lt = isless, rev::Bool = false) = - tiedrank!(Array{Float64}(undef, size(x)), x, sortperm(x; lt = lt, rev = rev)) - -for (f, f!, S) in zip([:ordinalrank, :competerank, :denserank, :tiedrank], - [:ordinalrank!, :competerank!, :denserank!, :tiedrank!], - [Int, Int, Int, Float64]) - @eval begin - function $f(x::AbstractArray{>: Missing}; lt = isless, rev::Bool = false) - inds = findall(!ismissing, x) - isempty(inds) && return missings($S, size(x)) - xv = disallowmissing(view(x, inds)) - sp = sortperm(xv; lt = lt, rev = rev) - rks = missings($S, length(x)) - $(f!)(view(rks, inds), xv, sp) - rks - end - end -end \ No newline at end of file +tiedrank(x::AbstractArray; sortkwargs...) = + _rank(_tiedrank!, x, Float64; sortkwargs...) From d050fdb5bcd24d47332474e6661dfd8c3a694992 Mon Sep 17 00:00:00 2001 From: Alexey Stukalov Date: Fri, 2 Oct 2020 16:44:32 +0200 Subject: [PATCH 3/7] revert to disallowmissing() --- src/ranking.jl | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/src/ranking.jl b/src/ranking.jl index a2aae1312..18cffc203 100644 --- a/src/ranking.jl +++ b/src/ranking.jl @@ -24,12 +24,9 @@ function _rank(f!, x::AbstractArray{>: Missing}, R::Type=Int; sortkwargs...) inds = findall(!ismissing, vec(x)) isempty(inds) && return missings(R, size(x)) T = nonmissingtype(eltype(x)) - xv = Vector{T}(undef, length(inds)) - @inbounds for (i, ind) in enumerate(inds) - xv[i] = x[ind] - end - rks = missings(R, size(x)) + xv = disallowmissing(view(vec(x), inds)) ordv = sortperm(xv; sortkwargs...) + rks = missings(R, size(x)) f!(view(rks, inds), xv, ordv) return rks end From 8a296bafd10f2089f26e54b9e8fbb747775746af Mon Sep 17 00:00:00 2001 From: Alexey Stukalov Date: Fri, 2 Oct 2020 16:46:03 +0200 Subject: [PATCH 4/7] replace while-loops with for-loops --- src/ranking.jl | 28 ++++++++++------------------ 1 file changed, 10 insertions(+), 18 deletions(-) diff --git a/src/ranking.jl b/src/ranking.jl index 18cffc203..ebf2b5c27 100644 --- a/src/ranking.jl +++ b/src/ranking.jl @@ -64,17 +64,14 @@ function _competerank!(rks::AbstractArray, x::AbstractArray, p::IntegerArray) v = x[p1] rks[p1] = k = 1 - i = 2 - while i <= n + for i in 2:n pi = p[i] xi = x[pi] - if xi == v - rks[pi] = k - else - rks[pi] = k = i + if xi != v v = xi + k = i end - i += 1 + rks[pi] = k end end @@ -105,17 +102,14 @@ function _denserank!(rks::AbstractArray, x::AbstractArray, p::IntegerArray) v = x[p1] rks[p1] = k = 1 - i = 2 - while i <= n + for i in 2:n pi = p[i] xi = x[pi] - if xi == v - rks[pi] = k - else - rks[pi] = (k += 1) + if xi != v v = xi + k += 1 end - i += 1 + rks[pi] = k end end @@ -145,8 +139,7 @@ function _tiedrank!(rks::AbstractArray, x::AbstractArray, p::IntegerArray) v = x[p[1]] s = 1 # starting index of current range - e = 2 # pass-by-end index of current range - while e <= n + for e in 2:n # e is pass-by-end index of current range cx = x[p[e]] if cx != v # fill average rank to s : e-1 @@ -158,10 +151,9 @@ function _tiedrank!(rks::AbstractArray, x::AbstractArray, p::IntegerArray) s = e v = cx end - e += 1 end - # the last range (e == n+1) + # the last range ar = (s + n) / 2 for i = s : n rks[p[i]] = ar From 831a0e56130cc015ed100f0555793eaf72cdfc32 Mon Sep 17 00:00:00 2001 From: Alexey Stukalov Date: Fri, 2 Oct 2020 16:59:52 +0200 Subject: [PATCH 5/7] rankings: expand sortkwargs in docstring --- src/ranking.jl | 24 ++++++++++-------------- 1 file changed, 10 insertions(+), 14 deletions(-) diff --git a/src/ranking.jl b/src/ranking.jl index ebf2b5c27..4a894d332 100644 --- a/src/ranking.jl +++ b/src/ranking.jl @@ -42,12 +42,11 @@ end """ - ordinalrank(x; sortkwargs...) + ordinalrank(x; lt=isless, by=identity, rev::Bool=false, ...) Return the [ordinal ranking](https://en.wikipedia.org/wiki/Ranking#Ordinal_ranking_.28.221234.22_ranking.29) -("1234" ranking) of an array. The `lt` keyword allows providing a custom "less -than" function; use `rev=true` to reverse the sorting order. -All items in `x` are given distinct, successive ranks based on their +("1234" ranking) of an array. Supports the same keyword arguments as `sort(x; sortkwargs...)` +function. All items in `x` are given distinct, successive ranks based on their position in `sort(x; sortkwargs...)`. Missing values are assigned rank `missing`. """ @@ -80,11 +79,10 @@ end """ - competerank(x; sortkwargs...) + competerank(x; lt=isless, by=identity, rev::Bool=false, ...) Return the [standard competition ranking](http://en.wikipedia.org/wiki/Ranking#Standard_competition_ranking_.28.221224.22_ranking.29) -("1224" ranking) of an array. The `lt` keyword allows providing a custom "less -than" function; use `rev=true` to reverse the sorting order. +("1224" ranking) of an array. Supports the same keyword arguments as `sort(x)` function. Items that compare equal are given the same rank, then a gap is left in the rankings the size of the number of tied items - 1. Missing values are assigned rank `missing`. @@ -118,12 +116,11 @@ end """ - denserank(x; sortkwargs...) + denserank(x; lt=isless, by=identity, rev::Bool=false, ...) Return the [dense ranking](http://en.wikipedia.org/wiki/Ranking#Dense_ranking_.28.221223.22_ranking.29) -("1223" ranking) of an array. The `lt` keyword allows providing a custom "less -than" function; use `rev=true` to reverse the sorting order. Items that -compare equal receive the same ranking, and the next subsequent rank is +("1223" ranking) of an array. Supports the same keyword arguments as `sort(x)` function. +Items that compare equal receive the same ranking, and the next subsequent rank is assigned with no gap. Missing values are assigned rank `missing`. """ @@ -165,12 +162,11 @@ end # order (aka. rank), resolving ties using the mean rank """ - tiedrank(x; sortkwargs...) + tiedrank(x; lt=isless, by=identity, rev::Bool=false, ...) Return the [tied ranking](http://en.wikipedia.org/wiki/Ranking#Fractional_ranking_.28.221_2.5_2.5_4.22_ranking.29), also called fractional or "1 2.5 2.5 4" ranking, -of an array. The `lt` keyword allows providing a custom "less -than" function; use `rev=true` to reverse the sorting order. +of an array. Supports the same keyword arguments as `sort(x)` function. Items that compare equal receive the mean of the rankings they would have been assigned under ordinal ranking. Missing values are assigned rank `missing`. From 88aeee5316cefc8fe47474d0eb7bc9c01e546c9d Mon Sep 17 00:00:00 2001 From: Alexey Stukalov Date: Fri, 2 Oct 2020 19:28:48 +0200 Subject: [PATCH 6/7] rankings: cleanup docstring --- src/ranking.jl | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/src/ranking.jl b/src/ranking.jl index 4a894d332..721778548 100644 --- a/src/ranking.jl +++ b/src/ranking.jl @@ -45,9 +45,9 @@ end ordinalrank(x; lt=isless, by=identity, rev::Bool=false, ...) Return the [ordinal ranking](https://en.wikipedia.org/wiki/Ranking#Ordinal_ranking_.28.221234.22_ranking.29) -("1234" ranking) of an array. Supports the same keyword arguments as `sort(x; sortkwargs...)` -function. All items in `x` are given distinct, successive ranks based on their -position in `sort(x; sortkwargs...)`. +("1234" ranking) of an array. Supports the same keyword arguments as the `sort` function. +All items in `x` are given distinct, successive ranks based on their position +in the sorted vector. Missing values are assigned rank `missing`. """ ordinalrank(x::AbstractArray; sortkwargs...) = @@ -82,9 +82,9 @@ end competerank(x; lt=isless, by=identity, rev::Bool=false, ...) Return the [standard competition ranking](http://en.wikipedia.org/wiki/Ranking#Standard_competition_ranking_.28.221224.22_ranking.29) -("1224" ranking) of an array. Supports the same keyword arguments as `sort(x)` function. -Items that compare equal are given the same rank, then a gap is left -in the rankings the size of the number of tied items - 1. +("1224" ranking) of an array. Supports the same keyword arguments as the `sort` function. +Equal (*"tied"*) items are given the same rank, and the next rank comes after a gap +that is equal to the number of tied items - 1. Missing values are assigned rank `missing`. """ competerank(x::AbstractArray; sortkwargs...) = @@ -119,8 +119,8 @@ end denserank(x; lt=isless, by=identity, rev::Bool=false, ...) Return the [dense ranking](http://en.wikipedia.org/wiki/Ranking#Dense_ranking_.28.221223.22_ranking.29) -("1223" ranking) of an array. Supports the same keyword arguments as `sort(x)` function. -Items that compare equal receive the same ranking, and the next subsequent rank is +("1223" ranking) of an array. Supports the same keyword arguments as the `sort` function. +Equal items receive the same rank, and the next subsequent rank is assigned with no gap. Missing values are assigned rank `missing`. """ @@ -160,15 +160,14 @@ function _tiedrank!(rks::AbstractArray, x::AbstractArray, p::IntegerArray) return rks end -# order (aka. rank), resolving ties using the mean rank """ tiedrank(x; lt=isless, by=identity, rev::Bool=false, ...) Return the [tied ranking](http://en.wikipedia.org/wiki/Ranking#Fractional_ranking_.28.221_2.5_2.5_4.22_ranking.29), also called fractional or "1 2.5 2.5 4" ranking, -of an array. Supports the same keyword arguments as `sort(x)` function. -Items that compare equal receive the mean of the -rankings they would have been assigned under ordinal ranking. +of an array. Supports the same keyword arguments as the `sort` function. +Equal (*"tied"*) items receive the mean of the ranks they would +have been assigned under the ordinal ranking (see [`ordinalrank`](@ref)). Missing values are assigned rank `missing`. """ tiedrank(x::AbstractArray; sortkwargs...) = From c842c070e0c692680c0458eaa09bce5ef1801284 Mon Sep 17 00:00:00 2001 From: Alexey Stukalov Date: Tue, 6 Oct 2020 18:20:59 +0200 Subject: [PATCH 7/7] remove unused --- src/ranking.jl | 1 - 1 file changed, 1 deletion(-) diff --git a/src/ranking.jl b/src/ranking.jl index 721778548..05a5b4657 100644 --- a/src/ranking.jl +++ b/src/ranking.jl @@ -23,7 +23,6 @@ end function _rank(f!, x::AbstractArray{>: Missing}, R::Type=Int; sortkwargs...) inds = findall(!ismissing, vec(x)) isempty(inds) && return missings(R, size(x)) - T = nonmissingtype(eltype(x)) xv = disallowmissing(view(vec(x), inds)) ordv = sortperm(xv; sortkwargs...) rks = missings(R, size(x))