From f0cccd6ee80f84d1281f562cae1ece19ad37598d Mon Sep 17 00:00:00 2001
From: Lilith Orion Hafner <60898866+LilithHafner@users.noreply.github.com>
Date: Wed, 8 Jun 2022 03:39:44 -0400
Subject: [PATCH] make counting more robust to input datatype (#722)

---
 Project.toml    |   3 +-
 src/counts.jl   | 178 +++++++++++++++----------
 src/weights.jl  |   2 +
 test/counts.jl  | 347 +++++++++++++++++++++++++++---------------------
 test/weights.jl |   2 +
 5 files changed, 304 insertions(+), 228 deletions(-)

diff --git a/Project.toml b/Project.toml
index c4781f7d9..6a35bceb7 100644
--- a/Project.toml
+++ b/Project.toml
@@ -28,8 +28,9 @@ julia = "1"
 [extras]
 Dates = "ade2ca70-3891-5945-98fb-dc099432e06a"
 DelimitedFiles = "8bb1440f-4735-579b-a4ab-409b98df4dab"
+OffsetArrays = "6fe1bfb0-de20-5000-8ca7-80f57d26f881"
 StableRNGs = "860ef19b-820b-49d6-a774-d7a799459cd3"
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 
 [targets]
-test = ["Dates", "DelimitedFiles", "StableRNGs", "Test"]
+test = ["Dates", "DelimitedFiles", "OffsetArrays", "StableRNGs", "Test"]
diff --git a/src/counts.jl b/src/counts.jl
index 580870598..4333edc25 100644
--- a/src/counts.jl
+++ b/src/counts.jl
@@ -16,24 +16,24 @@ end
 
 #### functions for counting a single list of integers (1D)
 """
-    addcounts!(r, x, levels::UnitRange{<:Int}, [wv::AbstractWeights])
+    addcounts!(r, x, levels::UnitRange{<:Integer}, [wv::AbstractWeights])
 
 Add the number of occurrences in `x` of each value in `levels` to an existing
-array `r`. If a weighting vector `wv` is specified, the sum of weights is used
-rather than the raw counts.
+array `r`. For each `xi ∈ x`, if `xi == levels[j]`, then we increment `r[j]`.
+
+If a weighting vector `wv` is specified, the sum of weights is used rather than the
+raw counts.
 """
 function addcounts!(r::AbstractArray, x::IntegerArray, levels::IntUnitRange)
-    # add counts of integers from x to r
+    # add counts of integers from x that fall within levels to r
 
-    k = length(levels)
-    length(r) == k || throw(DimensionMismatch())
+    checkbounds(r, axes(levels)...)
 
-    m0 = levels[1]
-    m1 = levels[end]
-    b = m0 - 1
+    m0 = first(levels)
+    m1 = last(levels)
+    b = m0 - firstindex(levels) # firstindex(levels) == 1 because levels::IntUnitRange
 
-    @inbounds for i in 1 : length(x)
-        xi = x[i]
+    @inbounds for xi in x
         if m0 <= xi <= m1
             r[xi - b] += 1
         end
@@ -42,15 +42,21 @@ function addcounts!(r::AbstractArray, x::IntegerArray, levels::IntUnitRange)
 end
 
 function addcounts!(r::AbstractArray, x::IntegerArray, levels::IntUnitRange, wv::AbstractWeights)
-    k = length(levels)
-    length(r) == k || throw(DimensionMismatch())
+    # add wv weighted counts of integers from x that fall within levels to r
+
+    length(x) == length(wv) ||
+        throw(DimensionMismatch("x and wv must have the same length, got $(length(x)) and $(length(wv))"))
+
+    xv = vec(x) # discard shape because weights() discards shape
+
+    checkbounds(r, axes(levels)...)
 
-    m0 = levels[1]
-    m1 = levels[end]
+    m0 = first(levels)
+    m1 = last(levels)
     b = m0 - 1
 
-    @inbounds for i in 1 : length(x)
-        xi = x[i]
+    @inbounds for i in eachindex(xv, wv)
+        xi = xv[i]
         if m0 <= xi <= m1
             r[xi - b] += wv[i]
         end
@@ -69,8 +75,8 @@ falling in that range will be considered (the others will be ignored without
 raising an error or a warning). If an integer `k` is provided, only values in the
 range `1:k` will be considered.
 
-If a weighting vector `wv` is specified, the sum of the weights is used rather than the
-raw counts.
+If a vector of weights `wv` is provided, the sum of the weights is computed rather
+than the raw counts.
 
 The output is a vector of length `length(levels)`.
 """
@@ -90,8 +96,10 @@ counts(x::IntegerArray, wv::AbstractWeights) = counts(x, span(x), wv)
     proportions(x, levels=span(x), [wv::AbstractWeights])
 
 Return the proportion of values in the range `levels` that occur in `x`.
-Equivalent to `counts(x, levels) / length(x)`. If a weighting vector `wv`
-is specified, the sum of the weights is used rather than the raw counts.
+Equivalent to `counts(x, levels) / length(x)`.
+
+If a vector of weights `wv` is provided, the proportion of weights is computed rather
+than the proportion of raw counts.
 """
 proportions(x::IntegerArray, levels::IntUnitRange) = counts(x, levels) .* inv(length(x))
 proportions(x::IntegerArray, levels::IntUnitRange, wv::AbstractWeights) =
@@ -101,6 +109,9 @@ proportions(x::IntegerArray, levels::IntUnitRange, wv::AbstractWeights) =
     proportions(x, k::Integer, [wv::AbstractWeights])
 
 Return the proportion of integers in 1 to `k` that occur in `x`.
+
+If a vector of weights `wv` is provided, the proportion of weights is computed rather
+than the proportion of raw counts.
 """
 proportions(x::IntegerArray, k::Integer) = proportions(x, 1:k)
 proportions(x::IntegerArray, k::Integer, wv::AbstractWeights) = proportions(x, 1:k, wv)
@@ -110,26 +121,22 @@ proportions(x::IntegerArray, wv::AbstractWeights) = proportions(x, span(x), wv)
 #### functions for counting a single list of integers (2D)
 
 function addcounts!(r::AbstractArray, x::IntegerArray, y::IntegerArray, levels::NTuple{2,IntUnitRange})
-    # add counts of integers from x to r
-
-    n = length(x)
-    length(y) == n || throw(DimensionMismatch())
+    # add counts of pairs from zip(x,y) to r
 
     xlevels, ylevels = levels
 
-    kx = length(xlevels)
-    ky = length(ylevels)
-    size(r) == (kx, ky) || throw(DimensionMismatch())
 
-    mx0 = xlevels[1]
-    mx1 = xlevels[end]
-    my0 = ylevels[1]
-    my1 = ylevels[end]
+    checkbounds(r, axes(xlevels, 1), axes(ylevels, 1))
+
+    mx0 = first(xlevels)
+    mx1 = last(xlevels)
+    my0 = first(ylevels)
+    my1 = last(ylevels)
 
     bx = mx0 - 1
     by = my0 - 1
 
-    for i = 1:n
+    for i in eachindex(vec(x), vec(y))
         xi = x[i]
         yi = y[i]
         if (mx0 <= xi <= mx1) && (my0 <= yi <= my1)
@@ -141,28 +148,31 @@ end
 
 function addcounts!(r::AbstractArray, x::IntegerArray, y::IntegerArray,
                     levels::NTuple{2,IntUnitRange}, wv::AbstractWeights)
-    # add counts of integers from x to r
+    # add wv weighted counts of pairs from zip(x,y) to r
+
+    length(x) == length(y) == length(wv) ||
+        throw(DimensionMismatch("x, y, and wv must have the same length, but got $(length(x)), $(length(y)), and $(length(wv))"))
 
-    n = length(x)
-    length(y) == length(wv) == n || throw(DimensionMismatch())
+    axes(x) == axes(y) ||
+        throw(DimensionMismatch("x and y must have the same axes, but got $(axes(x)) and $(axes(y))"))
+
+    xv, yv = vec(x), vec(y) # discard shape because weights() discards shape
 
     xlevels, ylevels = levels
 
-    kx = length(xlevels)
-    ky = length(ylevels)
-    size(r) == (kx, ky) || throw(DimensionMismatch())
+    checkbounds(r, axes(xlevels, 1), axes(ylevels, 1))
 
-    mx0 = xlevels[1]
-    mx1 = xlevels[end]
-    my0 = ylevels[1]
-    my1 = ylevels[end]
+    mx0 = first(xlevels)
+    mx1 = last(xlevels)
+    my0 = first(ylevels)
+    my1 = last(ylevels)
 
     bx = mx0 - 1
     by = my0 - 1
 
-    for i = 1:n
-        xi = x[i]
-        yi = y[i]
+    for i in eachindex(xv, yv, wv)
+        xi = xv[i]
+        yi = yv[i]
         if (mx0 <= xi <= mx1) && (my0 <= yi <= my1)
             r[xi - bx, yi - by] += wv[i]
         end
@@ -235,13 +245,15 @@ end
 
 
 """
-    addcounts!(dict, x[, wv]; alg = :auto)
+    addcounts!(dict, x; alg = :auto)
+    addcounts!(dict, x, wv)
 
 Add counts based on `x` to a count map. New entries will be added if new values come up.
+
 If a weighting vector `wv` is specified, the sum of the weights is used rather than the
 raw counts.
 
-`alg` can be one of:
+`alg` is only allowed for unweighted counting and can be one of:
 - `:auto` (default): if `StatsBase.radixsort_safe(eltype(x)) == true` then use
                      `:radixsort`, otherwise use `:dict`.
 
@@ -284,9 +296,9 @@ function addcounts_dict!(cm::Dict{T}, x) where T
 end
 
 # If the bits type is of small size i.e. it can have up to 65536 distinct values
-# then it is always better to apply a counting-sort like reduce algorithm for 
+# then it is always better to apply a counting-sort like reduce algorithm for
 # faster results and less memory usage. However we still wish to enable others
-# to write generic algorithms, therefore the methods below still accept the 
+# to write generic algorithms, therefore the methods below still accept the
 # `alg` argument but it is ignored.
 function _addcounts!(::Type{Bool}, cm::Dict{Bool}, x::AbstractArray{Bool}; alg = :ignored)
     sumx = sum(x)
@@ -335,32 +347,42 @@ const BaseRadixSortSafeTypes = Union{Int8, Int16, Int32, Int64, Int128,
 "Can the type be safely sorted by radixsort"
 radixsort_safe(::Type{T}) where T = T<:BaseRadixSortSafeTypes
 
-function _addcounts_radix_sort_loop!(cm::Dict{T}, sx::AbstractArray{T}) where T
+function _addcounts_radix_sort_loop!(cm::Dict{T}, sx::AbstractVector{T}) where T
     isempty(sx) && return cm
-    last_sx = sx[1]
-    tmpcount = get(cm, last_sx, 0) + 1
+    last_sx = first(sx)
+    start_i = firstindex(sx)
 
     # now the data is sorted: can just run through and accumulate values before
     # adding into the Dict
-    @inbounds for i in 2:length(sx)
+    @inbounds for i in start_i+1:lastindex(sx)
         sxi = sx[i]
-        if last_sx == sxi
-            tmpcount += 1
-        else
-            cm[last_sx] = tmpcount
+        if last_sx != sxi
+            cm[last_sx] = get(cm, last_sx, 0) + i - start_i
             last_sx = sxi
-            tmpcount = get(cm, last_sx, 0) + 1
+            start_i = i
         end
     end
 
-    cm[sx[end]] = tmpcount
+    last_sx = last(sx)
+    cm[last_sx] = get(cm, last_sx, 0) + lastindex(sx) + 1 - start_i
 
     return cm
 end
 
+function _alg(x::AbstractArray)
+    @static if VERSION >= v"1.9.0-DEV"
+        return Base.DEFAULT_UNSTABLE
+    else
+        firstindex(x) == 1 ||
+            throw(ArgumentError("alg = :radixsort requires either one based indexing or Julia >= 1.9. " *
+                                "Use `alg = :dict` as an alternative."))
+        return SortingAlgorithms.RadixSort
+    end
+end
+
 function addcounts_radixsort!(cm::Dict{T}, x::AbstractArray{T}) where T
     # sort the x using radixsort
-    sx = sort(x, alg = RadixSort)
+    sx = sort(vec(x), alg=_alg(x))
 
     # Delegate the loop to a separate function since sort might not
     # be inferred in Julia 0.6 after SortingAlgorithms is loaded.
@@ -369,18 +391,24 @@ function addcounts_radixsort!(cm::Dict{T}, x::AbstractArray{T}) where T
 end
 
 # fall-back for `x` an iterator
-function addcounts_radixsort!(cm::Dict{T}, x) where T 
-    sx = sort!(collect(x), alg = RadixSort)
+function addcounts_radixsort!(cm::Dict{T}, x) where T
+    cx = vec(collect(x))
+    sx = sort!(cx, alg = _alg(cx))
     return _addcounts_radix_sort_loop!(cm, sx)
 end
 
 function addcounts!(cm::Dict{T}, x::AbstractArray{T}, wv::AbstractVector{W}) where {T,W<:Real}
-    n = length(x)
-    length(wv) == n || throw(DimensionMismatch())
+    # add wv weighted counts of values from x to cm
+
+    length(x) == length(wv) ||
+        throw(DimensionMismatch("x and wv must have the same length, got $(length(x)) and $(length(wv))"))
+
+    xv = vec(x) # discard shape because weights() discards shape
+
     z = zero(W)
 
-    for i = 1 : n
-        @inbounds xi = x[i]
+    for i in eachindex(xv, wv)
+        @inbounds xi = xv[i]
         @inbounds wi = wv[i]
         cm[xi] = get(cm, xi, z) + wi
     end
@@ -390,11 +418,14 @@ end
 
 """
     countmap(x; alg = :auto)
-    countmap(x::AbstractVector, w::AbstractVector{<:Real}; alg = :auto)
+    countmap(x::AbstractVector, wv::AbstractVector{<:Real})
 
-Return a dictionary mapping each unique value in `x` to its number
-of occurrences. A vector of weights `w` can be provided when `x` is a vector.
+Return a dictionary mapping each unique value in `x` to its number of occurrences.
 
+If a weighting vector `wv` is specified, the sum of weights is used rather than the
+raw counts.
+
+`alg` is only allowed for unweighted counting and can be one of:
 - `:auto` (default): if `StatsBase.radixsort_safe(eltype(x)) == true` then use
                      `:radixsort`, otherwise use `:dict`.
 
@@ -414,9 +445,12 @@ countmap(x::AbstractArray{T}, wv::AbstractVector{W}) where {T,W<:Real} = addcoun
 
 """
     proportionmap(x)
+    proportionmap(x::AbstractVector, wv::AbstractWeights)
+
+Return a dictionary mapping each unique value in `x` to its proportion in `x`.
 
-Return a dictionary mapping each unique value in `x` to its
-proportion in `x`.
+If a vector of weights `wv` is provided, the proportion of weights is computed rather
+than the proportion of raw counts.
 """
 proportionmap(x::AbstractArray) = _normalize_countmap(countmap(x), length(x))
 proportionmap(x::AbstractArray, wv::AbstractWeights) = _normalize_countmap(countmap(x, wv), sum(wv))
diff --git a/src/weights.jl b/src/weights.jl
index 50043226b..9cd1a98db 100644
--- a/src/weights.jl
+++ b/src/weights.jl
@@ -21,6 +21,7 @@ length(wv::AbstractWeights) = length(wv.values)
 sum(wv::AbstractWeights) = wv.sum
 isempty(wv::AbstractWeights) = isempty(wv.values)
 size(wv::AbstractWeights) = size(wv.values)
+Base.axes(wv::AbstractWeights) = Base.axes(wv.values)
 
 Base.dataids(wv::AbstractWeights) = Base.dataids(wv.values)
 
@@ -301,6 +302,7 @@ sum(wv::UnitWeights{T}) where T = convert(T, length(wv))
 isempty(wv::UnitWeights) = iszero(wv.len)
 length(wv::UnitWeights) = wv.len
 size(wv::UnitWeights) = tuple(length(wv))
+Base.axes(wv::UnitWeights) = tuple(Base.OneTo(length(wv)))
 
 Base.convert(::Type{Vector}, wv::UnitWeights{T}) where {T} = ones(T, length(wv))
 
diff --git a/test/counts.jl b/test/counts.jl
index d7b6fea0b..f5d6ae69f 100644
--- a/test/counts.jl
+++ b/test/counts.jl
@@ -1,166 +1,203 @@
 using StatsBase
 using Test
+using OffsetArrays
 
 n = 5000
 
-# 1D integer counts
-
-x = rand(1:5, n)
-w = weights(rand(n))
-
-c = counts(x, 5)
-@test size(c) == (5,)
-c0 = Int[count(v->v == i, x) for i in 1:5]
-@test c == c0
-@test counts(x .+ 1, 2:6) == c0
-@test proportions(x, 1:5) ≈ (c0 ./ n)
-
-c = counts(x)
-@test size(c) == (5,)
-c0 = Int[count(v->v == i, x) for i in 1:5]
-@test c == c0
-@test counts(x .+ 1, 2:6) == c0
-@test proportions(x) ≈ (c0 ./ n)
-
-c = counts(x, 5, w)
-@test size(c) == (5,)
-c0 = Float64[sum(w.values[x .== i]) for i in 1:5]
-@test c                      ≈ c0
-@test counts(x .+ 1, 2:6, w) ≈ c0
-@test proportions(x, 1:5, w) ≈ (c0 ./ sum(w))
-
-c = counts(x, w)
-@test size(c) == (5,)
-c0 = Float64[sum(w.values[x .== i]) for i in 1:5]
-@test c                      ≈ c0
-@test counts(x .+ 1, 2:6, w) ≈ c0
-@test proportions(x, w)      ≈ (c0 ./ sum(w))
-
-# 2D integer counts
-
-x = rand(1:4, n)
-y = rand(1:5, n)
-w = weights(rand(n))
-
-c = counts(x, y, (4, 5))
-@test size(c) == (4, 5)
-c0 = Int[count(t->t != 0,  (x .== i) .& (y .== j)) for i in 1:4, j in 1:5]
-@test c == c0
-@test counts(x .+ 2, y .+ 3, (3:6, 4:8)) == c0
-@test proportions(x, y, (1:4, 1:5)) ≈ (c0 ./ n)
-
-c = counts(x, y)
-@test size(c) == (4, 5)
-c0 = Int[count(t->t != 0, (x .== i) .& (y .== j)) for i in 1:4, j in 1:5]
-@test c == c0
-@test counts(x .+ 2, y .+ 3, (3:6, 4:8)) == c0
-@test proportions(x, y,) ≈ (c0 ./ n)
-
-c = counts(x, y, (4, 5), w)
-@test size(c) == (4, 5)
-c0 = Float64[sum(w.values[(x .== i) .& (y .== j)]) for i in 1:4, j in 1:5]
-@test c                                     ≈ c0
-@test counts(x .+ 2, y .+ 3, (3:6, 4:8), w) ≈ c0
-@test proportions(x, y, (1:4, 1:5), w)      ≈ (c0 ./ sum(w))
-
-c = counts(x, y, w)
-@test size(c) == (4, 5)
-c0 = Float64[sum(w.values[(x .== i) .& (y .== j)]) for i in 1:4, j in 1:5]
-@test c                                     ≈ c0
-@test counts(x .+ 2, y .+ 3, (3:6, 4:8), w) ≈ c0
-@test proportions(x, y, w)                  ≈ (c0 ./ sum(w))
-
-
-# count map
-
-x = ["a", "b", "a", "a", "b", "c"]
-w = [1.0, 1.5, 2.0, 2.5, 3.0, 3.5]
-
-cm = countmap(x)
-@test cm["a"] == 3
-@test cm["b"] == 2
-@test cm["c"] == 1
-
-# iterator, non-radixsort
-cm_missing = countmap(skipmissing(x))
-cm_any_itr = countmap((i for i in x))
-@test cm_missing == cm_any_itr == cm
-@test cm_missing isa Dict{String, Int}
-@test cm_any_itr isa Dict{Any, Int}
-
-pm = proportionmap(x)
-@test pm["a"] ≈ (1/2)
-@test pm["b"] ≈ (1/3)
-@test pm["c"] ≈ (1/6)
-
-
-# testing the radixsort branch of countmap
-xx = repeat([6, 1, 3, 1], outer=100_000)
-cm = countmap(xx)
-@test cm == Dict(1 => 200_000, 3 => 100_000, 6 => 100_000)
-
-# with iterator
-cm_missing = countmap(skipmissing(xx))
-@test cm_missing isa Dict{Int, Int}
-@test cm_missing == cm
-
-cm_any_itr = countmap((i for i in xx)) 
-@test cm_any_itr isa Dict{Any,Int} # no knowledge about type
-@test cm_missing == cm
-
-# with empty array
-@test countmap(Int[]) == Dict{Int, Int}()
-
-# testing the radixsort-based addcounts
-xx = repeat([6, 1, 3, 1], outer=100_000)
-cm = Dict{Int, Int}()
-StatsBase.addcounts_radixsort!(cm,xx)
-@test cm == Dict(1 => 200_000, 3 => 100_000, 6 => 100_000)
-xx2 = repeat([7, 1, 3, 1], outer=100_000)
-StatsBase.addcounts_radixsort!(cm,xx2)
-@test cm == Dict(1 => 400_000, 3 => 200_000, 6 => 100_000, 7 => 100_000)
-# with iterator
-cm_missing = Dict{Int, Int}()
-StatsBase.addcounts_radixsort!(cm_missing,skipmissing(xx))
-@test cm_missing == Dict(1 => 200_000, 3 => 100_000, 6 => 100_000)
-StatsBase.addcounts_radixsort!(cm_missing,skipmissing(xx2))
-@test cm_missing == Dict(1 => 400_000, 3 => 200_000, 6 => 100_000, 7 => 100_000)
-
-# testing the Dict-based addcounts
-cm = Dict{Int, Int}()
-cm_itr = Dict{Int, Int}()
-StatsBase.addcounts_dict!(cm,xx)
-StatsBase.addcounts_dict!(cm_itr,skipmissing(xx))
-@test cm_itr == cm == Dict(1 => 200_000, 3 => 100_000, 6 => 100_000)
-@test cm_itr isa Dict{Int, Int}
-
-cm = countmap(x, weights(w))
-@test cm["a"] == 5.5
-@test cm["b"] == 4.5
-@test cm["c"] == 3.5
-
-@test cm == countmap(x, w)
-
-pm = proportionmap(x, weights(w))
-@test pm["a"] ≈ (5.5 / 13.5)
-@test pm["b"] ≈ (4.5 / 13.5)
-@test pm["c"] ≈ (3.5 / 13.5)
-
-# testing small bits type
-bx = [true, false, true, true, false]
-cm_bx_missing = countmap(skipmissing(bx))
-@test cm_bx_missing == countmap(bx) == Dict(true => 3, false => 2)
-@test cm_bx_missing isa Dict{Bool, Int}
-
-for T in [UInt8, UInt16, Int8, Int16]
-    tx = T[typemin(T), 8, typemax(T), 19, 8]
-    tx_missing = skipmissing(T[typemin(T), 8, typemax(T), 19, 8])
-    cm_tx_missing = countmap(tx_missing)
-    @test cm_tx_missing == countmap(tx) == Dict(typemin(T) => 1, typemax(T) => 1, 8 => 2, 19 => 1)
-    @test cm_tx_missing isa Dict{T, Int}
+@testset "1D integer counts" begin
+    x = rand(1:5, n)
+    w = weights(rand(n))
+    x0 = deepcopy(x)
+    w0 = deepcopy(w)
+
+    c0 = Int[count(v->v == i, x) for i in 1:5]
+    @test counts(x, 5)                      == c0
+    @test counts(x .+ 1, 2:6)               == c0
+    @test proportions(x, 1:5)               ≈ (c0 ./ n)
+    @test counts(reshape(x, 10, 50, 10), 5) == c0
+
+    @test counts(x)                      == c0
+    @test proportions(x)                 ≈ (c0 ./ n)
+    @test counts(reshape(x, 10, 50, 10)) == c0
+
+    c0 = reshape(c0, 1, 5)
+    @test addcounts!(fill(0, 1, 5),                     x,  1:5) == c0
+    @test addcounts!(fill(0, 1, 5), reshape(x, 10, 50, 10), 1:5) == c0
+
+    c0 = Float64[sum(w.values[x .== i]) for i in 1:5]
+    @test counts(x, 5, w)                      ≈ c0
+    @test counts(x .+ 1, 2:6, w)               ≈ c0
+    @test proportions(x, 1:5, w)               ≈ (c0 ./ sum(w))
+    @test counts(reshape(x, 10, 50, 10), 5, w) ≈ c0 # Perhaps this should not be allowed
+
+    @test counts(x, w)                      ≈ c0
+    @test counts(x .+ 1, 2:6, w)            ≈ c0
+    @test proportions(x, w)                 ≈ (c0 ./ sum(w))
+    @test counts(reshape(x, 10, 50, 10), w) ≈ c0 # Perhaps this should not be allowed
+
+    #addcounts! to row matrix
+    c0 = reshape(c0, 1, 5)
+    @test addcounts!(fill(0.0, 1, 5),                     x,  1:5, w) ≈ c0
+    @test addcounts!(fill(0.0, 1, 5), reshape(x, 10, 50, 10), 1:5, w) ≈ c0 # Perhaps this should not be allowed
+
+    @test x == x0
+    @test w == w0
+end
+
+
+@testset "2D integer counts" begin
+    x = rand(1:4, n)
+    y = rand(1:5, n)
+    w = weights(rand(n))
+    x0 = deepcopy(x)
+    y0 = deepcopy(y)
+    w0 = deepcopy(w)
+
+    c0 = Int[count(t->t != 0,  (x .== i) .& (y .== j)) for i in 1:4, j in 1:5]
+    @test counts(x, y, (4, 5))                                           == c0
+    @test counts(x .+ 2, y .+ 3, (3:6, 4:8))                             == c0
+    @test proportions(x, y, (1:4, 1:5))                                  ≈ (c0 ./ n)
+    @test counts(reshape(x, 10, 50, 10), reshape(y, 10, 50, 10), (4, 5)) == c0
+
+    @test counts(x, y)                                           == c0
+    @test counts(x .+ 2, y .+ 3, (3:6, 4:8))                     == c0
+    @test proportions(x, y,)                                     ≈ (c0 ./ n)
+    @test counts(reshape(x, 10, 50, 10), reshape(y, 10, 50, 10)) == c0
+
+    c0 = Float64[sum(w.values[(x .== i) .& (y .== j)]) for i in 1:4, j in 1:5]
+    @test counts(x, y, (4, 5), w)                                           ≈ c0
+    @test counts(x .+ 2, y .+ 3, (3:6, 4:8), w)                             ≈ c0
+    @test proportions(x, y, (1:4, 1:5), w)                                  ≈ (c0 ./ sum(w))
+    @test counts(reshape(x, 10, 50, 10), reshape(y, 10, 50, 10), (4, 5), w) ≈ c0 # Perhaps this should not be allowed
+
+    @test counts(x, y, w)                                           ≈ c0
+    @test counts(x .+ 2, y .+ 3, (3:6, 4:8), w)                     ≈ c0
+    @test proportions(x, y, w)                                      ≈ (c0 ./ sum(w))
+    @test counts(reshape(x, 10, 50, 10), reshape(y, 10, 50, 10), w) ≈ c0 # Perhaps this should not be allowed
+
+    @test x == x0
+    @test y == y0
+    @test w == w0
+end
+
+@testset "count map" begin
+    x = ["a", "b", "a", "a", "b", "c"]
+    w = [1.0, 1.5, 2.0, 2.5, 3.0, 3.5]
+
+    cm = countmap(x)
+    @test cm["a"] == 3
+    @test cm["b"] == 2
+    @test cm["c"] == 1
+
+    # iterator, non-radixsort
+    cm_missing = countmap(skipmissing(x))
+    cm_any_itr = countmap((i for i in x))
+    @test cm_missing == cm_any_itr == cm
+    @test cm_missing isa Dict{String, Int}
+    @test cm_any_itr isa Dict{Any, Int}
+
+    pm = proportionmap(x)
+    @test pm["a"] ≈ (1/2)
+    @test pm["b"] ≈ (1/3)
+    @test pm["c"] ≈ (1/6)
+
+
+    # testing the radixsort branch of countmap
+    xx = repeat([6, 1, 3, 1], outer=100_000)
+    cm = countmap(xx)
+    @test cm == Dict(1 => 200_000, 3 => 100_000, 6 => 100_000)
+
+    # with iterator
+    cm_missing = countmap(skipmissing(xx))
+    @test cm_missing isa Dict{Int, Int}
+    @test cm_missing == cm
+
+    cm_any_itr = countmap((i for i in xx))
+    @test cm_any_itr isa Dict{Any,Int} # no knowledge about type
+    @test cm_any_itr == cm
+
+    # with multidimensional array
+    @test countmap(reshape(xx, 20, 100, 20, 10); alg=:radixsort) == cm
+    @test countmap(reshape(xx, 20, 100, 20, 10); alg=:dict)      == cm
+
+    # with empty array
+    @test countmap(Int[]) == Dict{Int, Int}()
+
+    # testing the radixsort-based addcounts
+    xx = repeat([6, 1, 3, 1], outer=100_000)
+    cm = Dict{Int, Int}()
+    StatsBase.addcounts_radixsort!(cm,xx)
+    @test cm == Dict(1 => 200_000, 3 => 100_000, 6 => 100_000)
+    xx2 = repeat([7, 1, 3, 1], outer=100_000)
+    StatsBase.addcounts_radixsort!(cm,xx2)
+    @test cm == Dict(1 => 400_000, 3 => 200_000, 6 => 100_000, 7 => 100_000)
+    # with iterator
+    cm_missing = Dict{Int, Int}()
+    StatsBase.addcounts_radixsort!(cm_missing,skipmissing(xx))
+    @test cm_missing == Dict(1 => 200_000, 3 => 100_000, 6 => 100_000)
+    StatsBase.addcounts_radixsort!(cm_missing,skipmissing(xx2))
+    @test cm_missing == Dict(1 => 400_000, 3 => 200_000, 6 => 100_000, 7 => 100_000)
+
+    # testing the Dict-based addcounts
+    cm = Dict{Int, Int}()
+    cm_itr = Dict{Int, Int}()
+    StatsBase.addcounts_dict!(cm,xx)
+    StatsBase.addcounts_dict!(cm_itr,skipmissing(xx))
+    @test cm_itr == cm == Dict(1 => 200_000, 3 => 100_000, 6 => 100_000)
+    @test cm_itr isa Dict{Int, Int}
+
+    cm = countmap(x, weights(w))
+    @test cm["a"] == 5.5
+    @test cm["b"] == 4.5
+    @test cm["c"] == 3.5
+
+    @test cm == countmap(x, w)
+    @test cm == countmap(reshape(x, 2, 3), w)
+
+    pm = proportionmap(x, weights(w))
+    @test pm["a"] ≈ (5.5 / 13.5)
+    @test pm["b"] ≈ (4.5 / 13.5)
+    @test pm["c"] ≈ (3.5 / 13.5)
+
+    # testing small bits type
+    bx = [true, false, true, true, false]
+    cm_bx_missing = countmap(skipmissing(bx))
+    @test cm_bx_missing == countmap(bx) == Dict(true => 3, false => 2)
+    @test cm_bx_missing isa Dict{Bool, Int}
+
+    for T in [UInt8, UInt16, Int8, Int16]
+        tx = T[typemin(T), 8, typemax(T), 19, 8]
+        tx_missing = skipmissing(T[typemin(T), 8, typemax(T), 19, 8])
+        cm_tx_missing = countmap(tx_missing)
+        @test cm_tx_missing == countmap(tx) == Dict(typemin(T) => 1, typemax(T) => 1, 8 => 2, 19 => 1)
+        @test cm_tx_missing isa Dict{T, Int}
+    end
 end
 
 @testset "views" begin
     X = view([1,1,1,2,2], 1:5)
     @test countmap(X) == countmap(copy(X))
 end
+
+if VERSION >= v"1.9.0-DEV"
+    @testset "offset arrays" begin
+        x = rand(1:5, n)
+        w = rand(n)
+        xw = weights(w)
+        y = OffsetArray(x, n÷2)
+        yw = weights(OffsetArray(w, n÷2))
+        z = OffsetArray(x, -2n)
+        zw = weights(OffsetArray(w, -2n))
+
+        # proportions calls counts which calls addcounts!
+        @test proportions(x)       == proportions(y)       == proportions(z)
+        @test proportions(x, xw)   == proportions(y, yw)   == proportions(z, zw)
+        @test proportionmap(x)     == proportionmap(y)     == proportionmap(z)
+        @test proportionmap(x, xw) == proportionmap(y, yw) == proportionmap(z, zw)
+        @test countmap(x) == countmap(x; alg = :dict) == countmap(x; alg = :radixsort) ==
+              countmap(y) == countmap(y; alg = :dict) == countmap(y; alg = :radixsort) ==
+              countmap(z) == countmap(z; alg = :dict) == countmap(z; alg = :radixsort)
+        @test countmap(x, xw)      == countmap(y, yw)      == countmap(z, zw)
+        # countmap and proportionmap only support the :dict algorithm for weighted sums.
+    end
+end
diff --git a/test/weights.jl b/test/weights.jl
index 8562c5691..e8f7febe8 100644
--- a/test/weights.jl
+++ b/test/weights.jl
@@ -13,6 +13,7 @@ weight_funcs = (weights, aweights, fweights, pweights)
 
     @test isempty(f(Float64[]))
     @test size(f([1, 2, 3])) == (3,)
+    @test axes(f([1, 2, 3])) == (Base.OneTo(3),)
 
     w  = [1., 2., 3.]
     wv = f(w)
@@ -107,6 +108,7 @@ end
     @test !isempty(wv)
     @test length(wv) === 3
     @test size(wv) === (3,)
+    @test axes(wv) === (Base.OneTo(3),)
     @test sum(wv) === 3.
     @test wv == fill(1.0, 3)
     @test StatsBase.varcorrection(wv) == 1/3