Skip to content

Commit

Permalink
Merge branch 'main' into 145-problem-object
Browse files Browse the repository at this point in the history
  • Loading branch information
SamuelBrand1 committed Mar 15, 2024
2 parents c394501 + 3a66b24 commit b0bc4d1
Show file tree
Hide file tree
Showing 4 changed files with 285 additions and 2 deletions.
2 changes: 1 addition & 1 deletion EpiAware/src/EpiAware.jl
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ export spread_draws, scan, create_discrete_pmf
include("EpiLatentModels/EpiLatentModels.jl")
using .EpiLatentModels

export RandomWalk, AR
export RandomWalk, AR, DiffLatentModel

include("EpiInfModels/EpiInfModels.jl")
using .EpiInfModels
Expand Down
4 changes: 3 additions & 1 deletion EpiAware/src/EpiLatentModels/EpiLatentModels.jl
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,12 @@ using ..EpiAwareBase

using Turing, Distributions, DocStringExtensions

export RandomWalk, AR
export RandomWalk, AR, DiffLatentModel

include("docstrings.jl")
include("randomwalk.jl")
include("autoregressive.jl")
include("difflatentmodel.jl")
include("utils.jl")

end
191 changes: 191 additions & 0 deletions EpiAware/src/EpiLatentModels/difflatentmodel.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,191 @@

@doc raw"""
Model the latent process as a `d`-fold differenced version of another process.
## Mathematical specification
Let ``\Delta`` be the differencing operator. If ``\tilde{Z}_t`` is a realisation of
undifferenced latent model supplied to `DiffLatentModel`, then the differenced process is
given by,
```math
\Delta^{(d)} Z_t = \tilde{Z}_t, \quad t = d+1, \ldots.
```
We can recover ``Z_t`` by applying the inverse differencing operator ``\Delta^{-1}``, which
corresponds to the cumulative sum operator `cumsum` in Julia, `d`-times. The `d` initial
terms ``Z_1, \ldots, Z_d`` are inferred.
## Constructors
- `DiffLatentModel(model, init_prior::Distribution; d::Int)`
Constructs a `DiffLatentModel` for `d`-fold differencing with `latentmodel` as the
undifferenced latent process. All initial terms have common prior
`init_prior_distribution`.
- `DiffLatentModel(; model, init_priors::Vector{D} where {D <: Distribution})`
Constructs a `DiffLatentModel` for `d`-fold differencing with `latentmodel` as the
undifferenced latent process. The `d` initial terms have priors given by the vector
`init_priors`, therefore `length(init_priors)` sets `d`.
## Example usage with `generate_latent`
`generate_latent` can be used to construct a `Turing` model for the differenced latent
process. In this example, the underlying undifferenced process is a `RandomWalk` model.
First, we construct a `RandomWalk` struct with an initial value prior and a step size
standard deviation prior.
```julia
using Distributions, EpiAware
rw = RandomWalk(Normal(0.0, 1.0), truncated(Normal(0.0, 0.05), 0.0, Inf))
```
Then, we can use `DiffLatentModel` to construct a `DiffLatentModel` for `d`-fold
differenced process with `rw` as the undifferenced latent process.
We have two constructor options for `DiffLatentModel`. The first option is to supply a
common prior distribution for the initial terms and specify `d` as follows:
```julia
diff_model = DiffLatentModel(rw, Normal(); d = 2)
```
Or we can supply a vector of priors for the initial terms and `d` is inferred as follows:
```julia
diff_model2 = DiffLatentModel(; model = rw, init_priors = [Normal(), Normal()])
```
Then, we can use `generate_latent` to construct a Turing model for the differenced latent
process generating a length `n` process,
```julia
# Construct a Turing model
n = 100
difference_mdl = generate_latent(diff_model, n)
```
Now we can use the `Turing` PPL API to sample underlying parameters and generate the
unobserved latent process.
```julia
#Sample random parameters from prior
θ = rand(difference_mdl)
#Get a sampled latent process as a generated quantity from the model
(Z_t, _) = generated_quantities(difference_mdl, θ)
Z_t
```
"""
struct DiffLatentModel{M <: AbstractLatentModel, P} <: AbstractLatentModel
    "Underlying latent model for the differenced process"
    model::M
    "The prior distribution for the initial latent variables."
    init_prior::P
    "Number of times differenced."
    d::Int

    # Convenience constructor: a single common prior repeated for all `d`
    # initial terms; delegates to the keyword constructor.
    function DiffLatentModel(
            model::AbstractLatentModel, init_prior::Distribution; d::Int)
        init_priors = fill(init_prior, d)
        return DiffLatentModel(; model = model, init_priors = init_priors)
    end

    # Keyword constructor: `d` is inferred from the number of initial-term
    # priors, which are packed into a single multivariate distribution.
    function DiffLatentModel(; model::AbstractLatentModel,
            init_priors::Vector{D} where {D <: Distribution} = [Normal()])
        d = length(init_priors)
        init_prior = _expand_dist(init_priors)
        return DiffLatentModel(model, init_prior, d)
    end

    # Core inner constructor: validates arguments before construction.
    # Input validation uses explicit `ArgumentError`s rather than `@assert`,
    # since assertions may be disabled at higher optimization levels.
    function DiffLatentModel(model::AbstractLatentModel, init_prior::Distribution, d::Int)
        d > 0 || throw(ArgumentError("d must be greater than 0, got d = $d"))
        d == length(init_prior) ||
            throw(ArgumentError("d must be equal to the length of init_prior"))
        return new{typeof(model), typeof(init_prior)}(model, init_prior, d)
    end
end

"""
Generate a `Turing` model for `n`-step latent process ``Z_t`` using a differenced
latent model defined by `latent_model`.
# Arguments
- `latent_model::DiffLatentModel`: The differential latent model.
- `n`: The length of the latent variables.
## Turing model specifications
### Sampled random variables
- `latent_init`: The initial latent process variables.
- Other random variables defined by `model<:AbstractLatentModel` field of the undifferenced
model.
### Generated quantities
- A tuple containing the generated latent process as its first argument
and a `NamedTuple` of sampled auxiliary variables as second argument.
# Example usage with `DiffLatentModel` model constructor
`generate_latent` can be used to construct a `Turing` model for the differenced latent
process. In this example, the underlying undifferenced process is a `RandomWalk` model.
First, we construct a `RandomWalk` struct with an initial value prior and a step size
standard deviation prior.
```julia
using Distributions, EpiAware
rw = RandomWalk(Normal(0.0, 1.0), truncated(Normal(0.0, 0.05), 0.0, Inf))
```
Then, we can use `DiffLatentModel` to construct a `DiffLatentModel` for `d`-fold
differenced process with `rw` as the undifferenced latent process.
We have two constructor options for `DiffLatentModel`. The first option is to supply a
common prior distribution for the initial terms and specify `d` as follows:
```julia
diff_model = DiffLatentModel(rw, Normal(); d = 2)
```
Or we can supply a vector of priors for the initial terms and `d` is inferred as follows:
```julia
diff_model2 = DiffLatentModel(; model = rw, init_priors = [Normal(), Normal()])
```
Then, we can use `generate_latent` to construct a Turing model for the differenced latent
process generating a length `n` process,
```julia
# Construct a Turing model
n = 100
difference_mdl = generate_latent(diff_model, n)
```
Now we can use the `Turing` PPL API to sample underlying parameters and generate the
unobserved latent process.
```julia
#Sample random parameters from prior
θ = rand(difference_mdl)
#Get a sampled latent process as a generated quantity from the model
(Z_t, _) = generated_quantities(difference_mdl, θ)
Z_t
```
"""
@model function EpiAwareBase.generate_latent(latent_model::DiffLatentModel, n)
    # Number of initial terms that must be inferred before the
    # undifferenced model can generate the remaining steps.
    d = latent_model.d
    @assert n>d "n must be longer than d"
    # Sample the d initial latent values jointly from their prior
    # (a multivariate distribution built from the per-term priors).
    latent_init ~ latent_model.init_prior

    # Generate the remaining n - d values from the undifferenced latent
    # model as a Turing submodel; it returns the process and a NamedTuple
    # of its auxiliary sampled variables.
    @submodel diff_latent, diff_latent_aux = generate_latent(latent_model.model, n - d)

    # Undifference by cumulatively summing d times, and expose latent_init
    # alongside the submodel's auxiliary variables as generated quantities.
    return _combine_diff(latent_init, diff_latent, d), (; latent_init, diff_latent_aux...)
end

"""
Reconstruct an undifferenced series by prepending the initial terms `init`
to the differenced values `diff` and applying the cumulative sum `d` times
(the inverse of `d`-fold differencing). Returns the combined series.
"""
function _combine_diff(init, diff, d)
    series = vcat(init, diff)
    return foldl((acc, _) -> cumsum(acc), 1:d; init = series)
end
90 changes: 90 additions & 0 deletions EpiAware/test/test_difflatentmodel.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
@testitem "Testing DiffLatentModel constructor" begin
    using Distributions, Turing

    undiff = RandomWalk()

    @testset "Testing DiffLatentModel with vector of priors" begin
        # d should be inferred from the number of initial-term priors.
        priors = [Normal(0.0, 1.0), Normal(1.0, 2.0)]
        dlm = DiffLatentModel(; model = undiff, init_priors = priors)

        @test dlm.model == undiff
        @test dlm.init_prior == arraydist(priors)
        @test dlm.d == 2
    end

    @testset "Testing DiffLatentModel with single prior and d" begin
        # A shared prior plus explicit d should expand to d iid priors.
        order = 3
        shared_prior = Normal()
        dlm = DiffLatentModel(undiff, shared_prior; d = order)

        @test dlm.model == undiff
        @test dlm.init_prior == filldist(shared_prior, order)
        @test dlm.d == order
    end
end

@testitem "Testing DiffLatentModel process" begin
    using DynamicPPL, Turing
    using Distributions
    using HypothesisTests: ExactOneSampleKSTest, pvalue

    n = 100
    d = 2
    model = RandomWalk(Normal(0.0, 1.0), truncated(Normal(0.0, 0.05), 0.0, Inf))
    init_priors = [Normal(0.0, 1.0), Normal(1.0, 2.0)]
    diff_model = DiffLatentModel(model = model, init_priors = init_priors)

    latent_model = generate_latent(diff_model, n)
    # Fix the random walk parameters so the undifferenced process contributes
    # nothing random — presumably σ_RW = 0 and rw_init = 0.0 make the walk
    # identically zero, leaving only the initial terms' priors (TODO confirm
    # against the RandomWalk implementation).
    fixed_model = fix(
        latent_model, (σ_RW = 0, rw_init = 0.0))

    # Draw prior samples and collect the generated latent process (first
    # element of each generated-quantities tuple) into an n × n_samples matrix.
    n_samples = 2000
    samples = sample(fixed_model, Prior(), n_samples) |>
              chn -> mapreduce(hcat, generated_quantities(fixed_model, chn)) do gen
        gen[1]
    end

    #Because of the recursive d-times cumsum to undifference the process,
    #The distribution of the second day should be d lots of first day init distribution
    """
    Add two normal distributions `x` and `y` together.

    # Returns
    - `Normal`: The sum of `x` and `y` as a normal distribution.
    """
    function _add_normals(x::Normal, y::Normal)
        return Normal(x.μ + y.μ, sqrt(x.σ^2 + y.σ^2))
    end

    #Plus day two distribution
    # Fold adds init_priors[1] to the accumulator d times, starting from
    # init_priors[2] (the second argument of the lambda is deliberately unused).
    day2_dist = foldl((x, y) -> _add_normals(x, init_priors[1]), 1:d, init = init_priors[2])

    # Kolmogorov–Smirnov tests: day-1 samples against their prior, day-2
    # samples against the derived sum-of-normals distribution.
    ks_test_pval_day1 = ExactOneSampleKSTest(samples[1, :], init_priors[1]) |> pvalue
    ks_test_pval_day2 = ExactOneSampleKSTest(samples[2, :], day2_dist) |> pvalue

    @test size(samples) == (n, n_samples)
    @test ks_test_pval_day1 > 1e-6 #Very unlikely to fail if the model is correctly implemented
    @test ks_test_pval_day2 > 1e-6 #Very unlikely to fail if the model is correctly implemented
end

@testitem "Testing DiffLatentModel runs with AR process" begin
    using DynamicPPL, Turing
    using Distributions

    proc_len = 100
    n_diffs = 2
    ar_model = AR()
    first_day_priors = [Normal(0.0, 1.0), Normal(1.0, 2.0)]
    diff_mdl = DiffLatentModel(model = ar_model, init_priors = first_day_priors)

    # Build the Turing model and pin all random parameters so the test is a
    # pure smoke check that sampling and generation run end-to-end.
    mdl = generate_latent(diff_mdl, proc_len)
    conditioned_mdl = fix(mdl,
        (latent_init = [0.0, 1.0], σ_AR = 1.0, damp_AR = [0.8], ar_init = [0.0]))

    # Collect the generated latent process from each prior draw into a
    # proc_len × n_draws matrix.
    n_draws = 100
    prior_chn = sample(conditioned_mdl, Prior(), n_draws)
    draws = mapreduce(
        gen -> gen[1], hcat, generated_quantities(conditioned_mdl, prior_chn))

    @test size(draws) == (proc_len, n_draws)
end

0 comments on commit b0bc4d1

Please sign in to comment.