Merge branch 'main' into stack-based-refactor

MurrellGroup · Jun 19, 2024 · 4a39c6d · 4a39c6d
2 parents 0c962e0 + 27f1539
commit 4a39c6d
Show file tree

Hide file tree

Showing 17 changed files with 551 additions and 33 deletions.
diff --git a/docs/src/models.md b/docs/src/models.md
@@ -8,4 +8,60 @@ Coming soon.
 
 ## Continuous models
 
-## Compound models
+## Compound models
+
+## Lazy models
+
+### LazyPartition
+
+```@docs; canonical=false
+LazyPartition
+```
+
+#### Examples
+
+##### Example 1: Initializing for an upward pass
+Now, we show how to wrap the `CodonPartition`s from [Example 3: FUBAR](@ref) with `LazyPartition`:
+
+You simply go from initializing messages like this:
+```julia
+initial_partition = CodonPartition(Int64(length(seqs[1])/3))
+initial_partition.state .= eq_freqs
+populate_tree!(tree,initial_partition,seqnames,seqs)
+```
+
+To this:
+```julia
+initial_partition = CodonPartition(Int64(length(seqs[1])/3))
+initial_partition.state .= eq_freqs
+lazy_initial_partition = LazyPartition{CodonPartition}()
+populate_tree!(tree,lazy_initial_partition,seqnames,seqs)
+lazyprep!(tree, initial_partition)
+```
+
+With this slight modification, we go from initializing and using 554 partitions to only 6 during the subsequent `log_likelihood!` and `felsenstein!` calls. No significant decrease in performance was recorded from this switch.
+
+##### Example 2: Initializing for a downward pass
+Now, we show how to wrap the `GaussianPartition`s from [Quick example: Likelihood calculations under phylogenetic Brownian motion:](@ref) with `LazyPartition`:
+
+You simply go from initializing messages like this:
+```julia
+internal_message_init!(tree, GaussianPartition())
+```
+
+To this (technically, we only add one line of code):
+```julia
+initial_partition = GaussianPartition()
+lazy_initial_partition = LazyPartition{GaussianPartition}()
+internal_message_init!(tree, lazy_initial_partition)
+lazyprep!(tree, initial_partition, direction=LazyDown(isleafnode))
+```
+!!! note
+    Here, we provide a direction for `lazyprep!`. The direction is an instance of `LazyDown`, which is initialized with the `isleafnode` function. The function `isleafnode` dictates whether a node saves its sampled observation after a down pass. If you use `direction=LazyDown()`, every node saves its observation.
+
+#### Surrounding LazyPartition
+```@docs; canonical=false
+lazyprep!
+LazyUp
+LazyDown
+```
diff --git a/src/MolecularEvolution.jl b/src/MolecularEvolution.jl
@@ -31,6 +31,8 @@ abstract type StatePath end
 
 abstract type UnivariateOpt end
 
+abstract type LazyDirection end
+
 #include("core/core.jl")
 include("core/nodes/nodes.jl")
 include("core/algorithms/algorithms.jl")
@@ -131,6 +133,7 @@ export
     copy_partition_to!,
     copy_partition,
     copy_message,
+    partition_from_template,
     equilibrium_message,
     sample_partition!,
     obs2partition!,

diff --git a/src/core/algorithms/ancestors.jl b/src/core/algorithms/ancestors.jl
@@ -29,7 +29,7 @@ function depth_first_reconstruction(
     run_fel_up = true,
     run_fel_down = true,
     partition_list = 1:length(tree.message),
-    node_message_dict = Dict{FelNode,Vector{Partition}}(),
+    node_message_dict = Dict{FelNode,Vector{<:Partition}}(),
 )
     if run_fel_up
         felsenstein!(tree, model_func, partition_list = partition_list)
@@ -56,7 +56,7 @@ function depth_first_reconstruction(
     run_fel_up = true,
     run_fel_down = true,
     partition_list = 1:length(tree.message),
-    node_message_dict = Dict{FelNode,Vector{Partition}}(),
+    node_message_dict = Dict{FelNode,Vector{<:Partition}}(),
 )
     depth_first_reconstruction(
         tree,
@@ -76,7 +76,7 @@ function depth_first_reconstruction(
     run_fel_up = true,
     run_fel_down = true,
     partition_list = 1:length(tree.message),
-    node_message_dict = Dict{FelNode,Vector{Partition}}(),
+    node_message_dict = Dict{FelNode,Vector{<:Partition}}(),
 )
     depth_first_reconstruction(
         tree,
@@ -91,7 +91,7 @@ end
 
 #For marginal reconstructions
 function reconstruct_marginal_node!(
-    node_message_dict::Dict{FelNode,Vector{Partition}},
+    node_message_dict::Dict{FelNode,Vector{<:Partition}},
     node::FelNode,
     model_array::Vector{<:BranchModel},
     partition_list,
@@ -109,7 +109,7 @@ end
 
 export marginal_state_dict
 """
-    marginal_state_dict(tree::FelNode, model; partition_list = 1:length(tree.message), node_message_dict = Dict{FelNode,Vector{Partition}}())
+    marginal_state_dict(tree::FelNode, model; partition_list = 1:length(tree.message), node_message_dict = Dict{FelNode,Vector{<:Partition}}())
 
 Takes in a tree and a model (which can be a single model, an array of models, or a function that maps FelNode->Array{<:BranchModel}), and
 returns a dictionary mapping nodes to their marginal reconstructions (ie. P(state|all observations,model)). A subset of partitions can be specified by partition_list,
@@ -119,7 +119,7 @@ function marginal_state_dict(
     tree::FelNode,
     model;
     partition_list = 1:length(tree.message),
-    node_message_dict = Dict{FelNode,Vector{Partition}}(),
+    node_message_dict = Dict{FelNode,Vector{<:Partition}}(),
 )
     return depth_first_reconstruction(
         tree,
@@ -133,7 +133,7 @@ end
 #For joint max reconstructions
 export dependent_reconstruction!
 function dependent_reconstruction!(
-    node_message_dict::Dict{FelNode,Vector{Partition}},
+    node_message_dict::Dict{FelNode,Vector{<:Partition}},
     node::FelNode,
     model_array::Vector{<:BranchModel},
     partition_list;
@@ -173,7 +173,7 @@ reconstruct_cascading_max_node!(node_message_dict, node, model_array, partition_
     )
 export cascading_max_state_dict
 """
-    cascading_max_state_dict(tree::FelNode, model; partition_list = 1:length(tree.message), node_message_dict = Dict{FelNode,Vector{Partition}}())
+    cascading_max_state_dict(tree::FelNode, model; partition_list = 1:length(tree.message), node_message_dict = Dict{FelNode,Vector{<:Partition}}())
 
 Takes in a tree and a model (which can be a single model, an array of models, or a function that maps FelNode->Array{<:BranchModel}), and
 returns a dictionary mapping nodes to their inferred ancestors under the following scheme: the state that maximizes the marginal likelihood is selected at the root,
@@ -184,7 +184,7 @@ function cascading_max_state_dict(
     tree::FelNode,
     model;
     partition_list = 1:length(tree.message),
-    node_message_dict = Dict{FelNode,Vector{Partition}}(),
+    node_message_dict = Dict{FelNode,Vector{<:Partition}}(),
 )
     return depth_first_reconstruction(
         tree,
@@ -206,7 +206,7 @@ conditioned_sample_node!(node_message_dict, node, model_array, partition_list) =
     )
 export endpoint_conditioned_sample_state_dict
 """
-    endpoint_conditioned_sample_state_dict(tree::FelNode, model; partition_list = 1:length(tree.message), node_message_dict = Dict{FelNode,Vector{Partition}}())
+    endpoint_conditioned_sample_state_dict(tree::FelNode, model; partition_list = 1:length(tree.message), node_message_dict = Dict{FelNode,Vector{<:Partition}}())
 
 Takes in a tree and a model (which can be a single model, an array of models, or a function that maps FelNode->Array{<:BranchModel}), and draws samples under the model
 conditions on the leaf observations. These samples are stored in the node_message_dict, which is returned. A subset of partitions can be specified by partition_list, and a
@@ -216,7 +216,7 @@ function endpoint_conditioned_sample_state_dict(
     tree::FelNode,
     model;
     partition_list = 1:length(tree.message),
-    node_message_dict = Dict{FelNode,Vector{Partition}}(),
+    node_message_dict = Dict{FelNode,Vector{<:Partition}}(),
 )
     return depth_first_reconstruction(
         tree,

diff --git a/src/core/algorithms/branchlength_optim.jl b/src/core/algorithms/branchlength_optim.jl
@@ -1,7 +1,7 @@
 #Model list should be a list of P matrices.
 function branch_LL_up(
     bl::Real,
-    temp_message::Vector{Partition},
+    temp_message::Vector{<:Partition},
     node::FelNode,
     model_list::Vector{<:BranchModel},
     partition_list,
@@ -22,8 +22,8 @@ end
 #I need to add a version of this that takes a generic optimizer function and uses that instead of golden_section_maximize on just the branchlength.
 #This is for cases where the user stores node-level parameters and wants to optimize them.
 function branchlength_optim!(
-    temp_message::Vector{Partition},
-    message_to_set::Vector{Partition},
+    temp_message::Vector{<:Partition},
+    message_to_set::Vector{<:Partition},
     node::FelNode,
     models,
     partition_list,

diff --git a/src/core/algorithms/generative.jl b/src/core/algorithms/generative.jl
@@ -28,12 +28,12 @@ function sample_down!(node::FelNode, models, partition_list)
             if isroot(node)
                 forward!(node.message[part], node.parent_message[part], model_list[part], node)
             else
-                forward!(node.message[part], node.parent.message[part], model_list[part], node)
+                forward!(node.message[part], node.parent.message[part], model_list[part], node) #node.parent['.' vs. '_']message[part]
             end
             sample_partition!(node.message[part])
         end
         if !isleafnode(node)
-            for child in reverse(node.children)
+            for child in reverse(node.children) #We push! in reverse order because of LazyPartition, so that lazysort! is optimal for both felsenstein! and sample_down!
                 push!(stack, child)
             end
         end

diff --git a/src/core/algorithms/nni_optim.jl b/src/core/algorithms/nni_optim.jl
@@ -1,8 +1,8 @@
 
 
 function nni_optim!(
-    temp_message::Vector{Partition},
-    message_to_set::Vector{Partition},
+    temp_message::Vector{<:Partition},
+    message_to_set::Vector{<:Partition},
     node::FelNode,
     models,
     partition_list;
@@ -72,8 +72,8 @@ end
 
 #Unsure if this is the best choice to handle the model,models, and model_func stuff.
 function nni_optim!(
-    temp_message::Vector{Partition},
-    message_to_set::Vector{Partition},
+    temp_message::Vector{<:Partition},
+    message_to_set::Vector{<:Partition},
     node::FelNode,
     models::Vector{<:BranchModel},
     partition_list;
@@ -89,8 +89,8 @@ function nni_optim!(
     )
 end
 function nni_optim!(
-    temp_message::Vector{Partition},
-    message_to_set::Vector{Partition},
+    temp_message::Vector{<:Partition},
+    message_to_set::Vector{<:Partition},
     node::FelNode,
     model::BranchModel,
     partition_list;

diff --git a/src/core/nodes/AbstractTreeNode.jl b/src/core/nodes/AbstractTreeNode.jl
@@ -309,15 +309,15 @@ function gettreefromnewick(str, T::DataType; tagged = false, disable_binarize =
             i += 1
         elseif c == ';'
             try_apply_char_arr(currnode, char_arr)
-            return (tagged ? (currnode, tag_dict) : currnode)
+            break
         else
             push!(char_arr, c)
             #println(char_arr)
             i += 1
         end
     end
 
-    binarize!(currnode)
+    !disable_binarize && binarize!(currnode)
 
     return (tagged ? (currnode, tag_dict) : currnode)
 end

diff --git a/src/models/compound_models/swm.jl b/src/models/compound_models/swm.jl
@@ -59,6 +59,15 @@ function copy_partition(src::SWMPartition{PType}) where {PType <: MultiSiteParti
     return SWMPartition{PType}(copy_partition.(src.parts), copy(src.weights), src.sites, src.states, src.models)
 end
 
+#Overloads partition_from_template for SWMPartition: each part is built via partition_from_template (which may use undef, so contents can be uninitialized), weights are copied, and sites/states/models are passed through.
+function partition_from_template(partition_template::SWMPartition{PType}) where {PType <: MultiSitePartition}
+    return SWMPartition{PType}(partition_from_template.(partition_template.parts), 
+        copy(partition_template.weights),
+        partition_template.sites,
+        partition_template.states,
+        partition_template.models)
+end
+
 function combine!(dest::SWMPartition{PType},src::SWMPartition{PType}) where {PType<:MultiSitePartition}
     for i in 1:length(dest.parts)
         combine!(dest.parts[i], src.parts[i])

diff --git a/src/models/discrete_models/discrete_partitions.jl b/src/models/discrete_models/discrete_partitions.jl
@@ -15,6 +15,12 @@ function copy_partition(src::T) where {T<:DiscretePartition}
     return T(copy(src.state), src.states, src.sites, copy(src.scaling))
 end
 
+#Overloads partition_from_template using undef: state (states x sites) and scaling (length sites) take the template's dimensions, but their contents are uninitialized.
+function partition_from_template(partition_template::T) where {T <: DiscretePartition}
+    states, sites = partition_template.states, partition_template.sites
+    return T(Array{Float64, 2}(undef, states, sites), states, sites, Array{Float64, 1}(undef, sites))
+end
+
 #I should add a constructor that constructs a DiscretePartition from an existing array.
 mutable struct CustomDiscretePartition <: DiscretePartition
     state::Array{Float64,2}