Skip to content

Commit

Permalink
add a few missing things and finish the metabolism example
Browse files Browse the repository at this point in the history
  • Loading branch information
exaexa committed Sep 13, 2023
1 parent d33fcd0 commit 2c84b19
Show file tree
Hide file tree
Showing 4 changed files with 244 additions and 4 deletions.
202 changes: 199 additions & 3 deletions docs/src/metabolic-modeling.jl
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@
# First, let's import some packages:

import ConstraintTrees as C
import JuMP, SBML

# We will need a constraint-based metabolic model; for this test we will use
# the usual "E. Coli core metabolism" model as available from BiGG:
Expand All @@ -18,12 +17,15 @@ import Downloads: download

download("http://bigg.ucsd.edu/static/models/e_coli_core.xml", "e_coli_core.xml")

import SBML
ecoli = SBML.readSBML("e_coli_core.xml")

# ## Allocating and constraining variables
#
# Let's first build the constrained representation of the problem. First, we
# will need a variable for each of the reactions in the model:
# will need a variable for each of the reactions in the model.

c = C.allocate_variables(keys = Symbol.(keys(ecoli.reactions)));
c = C.allocate_variables(keys = Symbol.(keys(ecoli.reactions)))

@test length(C.elems(c)) == length(ecoli.reactions) #src

Expand All @@ -34,6 +36,11 @@ C.elems(c)
# ...or much more conveniently using the record dot syntax as properties:
c.R_PFK

# The individual `Value`s in constraint behave like sparse vectors that refer
# to variables: The first field represents the referenced variable indexes, and
# the second field represents the coefficients. Compared to the sparse vectors,
# information about the total number of variables is not stored explicitly.

# Operator `^` is used to name individual constraints and directories in the
# hierarchy. Let us name our constraints as "fluxes" (which is a common name in
# metabolic modeling) and explore the result:
Expand All @@ -49,3 +56,192 @@ collect(keys(C.elems(c)))

# ...which can be explored with the dot access again:
c.fluxes.R_PFK

# Indexing via values is possible via the usual bracket notation, and can be
# freely combined with the dot notation:
c[:fluxes][:R_PFK]

@test c[:fluxes].R_PFK === c.fluxes[:R_PFK] #src

# ## Adding single-variable constraints

# Each element in the constraint tree consists of a linear combination of the
# variables, which can be freely used to construct (and constraint) new linear
# combinations of variables. As the simplest use, we can constraint the
# variables to their valid bounds as defined by the model:
rxn_constraints =
let rxn_bounds = Symbol.(keys(ecoli.reactions)) .=> zip(SBML.flux_bounds(ecoli)...)
C.make_constraint_tree(
r => C.Constraint(value = c.fluxes[r].value, bound = (lb, ub)) for
(r, ((lb, _), (ub, _))) in rxn_bounds # SBML units are ignored for simplicity
)
end

# To combine the constraint trees, we can make a nice directory for the
# constraints and add them to the tree using operator `*`. Making "products" of
# constraint trees combines the trees in a way that they _share_ their
# variables. In particular, using the values from `c.fluxes` in the constraints
# within `rxn_constraints` here will constraint precisely the same variables
# (and thus values) as the ones in the original system.
c = c * :constraints^rxn_constraints;

# Our model representation now contains 2 "directories":
collect(keys(c))

@test 2 == length((keys(c)))#src

# ## Adding combined constraints

# Values may be combined additively and multiplied by real constants; which
# allows us to easily create more complex linear combination of any values
# already occurring in the model:
c.fluxes.R_PFK - 2 * C.fluxes.R_ACALD

# Metabolic modeling relies on the fact that the total rates of any metabolite
# getting created and consumed by the reaction equals to zero (which
# corresponds to conservation of mass). We can now add corresponding
# "stoichiometric" network constraints by following the reactants and products
# in the SBML structure:
stoi_constraints = C.make_constraint_tree(
Symbol(m) => C.Constraint(
value = -sum(
(
sr.stoichiometry * c.fluxes[Symbol(rid)].value for
(rid, r) in ecoli.reactions for sr in r.reactants if sr.species == m
),
init = zero(C.Value), # sometimes the sums are empty
) + sum(
(
sr.stoichiometry * c.fluxes[Symbol(rid)].value for
(rid, r) in ecoli.reactions for sr in r.products if sr.species == m
),
init = zero(C.Value),
),
bound = 0.0,
) for m in keys(ecoli.species)
)

# Again, we can label the stoichiometry properly and add it to the bigger model
# representation:
c = c * :stoichiometry^stoi_constraints;

# ## Saving the objective
#
# Constraint based models typically optimize a certain linear formula.
# Constraint trees do not support setting objectives (they are not
# constraints), but we can save the objective as a harmless unconstrained
# "constraint" that can be used later to refer to the objective more easily.
# We can save that information into the constraint system immediately:
c *=
:objective^C.Constraint(
value = sum(
c.fluxes[Symbol(rid)].value * coeff for
(rid, coeff) in (keys(ecoli.reactions) .=> SBML.flux_objective(ecoli)) if
coeff != 0.0
),
);

# ## Solving the constraint system using JuMP
#
# We can make a small function that throws our model into JuMP, optimizes it,
# and gives us back a variable assignment vector. This vector can then be used
# to determine and browse the values of constraints and variables using
# `SolutionTree`.
import JuMP
function optimized_vars(cs::C.ConstraintTree, objective::C.Value, optimizer)
model = JuMP.Model(optimizer)
JuMP.@variable(model, x[1:C.var_count(cs)])
JuMP.@objective(model, JuMP.MAX_SENSE, C.value_product(objective, x))
#+
function add_constraint(c::C.Constraint)
if c.bound isa Float64
JuMP.@constraint(model, C.value_product(c.value, x) == c.bound)
elseif c.bound isa Tuple{Float64,Float64}
val = C.value_product(c.value, x)
isinf(c.bound[1]) || JuMP.@constraint(model, val >= c.bound[1])
isinf(c.bound[2]) || JuMP.@constraint(model, val <= c.bound[2])
end
end
#+
function add_constraint(c::C.ConstraintTree)
add_constraint.(values(c))
end
#+
add_constraint(cs)
#+
JuMP.optimize!(model)
#+
JuMP.value.(model[:x])
end

# With this in hand, we can use an external linear problem solver to find the
# optimum of the constrained system:
import GLPK
optimal_variable_assignment = optimized_vars(c, c.objective.value, GLPK.Optimizer)

# To explore the solution more easily, we can make a solution tree with values
# that correspond to ones in our constraint tree:
result = C.solution_tree(c, optimal_variable_assignment);
result.fluxes.R_BIOMASS_Ecoli_core_w_GAM

result.fluxes.R_PFK

result.objective

# Sometimes it is unnecessary to recover the values for all constraints, so we are better off selecting just a subtree:
C.elems(C.solution_tree(c.fluxes, optimal_variable_assignment))

C.solution_tree(c.objective, optimal_variable_assignment)

# ## Combining and extending constraint systems
#
# Constraint trees can be extended with new variables from another constraint
# trees using the `+` operator. Contrary to the `*` operator, adding the
# constraint trees does _not_ share the variables between operands, and the
# resulting constraint tree will basically contain two disconnected trees that
# solve independently. The user is expected to create additional constraints to
# connect the independent parts.
#
# Here, we demonstrate this by creating a community of two slightly different
# E. Coli species: First, we disable functionality of a different reaction in
# each of the models to create a diverse group of differently handicapped
# organisms:
c =
:community^(
:species1^(c * :handicap^C.Constraint(value = c.fluxes.R_PFK.value, bound = 0.0)) +
:species2^(c * :handicap^C.Constraint(value = c.fluxes.R_ACALD.value, bound = 0.0))
)

# We can create additional variables that represent total community intake of
# oxygen, and total community production of biomass:
c +=
:exchanges^C.allocate_variables(
keys = [:oxygen, :biomass],
bounds = [(-10.0, 10.0), nothing],
)

# These can be constrained so that the total influx (or outflux) of each of the
# registered metabolites is in fact equal to total consumption or production by
# each of the species:
c *=
:exchange_constraints^C.make_constraint_tree(
:oxygen => C.Constraint(
value = c.exchanges.oxygen.value - c.community.species1.fluxes.R_EX_o2_e.value -
c.community.species2.fluxes.R_EX_o2_e.value,
bound = 0.0,
),
:biomass => C.Constraint(
value = c.exchanges.biomass.value -
c.community.species1.fluxes.R_BIOMASS_Ecoli_core_w_GAM.value -
c.community.species2.fluxes.R_BIOMASS_Ecoli_core_w_GAM.value,
bound = 0.0,
),
)

# Let's see how much biomass are the two species capable of producing together:
result = C.solution_tree(c, optimized_vars(c, c.exchanges.biomass.value, GLPK.Optimizer));
C.elems(result.exchanges)

# Finally, we can iterate over all species in the small community and see how
# much biomass was actually contributed by each:
[k => v.fluxes.R_BIOMASS_Ecoli_core_w_GAM for (k, v) in result.community]
40 changes: 40 additions & 0 deletions src/constraint_tree.jl
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,9 @@ Base.keys(x::ConstraintTree) = keys(elems(x))

Base.values(x::ConstraintTree) = values(elems(x))

Base.length(x::ConstraintTree) = length(elems(x))

Base.iterate(x::ConstraintTree) = iterate(elems(x))
Base.iterate(x::ConstraintTree, st) = iterate(elems(x), st)

Base.eltype(x::ConstraintTree) = eltype(elems(x))
Expand All @@ -113,6 +116,43 @@ Base.propertynames(x::ConstraintTree) = keys(x)

Base.getindex(x::ConstraintTree, sym::Symbol) = getindex(elems(x), sym)

#
# Tree-wide operations with variables
#

"""
$(TYPEDSIGNATURES)
Find the expected count of variables in a [`Constraint`](@ref).
"""
var_count(x::Constraint) = isempty(x.value.idxs) ? 0 : last(x.value.idxs)

"""
$(TYPEDSIGNATURES)
Find the expected count of variables in a [`ConstraintTree`](@ref).
"""
var_count(x::ConstraintTree) = isempty(elems(x)) ? 0 : maximum(var_count.(values(elems(x))))

"""
$(TYPEDSIGNATURES)
Offset all variable indexes in a [`Constraint`](@ref) by the given increment.
"""
incr_var_idxs(x::Constraint, incr::Int) = Constraint(
value = Value(idxs = x.value.idxs .+ incr, weights = x.value.weights),
bound = x.bound,
)

"""
$(TYPEDSIGNATURES)
Offset all variable indexes in a [`ConstraintTree`](@ref) by the given
increment.
"""
incr_var_idxs(x::ConstraintTree, incr::Int) =
ConstraintTree(elems = SortedDict(k => incr_var_idxs(v, incr) for (k, v) in elems(x)))

#
# Algebraic construction
#
Expand Down
5 changes: 4 additions & 1 deletion src/solution.jl
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,9 @@ Base.keys(x::SolutionTree) = keys(elems(x))

Base.values(x::SolutionTree) = values(elems(x))

Base.length(x::SolutionTree) = length(elems(x))

Base.iterate(x::SolutionTree) = iterate(elems(x))
Base.iterate(x::SolutionTree, st) = iterate(elems(x), st)

Base.eltype(x::SolutionTree) = eltype(elems(x))
Expand Down Expand Up @@ -67,6 +70,6 @@ assignment.
"""
solution_tree(x::ConstraintTree, vars::AbstractVector{Float64}) = SolutionTree(
elems = SortedDict{Symbol,SolutionTreeElem}(
keys(elems(x)) .=> solution_tree.(values(elems(x)), Ref(ov)),
keys(x) .=> solution_tree.(values(x), Ref(vars)),
),
)
1 change: 1 addition & 0 deletions src/value.jl
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ Base.@kwdef struct Value
weights::Vector{Float64}
end

Base.zero(::Type{Value}) = Value(idxs = [], weights = [])
Base.:*(a::Real, b::Value) = b * a
Base.:*(a::Value, b::Real) = Value(idxs = a.idxs, weights = b .* a.weights)
Base.:-(a::Value, b::Value) = a + (-1 * b)
Expand Down

0 comments on commit 2c84b19

Please sign in to comment.