diff --git a/Project.toml b/Project.toml index 8a10502b..825fbd41 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "COBREXA" uuid = "babc4406-5200-4a30-9033-bf5ae714c842" authors = ["The developers of COBREXA.jl"] -version = "2.2.2" +version = "2.3.0" [deps] AbstractFBCModels = "5a4f3dfa-1789-40f8-8221-69268c29937c" diff --git a/docs/src/examples/05g-gapfilling.jl b/docs/src/examples/05g-gapfilling.jl new file mode 100644 index 00000000..5189ecf0 --- /dev/null +++ b/docs/src/examples/05g-gapfilling.jl @@ -0,0 +1,138 @@ + +# Copyright (c) 2024, University of Luxembourg #src +# Copyright (c) 2024, Heinrich-Heine University Duesseldorf #src +# #src +# Licensed under the Apache License, Version 2.0 (the "License"); #src +# you may not use this file except in compliance with the License. #src +# You may obtain a copy of the License at #src +# #src +# http://www.apache.org/licenses/LICENSE-2.0 #src +# #src +# Unless required by applicable law or agreed to in writing, software #src +# distributed under the License is distributed on an "AS IS" BASIS, #src +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. #src +# See the License for the specific language governing permissions and #src +# limitations under the License. #src + +# # Gap filling +# +# Gapfilling is used to find easiest additions to the models that would make +# them feasible and capable of growth. +# +# Typically, an infeasible model ("with gaps") is used together with an +# universal model (which contains "everything"), and the algorithm attempts to +# find the minimal amount of reactions from the universal model that make the +# gappy model happy. In turn, the gapfilling optimization problem becomes a +# MILP. +# +# Gapfilling is sometimes used to produce "viable" genome-scale +# reconstructions from partial ones, but without additional manual intervention +# the gapfilling results are almost never biologically valid. A good use of +# gapfilling is to find problems in a model that cause infeasibility as +# follows: First the modeller makes a set of (unrealistic) universal reactions +# that supply or remove metabolites, and after gapfilling, metabolites that had +# to be supplied or removed to make the model feasible mark possible problems, +# thus guiding the modeller towards correct solution. + +# We will use a partially crippled *E. coli* toy model and see the minimal +# amount of reactions that may save it. + +using COBREXA + +download_model( + "http://bigg.ucsd.edu/static/models/e_coli_core.json", + "e_coli_core.json", + "7bedec10576cfe935b19218dc881f3fb14f890a1871448fc19a9b4ee15b448d8", +) + +import JSONFBCModels, HiGHS +model = load_model("e_coli_core.json") + +# First, let's produce an infeasible model: + +import AbstractFBCModels.CanonicalModel as CM +infeasible_model = convert(CM.Model, model) + +for rxn in ["TALA", "PDH", "PGI", "PYK"] + infeasible_model.reactions[rxn].lower_bound = 0.0 + infeasible_model.reactions[rxn].upper_bound = 0.0 +end + +# After removing the above reactions, the model will fail to solve: + +flux_balance_analysis(infeasible_model, optimizer = HiGHS.Optimizer) |> println + +# To avoid very subtle semantic issues, we are going to remove the ATP +# maintenance pseudoreaction from the universal model: +universal_model = convert(CM.Model, model) +delete!(universal_model.reactions, "ATPM") + +# ## Making the model feasible with a minimal set of reactions + +# Which of the reactions we have to fill back to get the model working again? +# First, let's run [`gap_filling_analysis`](@ref) to get a solution for a +# system that implements the reaction patching: + +x = gap_filling_analysis( + infeasible_model, + universal_model, + 0.05, + optimizer = HiGHS.Optimizer, +) + +# The reactions that had to be re-added can be found from the `fill_flags`: + +filled_reactions = [k for (k, v) in x.fill_flags if v != 0] + +@test length(filled_reactions) == 1 #src + +# If we want to try to generate another solution, we have to explicitly ask the +# system to avoid the previous solution. That is done via setting the argument +# `known_fill`. We can also set the `max_cost` to avoid finding too benevolent +# solutions: + +x2 = gap_filling_analysis( + infeasible_model, + universal_model, + 0.05, + max_cost = 2.0, + known_fills = [x.fill_flags], + optimizer = HiGHS.Optimizer, +) + +other_filled_reactions = [k for (k, v) in x2.fill_flags if v != 0] + +# ## Model debugging: which metabolite is missing? +# +# Gap-filling is great for detecting various broken links and imbalances in +# metabolic models. We show how to find the metabolites are causing the +# imbalance for our "broken" E. coli model. +# +# First, we construct a few completely unnatural reactions that create/remove +# the metabolites from/to nowhere: + +magic_model = convert(CM.Model, model) +empty!(magic_model.genes) +empty!(magic_model.reactions) + +for mid in keys(magic_model.metabolites) + magic_model.reactions[mid] = CM.Reaction( + lower_bound = -100.0, + upper_bound = 100.0, + stoichiometry = Dict(mid => 1.0), + ) +end + +# Gapfilling now points to the metabolites that need to be somehow taken care +# of by the modeller in order for the model to become feasible: + +xm = gap_filling_analysis(infeasible_model, magic_model, 0.05, optimizer = HiGHS.Optimizer) + +blocking_metabolites = [k for (k, v) in xm.fill_flags if v != 0] + +@test length(blocking_metabolites) == 1 #src + +# We can also have a look at how much of a given metabolite was used to make +# the model feasible again: + +xm.universal_fluxes[first(blocking_metabolites)] diff --git a/docs/src/reference/frontend.md b/docs/src/reference/frontend.md index 98eedd98..6fba749d 100644 --- a/docs/src/reference/frontend.md +++ b/docs/src/reference/frontend.md @@ -57,6 +57,13 @@ Modules = [COBREXA] Pages = ["src/frontend/envelope.jl"] ``` +## Gap-filling + +```@autodocs +Modules = [COBREXA] +Pages = ["src/frontend/gapfill.jl"] +``` + ## Knockout models ```@autodocs diff --git a/src/COBREXA.jl b/src/COBREXA.jl index e08b5b65..6aaeb718 100644 --- a/src/COBREXA.jl +++ b/src/COBREXA.jl @@ -77,6 +77,7 @@ include("frontend/community.jl") include("frontend/concentrations.jl") include("frontend/envelope.jl") include("frontend/enzymes.jl") +include("frontend/gapfill.jl") include("frontend/knockout.jl") include("frontend/loopless.jl") include("frontend/mmdf.jl") diff --git a/src/frontend/gapfill.jl b/src/frontend/gapfill.jl new file mode 100644 index 00000000..db21b5be --- /dev/null +++ b/src/frontend/gapfill.jl @@ -0,0 +1,190 @@ + +# Copyright (c) 2024, University of Luxembourg +# Copyright (c) 2024, Heinrich-Heine University Duesseldorf +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +$(TYPEDSIGNATURES) + +Make a gap-filling system from a FBC model with gaps and an universal +FBC model that contains reactions to be added into the original model. + +The output system will be constrainted to reach at least `objective_target` +flux through the objective function. Generally, this should be set to an +arbitrary small value such as `0.05`. + +`universal_reaction_cost` should assign a numeric cost of inclusion of each of +the reactions in the `universal_model`; by default all are assigned equal +weight of `1`. `max_cost` puts an optional maximum limit on the cost, which may +help the solver to avoid exploring unnecessarily complex solutions. +`known_fills` may contain previous solutions of the same system; these will be +made infeasible in the output constraint system in order to allow discovery of +other ones. + +Additional arguments are forwarded to `flux_balance_constraints` that converts +`model` to constraints. +""" +function gap_filling_constraints( + model::A.AbstractFBCModel, + universal_model::A.AbstractFBCModel, + objective_target::Float64; + universal_reaction_cost = _ -> 1.0, + max_cost = Inf, + known_fills::Vector{C.Tree{Float64}} = C.Tree{Float64}[], + kwargs..., +) + m = flux_balance_constraints(model; kwargs...) + m.objective.bound = C.Between(objective_target, Inf) + s = m.flux_stoichiometry + delete!(m, :flux_stoichiometry) + u = flux_balance_constraints(universal_model) + gap_filling_constraints(; + system = m, + stoichiometry = s, + universal_fluxes = u.fluxes, + universal_stoichiometry = u.flux_stoichiometry, + flux_cost = i -> universal_reaction_cost(String(i)), + max_cost, + known_fills, + ) +end + +""" +$(TYPEDSIGNATURES) + +Make a gap-fillign system from a non-solving `system` with a separated +`stoichiometry`, filling in possible fluxes from `universal_fluxes` that are +balanced with `universal_stoichiometry` + +`flux_cost` can be used to assign a weight to each given universal flux; the +total cost is bounded by `max_cost`. + +`known_fills` may contain previous solutions to be avoided; these are processed +by [`gap_filling_known_fill_constraint`](@ref) and attached to the system. + +`stoichiometry` needs to be extended to construct the final constraints, thus +it should not be present in the original `system`. +""" +function gap_filling_constraints(; + system::C.ConstraintTree, + stoichiometry::C.ConstraintTree, + universal_fluxes::C.ConstraintTree, + universal_stoichiometry::C.ConstraintTree, + flux_cost = _ -> 1.0, + max_cost = Inf, + known_fills::Vector{C.Tree{Float64}} = C.Tree{Float64}[], +) + joined = + C.ConstraintTree(:system => system, :stoichiometry => stoichiometry) + + :universal^C.ConstraintTree( + :fluxes => universal_fluxes, + :stoichiometry => universal_stoichiometry, + ) + + :fill_flags^C.variables_for(universal_fluxes) do _ + Switch(0, 1) + end + + return C.ConstraintTree( + :system => joined.system, + :universal_fluxes => joined.universal.fluxes, + :universal_flux_bounds => C.zip(joined.universal.fluxes, joined.fill_flags) do x, b + if x.bound isa C.Between + C.ConstraintTree( + :lower => C.Constraint( + x.value - x.bound.lower * b.value, + C.Between(0, Inf), + ), + :upper => C.Constraint( + x.value - x.bound.upper * b.value, + C.Between(-Inf, 0), + ), + ) + elseif x.bound isa C.EqualTo + C.Constraint(x.value - x.bound.equal_to * b.value, 0) + elseif isnothing(x.bound) + C.ConstraintTree() + else + throw(DomainError(x.bound, "unsupported flux bound")) + end + end, + :stoichiometry => + C.merge(joined.stoichiometry, joined.universal.stoichiometry) do a, b + ismissing(a) && return b + ismissing(b) && return a + @assert a.bound isa C.EqualTo && + a.bound.equal_to == 0.0 && + b.bound isa C.EqualTo && + b.bound.equal_to == 0.0 "Stoichiometries in both systems must only contain equal-to-zero-bounded constraints" + C.Constraint(a.value + b.value, 0.0) + end, + :fill_flags => joined.fill_flags, + ( + Symbol(:known_fills_, i) => + gap_filling_known_fill_constraint(joined.fill_flags, kf) for + (i, kf) in enumerate(known_fills) + )..., + :n_filled => C.Constraint( + C.sum( + (flux_cost(k) * v.value for (k, v) in joined.fill_flags), + init = zero(C.LinearValue), + ), + C.Between(0, max_cost), + ), + ) +end + +export gap_filling_constraints + +""" +$(TYPEDSIGNATURES) + +Produce a constraint that can be added to the system made by +[`gap_filling_constraints`](@ref) to avoid repeating of a solution that +includes reactions already generated by another solution, as represented by the +solved `fill_flags`. + +Parameter `fill_flags` are the gapfilling flags of the given constraint system, +parameter `known_flags` is expected to contain the solved `fill_flags` for the +solution that is to be avoided. +""" +gap_filling_known_fill_constraint( + fill_flags::C.ConstraintTree, + known_flags::C.Tree{Float64}, +) = C.Constraint( + C.sum( + values(C.zip(fill_flags, known_flags, C.Value) do f, k + k - f.value * k + end), + init = zero(C.LinearValue), + ), + (1 - eps(), Inf), +) + +export gap_filling_known_fill_constraint + +""" +$(TYPEDSIGNATURES) + +Run the gap-filling analysis on a constraint system specified by +[`gap_filling_constraints`](@ref). +""" +gap_filling_analysis(args...; kwargs...) = frontend_optimized_values( + gap_filling_constraints, + args...; + objective = x -> x.n_filled.value, + sense = Minimal, + kwargs..., +) + +export gap_filling_analysis diff --git a/test/misc.jl b/test/misc.jl index 64aeffd4..5994f46c 100644 --- a/test/misc.jl +++ b/test/misc.jl @@ -104,3 +104,23 @@ end @test c.interface_balance.x.value.weights == [1.0, 2.0, 1.0, 3.0, -1.0] end + +@testset "Uncommon bounds in gapfilling" begin + vs = C.variables(keys = [:a, :b], bounds = [C.EqualTo(123), nothing]) + stoi = C.variables(keys = [:c, :d], bounds = C.EqualTo(0)) + x = gap_filling_constraints( + system = vs, + stoichiometry = stoi, + universal_fluxes = vs, + universal_stoichiometry = stoi, + ) + @test x.universal_flux_bounds.a.bound isa C.EqualTo + @test isempty(x.universal_flux_bounds.b) + vs = C.variables(keys = [:a, :b], bounds = [C.EqualTo(123), Switch(1, 2)]) + @test_throws DomainError gap_filling_constraints( + system = vs, + stoichiometry = stoi, + universal_fluxes = vs, + universal_stoichiometry = stoi, + ) +end