Commit: Add preliminary NN support (#9)

odow authored Jun 6, 2024
1 parent 8f879d3 commit 35b07c4

Showing 9 changed files with 458 additions and 11 deletions.
3 changes: 3 additions & 0 deletions Project.toml
@@ -10,13 +10,16 @@ MathOptInterface = "b8f27783-ece8-5eb3-8dc8-9495eed66fee"

[weakdeps]
GLM = "38e38edf-8417-5370-95a0-9cbb8c7f171a"
Lux = "b2108857-7c20-44ae-9111-449ecde12c47"

[extensions]
OmeletteGLMExt = "GLM"
+OmeletteLuxExt = "Lux"

[compat]
Distributions = "0.25"
GLM = "1.9"
Lux = "0.5"
JuMP = "1"
MathOptInterface = "1"
julia = "1.9"
31 changes: 31 additions & 0 deletions ext/OmeletteLuxExt.jl
@@ -0,0 +1,31 @@
# Copyright (c) 2024: Oscar Dowson and contributors
#
# Use of this source code is governed by an MIT-style license that can be found
# in the LICENSE.md file or at https://opensource.org/licenses/MIT.

module OmeletteLuxExt

import Omelette
import Lux

function _add_predictor(predictor::Omelette.Pipeline, layer::Lux.Dense, p)
    push!(predictor.layers, Omelette.LinearRegression(p.weight, vec(p.bias)))
    if layer.activation === identity
        # Do nothing
    elseif layer.activation === Lux.NNlib.relu
        push!(predictor.layers, Omelette.ReLUBigM(1e4))
    else
        error("Unsupported activation function: $(layer.activation)")
    end
    return
end

function Omelette.Pipeline(x::Lux.Experimental.TrainState)
    predictor = Omelette.Pipeline(Omelette.AbstractPredictor[])
    for (layer, parameter) in zip(x.model.layers, x.parameters)
        _add_predictor(predictor, layer, parameter)
    end
    return predictor
end

end #module
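End to end, the extension is intended to be used roughly as follows. This is a minimal sketch, assuming the Lux 0.5 `Lux.Experimental.TrainState` constructor; the network shape and optimiser settings are illustrative, not part of this commit:

```julia
# A minimal sketch, assuming the Lux 0.5 API; shapes and settings illustrative.
using JuMP, Lux, Omelette, Optimisers, Random

rng = Random.MersenneTwister(1234)
chain = Lux.Chain(Lux.Dense(2 => 4, Lux.relu), Lux.Dense(4 => 1))
state = Lux.Experimental.TrainState(rng, chain, Optimisers.Adam(0.03f0))

# Converts each Dense layer into LinearRegression (+ ReLUBigM for relu) layers.
f = Omelette.Pipeline(state)

model = Model()
@variable(model, x[1:2])
y = Omelette.add_predictor(model, f, x)  # a 1-element Vector{VariableRef}
```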
26 changes: 17 additions & 9 deletions src/models/LinearRegression.jl
@@ -4,13 +4,16 @@
# in the LICENSE.md file or at https://opensource.org/licenses/MIT.

"""
LinearRegression(parameters::Matrix)
LinearRegression(
A::Matrix{Float64},
b::Vector{Float64} = zeros(size(A, 1)),
)
Represents the linear relationship:
```math
f(x) = A x
f(x) = A x + b
```
where \$A\$ is the \$m \\times n\$ matrix `parameters`.
where \$A\$ is the \$m \\times n\$ matrix `A`.
## Example
@@ -22,7 +25,7 @@ julia> model = Model();
julia> @variable(model, x[1:2]);
julia> f = Omelette.LinearRegression([2.0, 3.0])
-Omelette.LinearRegression([2.0 3.0])
+Omelette.LinearRegression([2.0 3.0], [0.0])
julia> y = Omelette.add_predictor(model, f, x)
1-element Vector{VariableRef}:
@@ -35,20 +38,25 @@ julia> print(model)
```
"""
struct LinearRegression <: AbstractPredictor
-    parameters::Matrix{Float64}
+    A::Matrix{Float64}
+    b::Vector{Float64}
end

-function LinearRegression(parameters::Vector{Float64})
-    return LinearRegression(reshape(parameters, 1, length(parameters)))
+function LinearRegression(A::Matrix{Float64})
+    return LinearRegression(A, zeros(size(A, 1)))
end

+function LinearRegression(A::Vector{Float64})
+    return LinearRegression(reshape(A, 1, length(A)), [0.0])
+end

function add_predictor(
    model::JuMP.Model,
    predictor::LinearRegression,
    x::Vector{JuMP.VariableRef},
)
-    m = size(predictor.parameters, 1)
+    m = size(predictor.A, 1)
    y = JuMP.@variable(model, [1:m], base_name = "omelette_y")
-    JuMP.@constraint(model, predictor.parameters * x .== y)
+    JuMP.@constraint(model, predictor.A * x .+ predictor.b .== y)
    return y
end
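A short sketch exercising the new bias term through the two-argument constructor (the values are illustrative):

```julia
# A sketch of the two-argument constructor; values are illustrative.
using JuMP, Omelette

model = Model()
@variable(model, x[1:3])

# f(x) = A x + b with A a 2-by-3 matrix and b a 2-element vector.
f = Omelette.LinearRegression([1.0 2.0 3.0; 4.0 5.0 6.0], [7.0, 8.0])

# Adds the constraints A x + b == y and returns the 2-element y.
y = Omelette.add_predictor(model, f, x)
```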
2 changes: 0 additions & 2 deletions src/models/LogisticRegression.jl
@@ -42,8 +42,6 @@ function LogisticRegression(parameters::Vector{Float64})
    return LogisticRegression(reshape(parameters, 1, length(parameters)))
end

-Base.size(f::LogisticRegression) = size(f.parameters)
-
function add_predictor(
    model::JuMP.Model,
    predictor::LogisticRegression,
58 changes: 58 additions & 0 deletions src/models/Pipeline.jl
@@ -0,0 +1,58 @@
# Copyright (c) 2024: Oscar Dowson and contributors
#
# Use of this source code is governed by an MIT-style license that can be found
# in the LICENSE.md file or at https://opensource.org/licenses/MIT.

"""
Pipeline(layers::Vector{AbstractPredictor})
A pipeline of nested layers
```math
f(x) = l_N(\\ldots(l_2(l_1(x))
```
## Example
```jldoctest
julia> using JuMP, Omelette
julia> model = Model();
julia> @variable(model, x[1:2]);
julia> f = Omelette.Pipeline(
           Omelette.LinearRegression([1.0 2.0], [0.0]),
           Omelette.ReLUQuadratic(),
       )
Omelette.Pipeline(Omelette.AbstractPredictor[Omelette.LinearRegression([1.0 2.0], [0.0]), Omelette.ReLUQuadratic()])
julia> y = Omelette.add_predictor(model, f, x)
1-element Vector{VariableRef}:
omelette_y[1]
julia> print(model)
Feasibility
Subject to
x[1] + 2 x[2] - omelette_y[1] = 0
omelette_y[1] - omelette_y[1] + _z[1] = 0
omelette_y[1]*_z[1] = 0
omelette_y[1] ≥ 0
_z[1] ≥ 0
```
"""
struct Pipeline <: AbstractPredictor
    layers::Vector{AbstractPredictor}
end

Pipeline(args::AbstractPredictor...) = Pipeline(collect(args))

function add_predictor(
    model::JuMP.Model,
    predictor::Pipeline,
    x::Vector{JuMP.VariableRef},
)
    for layer in predictor.layers
        x = add_predictor(model, layer, x)
    end
    return x
end
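Because `Pipeline` accepts predictors as varargs, a small network can also be assembled by hand. A sketch with illustrative weights:

```julia
# A sketch of composing predictors by hand; the weights are illustrative.
using JuMP, Omelette

model = Model()
@variable(model, x[1:2])

# Affine layer, ReLU, then a final affine layer: f(x) = C * max.(0, A * x).
f = Omelette.Pipeline(
    Omelette.LinearRegression([1.0 2.0; 3.0 4.0]),
    Omelette.ReLUBigM(100.0),
    Omelette.LinearRegression([5.0 6.0]),
)
y = Omelette.add_predictor(model, f, x)  # a 1-element Vector{VariableRef}
```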
173 changes: 173 additions & 0 deletions src/models/ReLU.jl
@@ -0,0 +1,173 @@
# Copyright (c) 2024: Oscar Dowson and contributors
#
# Use of this source code is governed by an MIT-style license that can be found
# in the LICENSE.md file or at https://opensource.org/licenses/MIT.

"""
ReLUBigM(M::Float64)
Represents the rectified linear unit relationship:
```math
f(x) = max.(0, x)
```
## Example
```jldoctest
julia> using JuMP, Omelette
julia> model = Model();
julia> @variable(model, x[1:2]);
julia> f = Omelette.ReLUBigM(100.0)
Omelette.ReLUBigM(100.0)
julia> y = Omelette.add_predictor(model, f, x)
2-element Vector{VariableRef}:
omelette_y[1]
omelette_y[2]
julia> print(model)
Feasibility
Subject to
-x[1] + omelette_y[1] ≥ 0
-x[2] + omelette_y[2] ≥ 0
omelette_y[1] - 100 _[5] ≤ 0
omelette_y[2] - 100 _[6] ≤ 0
-x[1] + omelette_y[1] + 100 _[5] ≤ 100
-x[2] + omelette_y[2] + 100 _[6] ≤ 100
omelette_y[1] ≥ 0
omelette_y[2] ≥ 0
_[5] binary
_[6] binary
```
"""
struct ReLUBigM <: AbstractPredictor
    M::Float64
end

function add_predictor(
    model::JuMP.Model,
    predictor::ReLUBigM,
    x::Vector{JuMP.VariableRef},
)
    m = length(x)
    y = JuMP.@variable(model, [1:m], lower_bound = 0, base_name = "omelette_y")
    z = JuMP.@variable(model, [1:m], Bin)
    JuMP.@constraint(model, y .>= x)
    JuMP.@constraint(model, y .<= predictor.M * z)
    JuMP.@constraint(model, y .<= x .+ predictor.M * (1 .- z))
    return y
end
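# For reference, `add_predictor` above emits the standard big-M encoding of
# y = max.(0, x), componentwise, with one binary indicator z per element:
#
#   y >= 0,  y >= x,  y <= M * z,  y <= x + M * (1 - z),  z binary.
#
# The encoding is only exact when M is a valid bound on the magnitude of x.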

"""
ReLUSOS1()
Implements the ReLU constraint \$y = max(0, x)\$ by the reformulation:
```math
\\begin{aligned}
x = y - z \\\\
[y, z] \\in SOS1 \\\\
y, z \\ge 0
\\end{aligned}
```
## Example
```jldoctest
julia> using JuMP, Omelette
julia> model = Model();
julia> @variable(model, x[1:2]);
julia> f = Omelette.ReLUSOS1()
Omelette.ReLUSOS1()
julia> y = Omelette.add_predictor(model, f, x)
2-element Vector{VariableRef}:
omelette_y[1]
omelette_y[2]
julia> print(model)
Feasibility
Subject to
x[1] - omelette_y[1] + _z[1] = 0
x[2] - omelette_y[2] + _z[2] = 0
[omelette_y[1], _z[1]] ∈ MathOptInterface.SOS1{Float64}([1.0, 2.0])
[omelette_y[2], _z[2]] ∈ MathOptInterface.SOS1{Float64}([1.0, 2.0])
omelette_y[1] ≥ 0
omelette_y[2] ≥ 0
_z[1] ≥ 0
_z[2] ≥ 0
```
"""
struct ReLUSOS1 <: AbstractPredictor end

function add_predictor(
    model::JuMP.Model,
    predictor::ReLUSOS1,
    x::Vector{JuMP.VariableRef},
)
    m = length(x)
    y = JuMP.@variable(model, [1:m], lower_bound = 0, base_name = "omelette_y")
    z = JuMP.@variable(model, [1:m], lower_bound = 0, base_name = "_z")
    JuMP.@constraint(model, x .== y - z)
    for i in 1:m
        JuMP.@constraint(model, [y[i], z[i]] in MOI.SOS1([1.0, 2.0]))
    end
    return y
end
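# For reference: MOI.SOS1 requires that at most one variable in the set takes
# a nonzero value, which enforces y[i] * z[i] = 0 without binary variables.
# The weights [1.0, 2.0] do not appear in the constraint; they only give the
# solver an ordering to branch on.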

"""
ReLUQuadratic()
Implements the ReLU constraint \$y = max(0, x)\$ by the reformulation:
```math
\\begin{aligned}
x = y - z \\\\
y \\times z = 0 \\\\
y, z \\ge 0
\\end{aligned}
```
## Example
```jldoctest
julia> using JuMP, Omelette
julia> model = Model();
julia> @variable(model, x[1:2]);
julia> f = Omelette.ReLUQuadratic()
Omelette.ReLUQuadratic()
julia> y = Omelette.add_predictor(model, f, x)
2-element Vector{VariableRef}:
omelette_y[1]
omelette_y[2]
julia> print(model)
Feasibility
Subject to
x[1] - omelette_y[1] + _z[1] = 0
x[2] - omelette_y[2] + _z[2] = 0
omelette_y[1]*_z[1] = 0
omelette_y[2]*_z[2] = 0
omelette_y[1] ≥ 0
omelette_y[2] ≥ 0
_z[1] ≥ 0
_z[2] ≥ 0
```
"""
struct ReLUQuadratic <: AbstractPredictor end

function add_predictor(
    model::JuMP.Model,
    predictor::ReLUQuadratic,
    x::Vector{JuMP.VariableRef},
)
    m = length(x)
    y = JuMP.@variable(model, [1:m], lower_bound = 0, base_name = "omelette_y")
    z = JuMP.@variable(model, [1:m], lower_bound = 0, base_name = "_z")
    JuMP.@constraint(model, x .== y - z)
    JuMP.@constraint(model, y .* z .== 0)
    return y
end
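The three ReLU formulations are drop-in replacements for one another; what changes is the class of solver required. A sketch (the solver pairings are illustrative and match the test dependencies; `ReLUSOS1` additionally needs a solver with SOS1 support, such as Gurobi):

```julia
# A sketch of swapping ReLU formulations; solver pairings are illustrative.
using JuMP, HiGHS, Ipopt, Omelette

function solve_relu(predictor, optimizer)
    model = Model(optimizer)
    set_silent(model)
    @variable(model, -1 <= x[1:2] <= 1)
    y = Omelette.add_predictor(model, predictor, x)
    @objective(model, Max, sum(y))
    optimize!(model)
    return value.(y)
end

solve_relu(Omelette.ReLUBigM(100.0), HiGHS.Optimizer)  # mixed-integer linear
solve_relu(Omelette.ReLUQuadratic(), Ipopt.Optimizer)  # quadratic complementarity
```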
14 changes: 14 additions & 0 deletions test/Project.toml
@@ -1,15 +1,29 @@
[deps]
ADTypes = "47edcb42-4c32-4615-8424-f2b9edc5f35b"
GLM = "38e38edf-8417-5370-95a0-9cbb8c7f171a"
HiGHS = "87dc4568-4c63-4d18-b0c0-bb2238e4078b"
Ipopt = "b6b21f68-93f8-5de0-b562-5493be1d77c9"
JuMP = "4076af6c-e467-56ae-b986-b466b2749572"
Lux = "b2108857-7c20-44ae-9111-449ecde12c47"
Omelette = "e52c2cb8-508e-4e12-9dd2-9c4755b60e73"
Optimisers = "3bd65402-5787-11e9-1adc-39752487f4e2"
Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7"
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f"

[compat]
ADTypes = "1"
GLM = "1"
HiGHS = "1"
Ipopt = "1"
JuMP = "1"
Lux = "0.5"
Optimisers = "0.3"
Printf = "<0.0.1, 1.10"
Random = "<0.0.1, 1.10"
Statistics = "<0.0.1, 1.10"
Test = "<0.0.1, 1.6"
Zygote = "0.6"
julia = "1.9"