[ReverseAD] Add support for user-defined hessians #1819

Merged: 5 commits, May 30, 2022
1 change: 1 addition & 0 deletions docs/src/submodules/Nonlinear/reference.md
@@ -60,6 +60,7 @@ Nonlinear.eval_univariate_gradient
Nonlinear.eval_univariate_hessian
Nonlinear.eval_multivariate_function
Nonlinear.eval_multivariate_gradient
Nonlinear.eval_multivariate_hessian
Nonlinear.eval_logic_function
Nonlinear.eval_comparison_function
```
38 changes: 35 additions & 3 deletions src/Nonlinear/ReverseAD/forward_over_reverse.jl
@@ -126,6 +126,7 @@ function _hessian_slice_inner(d, ex, input_ϵ, output_ϵ, ::Type{T}) where {T}
for i in ex.dependent_subexpressions
subexpr = d.subexpressions[i]
subexpr_forward_values_ϵ[i] = _forward_eval_ϵ(
d,
subexpr,
_reinterpret_unsafe(T, subexpr.forward_storage_ϵ),
_reinterpret_unsafe(T, subexpr.partials_storage_ϵ),
@@ -135,6 +136,7 @@ function _hessian_slice_inner(d, ex, input_ϵ, output_ϵ, ::Type{T}) where {T}
)
end
_forward_eval_ϵ(
d,
ex,
_reinterpret_unsafe(T, d.forward_storage_ϵ),
_reinterpret_unsafe(T, d.partials_storage_ϵ),
@@ -178,6 +180,7 @@ end

"""
_forward_eval_ϵ(
d::NLPEvaluator,
ex::Union{_FunctionStorage,_SubexpressionStorage},
storage_ϵ::AbstractVector{ForwardDiff.Partials{N,T}},
partials_storage_ϵ::AbstractVector{ForwardDiff.Partials{N,T}},
@@ -195,6 +198,7 @@ components separate so that we don't need to recompute the real components.
This assumes that `_reverse_model(d, x)` has already been called.
"""
function _forward_eval_ϵ(
d::NLPEvaluator,
ex::Union{_FunctionStorage,_SubexpressionStorage},
storage_ϵ::AbstractVector{ForwardDiff.Partials{N,T}},
partials_storage_ϵ::AbstractVector{ForwardDiff.Partials{N,T}},
@@ -328,10 +332,38 @@ function _forward_eval_ϵ(
recip_denominator,
)
elseif op > 6
error(
"User-defined operators not supported for hessian " *
"computations",
f_input = _UnsafeVectorView(d.jac_storage, n_children)
for (i, c) in enumerate(children_idx)
f_input[i] = ex.forward_storage[children_arr[c]]
end
H = _UnsafeLowerTriangularMatrixView(
d.user_output_buffer,
Review comment (Member): Where is it enforced that user_output_buffer is large enough?

Review comment (Member): So the constructor could break other unsafe views on the same array. That sounds extra unsafe... At least needs to be documented.

Reply (Member Author, @odow): Yes. For now though, the only usage of this is evaluating the user-defined hessians, and we use the .user_output_buffer as storage, so it's a safe usage of the unsafe behavior.

n_children,
)
has_hessian = Nonlinear.eval_multivariate_hessian(
user_operators,
user_operators.multivariate_operators[node.index],
H,
f_input,
)
# This might be `false` if we extend this code to all
# multivariate functions.
@assert has_hessian
for col in 1:n_children
dual = zero(ForwardDiff.Partials{N,T})
for row in 1:n_children
# Make sure we get the lower-triangular component.
h = row >= col ? H[row, col] : H[col, row]
# Performance optimization: hessians can be quite
# sparse
if !iszero(h)
i = children_arr[children_idx[row]]
dual += h * storage_ϵ[i]
end
end
i = children_arr[children_idx[col]]
partials_storage_ϵ[i] = dual
end
end
elseif node.type == Nonlinear.NODE_CALL_UNIVARIATE
@inbounds child_idx = children_arr[ex.adj.colptr[k]]
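For intuition, the new user-defined-operator branch above accumulates, for each child `col`, the dual `∑_row H[row, col] * ẋ[row]`, always reading the lower triangle of the symmetric Hessian. A standalone sketch of that accumulation (hypothetical function name, independent of the evaluator internals):

```julia
# Sketch: y = H_sym * ẋ, where only the lower triangle of the symmetric
# matrix H is stored, mirroring the loop in the new branch of _forward_eval_ϵ.
function symmetric_lower_hvp(H::AbstractMatrix, ẋ::AbstractVector)
    n = length(ẋ)
    y = zeros(n)
    for col in 1:n, row in 1:n
        # Always read the lower-triangular entry, regardless of index order.
        h = row >= col ? H[row, col] : H[col, row]
        if !iszero(h)  # user Hessians are often sparse; skip stored zeros
            y[col] += h * ẋ[row]
        end
    end
    return y
end
```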
13 changes: 9 additions & 4 deletions src/Nonlinear/ReverseAD/mathoptinterface_api.jl
@@ -5,10 +5,12 @@
# in the LICENSE.md file or at https://opensource.org/licenses/MIT.

function MOI.features_available(d::NLPEvaluator)
# Check if we have any user-defined multivariate operators, in which case we
# need to disable hessians. The result of features_available depends on this.
d.disable_2ndorder =
length(d.data.operators.registered_multivariate_operators) > 0
# Check if we are missing any hessians for user-defined multivariate
# operators, in which case we need to disable :Hess and :HessVec.
d.disable_2ndorder = any(
op -> op.∇²f === nothing,
d.data.operators.registered_multivariate_operators,
)
if d.disable_2ndorder
return [:Grad, :Jac, :JacVec]
end
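
For illustration, a hedged sketch of what this check means for users; the operator name `:prod2` and its callbacks are hypothetical, and the API calls mirror those in the tests below:

```julia
import MathOptInterface as MOI
const Nonlinear = MOI.Nonlinear

f(x...) = x[1] * x[2]
∇f(g, x...) = (g[1] = x[2]; g[2] = x[1]; nothing)
∇²f(H, x...) = (H[2, 1] = 1.0; nothing)  # lower triangle only

x = MOI.VariableIndex.(1:2)

# Registered without a Hessian: second-order features stay disabled.
m1 = Nonlinear.Model()
Nonlinear.register_operator(m1, :prod2, 2, f, ∇f)
Nonlinear.set_objective(m1, :(prod2($(x[1]), $(x[2]))))
e1 = Nonlinear.Evaluator(m1, Nonlinear.SparseReverseMode(), x)
MOI.features_available(e1)  # [:Grad, :Jac, :JacVec]

# Registered with ∇²f: :Hess (and :HessVec) become available.
m2 = Nonlinear.Model()
Nonlinear.register_operator(m2, :prod2, 2, f, ∇f, ∇²f)
Nonlinear.set_objective(m2, :(prod2($(x[1]), $(x[2]))))
e2 = Nonlinear.Evaluator(m2, Nonlinear.SparseReverseMode(), x)
MOI.features_available(e2)  # includes :Hess
```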
@@ -286,6 +288,7 @@ function MOI.eval_hessian_lagrangian_product(d::NLPEvaluator, h, x, v, σ, μ)
for i in d.subexpression_order
subexpr = d.subexpressions[i]
subexpr_forward_values_ϵ[i] = _forward_eval_ϵ(
d,
subexpr,
reinterpret(T, subexpr.forward_storage_ϵ),
reinterpret(T, subexpr.partials_storage_ϵ),
@@ -302,6 +305,7 @@ end
fill!(output_ϵ, zero(T))
if d.objective !== nothing
_forward_eval_ϵ(
d,
d.objective,
reinterpret(T, d.forward_storage_ϵ),
reinterpret(T, d.partials_storage_ϵ),
@@ -322,6 +326,7 @@ end
end
for (i, con) in enumerate(d.constraints)
_forward_eval_ϵ(
d,
con,
reinterpret(T, d.forward_storage_ϵ),
reinterpret(T, d.partials_storage_ϵ),
92 changes: 92 additions & 0 deletions src/Nonlinear/ReverseAD/utils.jl
@@ -53,13 +53,105 @@ Base.length(v::_UnsafeVectorView) = v.len

Base.size(v::_UnsafeVectorView) = (v.len,)

"""
_UnsafeVectorView(x::Vector, N::Int)

Create a new [`_UnsafeVectorView`](@ref) from `x`, and resize `x` if needed to
ensure it has a length of at least `N`.

## Unsafe behavior

In addition to the unsafe behavior of `_UnsafeVectorView`, this constructor is
additionally unsafe because it may resize `x`. Only call it if you are sure that
the usage of `_UnsafeVectorView(x, N)` is short-lived, and that there are no
other views to `x` while the returned value is within scope.
"""
function _UnsafeVectorView(x::Vector, N::Int)
if length(x) < N
resize!(x, N)
end
return _UnsafeVectorView(0, N, pointer(x))
end

"""
_UnsafeLowerTriangularMatrixView(x, N)

Lightweight unsafe view that converts a vector `x` into the lower-triangular
component of a symmetric `N`-by-`N` matrix.

## Motivation

`_UnsafeLowerTriangularMatrixView` is needed as an allocation-free equivalent of
`view`. Other alternatives, like `reshape(view(x, 1:N^2), N, N)` or a struct
like
```julia
struct _SafeView{T}
x::Vector{T}
len::Int
end
```
will allocate so that `x` can be tracked by Julia's GC.
`_UnsafeLowerTriangularMatrixView` relies on the fact that the use-cases of
`_UnsafeLowerTriangularMatrixView` only temporarily wrap a long-lived vector
like `d.jac_storage` so that we don't have to worry about the GC removing
`d.jac_storage` while `_UnsafeLowerTriangularMatrixView` exists. This lets us
use a `Ptr{T}` and create a struct that is `isbitstype` and therefore does not
allocate.

## Unsafe behavior

`_UnsafeLowerTriangularMatrixView` is unsafe because it assumes that the vector
`x` remains valid during the usage of `_UnsafeLowerTriangularMatrixView`.
"""
struct _UnsafeLowerTriangularMatrixView <: AbstractMatrix{Float64}
N::Int
ptr::Ptr{Float64}
end

Base.size(x::_UnsafeLowerTriangularMatrixView) = (x.N, x.N)

function _linear_index(row, col)
if row < col
error("Unable to access upper-triangular component: ($row, $col)")
end
return div((row - 1) * row, 2) + col
end
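
For reference, a quick check of the packed row-major layout this formula implies for a 3-by-3 lower triangle:

```julia
# _linear_index(row, col) = (row - 1) * row ÷ 2 + col walks the lower triangle
# row by row: (1,1)→1, (2,1)→2, (2,2)→3, (3,1)→4, (3,2)→5, (3,3)→6.
@assert _linear_index(1, 1) == 1
@assert _linear_index(2, 2) == 3
@assert _linear_index(3, 1) == 4
@assert _linear_index(3, 3) == 6
```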

function Base.getindex(x::_UnsafeLowerTriangularMatrixView, i, j)
return unsafe_load(x.ptr, _linear_index(i, j))
end

function Base.setindex!(x::_UnsafeLowerTriangularMatrixView, value, i, j)
unsafe_store!(x.ptr, value, _linear_index(i, j))
return value
end

"""
_UnsafeLowerTriangularMatrixView(x::Vector{Float64}, N::Int)

Create a new [`_UnsafeLowerTriangularMatrixView`](@ref) from `x`, zero the
elements in `x`, and resize `x` if needed to ensure it has a length of at least
`N * (N + 1) / 2`.

## Unsafe behavior

In addition to the unsafe behavior of `_UnsafeLowerTriangularMatrixView`, this
constructor is additionally unsafe because it may resize `x`. Only call it if
you are sure that the usage of `_UnsafeLowerTriangularMatrixView(x, N)` is
short-lived, and that there are no other views to `x` while the returned value
is within scope.
"""
function _UnsafeLowerTriangularMatrixView(x::Vector{Float64}, N::Int)
z = div(N * (N + 1), 2)
if length(x) < z
resize!(x, z)
end
for i in 1:z
x[i] = 0.0
end
return _UnsafeLowerTriangularMatrixView(N, pointer(x))
end
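
For illustration, a usage sketch of this internal constructor; `buffer` stands in for a long-lived array such as `d.user_output_buffer`:

```julia
buffer = Float64[]
H = _UnsafeLowerTriangularMatrixView(buffer, 3)  # resizes buffer to length 6 and zeros it
H[2, 1] = -400.0
H[3, 3] = 200.0
@assert buffer[2] == -400.0  # packed slot _linear_index(2, 1)
@assert buffer[6] == 200.0   # packed slot _linear_index(3, 3)
# H[1, 2] throws: only the lower triangle is addressable.
# This is only safe because `buffer` outlives `H` and is not resized while
# `H` is in use.
```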

function _reinterpret_unsafe(::Type{T}, x::Vector{R}) where {T,R}
# how many T's fit into x?
@assert isbitstype(T) && isbitstype(R)
9 changes: 8 additions & 1 deletion src/Nonlinear/model.jl
@@ -203,8 +203,15 @@ Register the user-defined operator `op` with `nargs` input arguments in `model`.
* `∇f(g::AbstractVector{T}, x::T...)::T` is a function that takes a cache vector
`g` of length `length(x)`, and fills each element `g[i]` with the partial
derivative of `f` with respect to `x[i]`.
* `∇²f(H::AbstractMatrix, x::T...)::T` is a function that takes a matrix `H` and
fills the lower-triangular components `H[i, j]` with the Hessian of `f` with
respect to `x[i]` and `x[j]` for `i >= j`.

Hessian are not supported for multivariate functions.
### Notes for multivariate Hessians

* `H` has `size(H) == (length(x), length(x))`, but you must not access
elements `H[i, j]` for `i > j`.
* `H` is dense, but you do not need to fill structural zeros.
"""
function register_operator(model::Model, op::Symbol, nargs::Int, f::Function...)
return register_operator(model.operators, op, nargs, f...)
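To make the `∇²f` convention above concrete, here is a hedged sketch of conforming callbacks for a hypothetical three-argument operator; registration then proceeds via `register_operator(model, :my_op, 3, f, ∇f, ∇²f)`:

```julia
f(x...) = x[1]^2 * x[2] + x[3]

function ∇f(g, x...)
    g[1] = 2 * x[1] * x[2]
    g[2] = x[1]^2
    g[3] = 1.0
    return
end

function ∇²f(H, x...)
    # H is 3-by-3 and pre-zeroed. Fill only entries H[i, j] with i >= j;
    # structural zeros (H[2, 2] and every entry involving x[3]) may be skipped.
    H[1, 1] = 2 * x[2]
    H[2, 1] = 2 * x[1]
    return
end
```

Providing `∇²f` is also what keeps `:Hess` in `MOI.features_available` for models that use the operator (see `features_available` in `mathoptinterface_api.jl` above).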
19 changes: 18 additions & 1 deletion src/Nonlinear/operators.jl
@@ -631,7 +631,24 @@ end

_nan_to_zero(x) = isnan(x) ? 0.0 : x

# No docstring because this function is still a WIP.
"""
eval_multivariate_hessian(
registry::OperatorRegistry,
op::Symbol,
H::AbstractMatrix,
x::AbstractVector{T},
) where {T}

Evaluate the Hessian of operator `∇²op(x)`, where `op` is a multivariate
function in `registry`.

The Hessian is stored in the lower-triangular part of the matrix `H`.

!!! note
Implementations of the Hessian operators will not fill structural zeros.
Therefore, before calling this function you should pre-populate the matrix
`H` with `0`.
"""
function eval_multivariate_hessian(
registry::OperatorRegistry,
op::Symbol,
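For illustration, a hedged end-to-end sketch of calling this function through a model's registry; the operator `:sq_norm` is hypothetical, and `model.operators` is the `OperatorRegistry` that `register_operator(model, ...)` writes to:

```julia
import MathOptInterface as MOI
const Nonlinear = MOI.Nonlinear

f(x...) = x[1]^2 + x[2]^2
∇f(g, x...) = (g[1] = 2 * x[1]; g[2] = 2 * x[2]; nothing)
∇²f(H, x...) = (H[1, 1] = 2.0; H[2, 2] = 2.0; nothing)  # off-diagonal zero skipped

model = Nonlinear.Model()
Nonlinear.register_operator(model, :sq_norm, 2, f, ∇f, ∇²f)

H = zeros(2, 2)  # pre-populate with zeros, per the note above
Nonlinear.eval_multivariate_hessian(model.operators, :sq_norm, H, [3.0, 4.0])
# Now H[1, 1] == 2.0 and H[2, 2] == 2.0, while H[2, 1] remains 0.0;
# only the lower triangle should be read.
```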
88 changes: 73 additions & 15 deletions test/Nonlinear/ReverseAD.jl
@@ -53,6 +53,34 @@ function test_objective_quadratic_univariate()
return
end

function test_objective_and_constraints_quadratic_univariate()
x = MOI.VariableIndex(1)
model = Nonlinear.Model()
Nonlinear.set_objective(model, :($x^2 + 1))
Nonlinear.add_constraint(model, :($x^2), MOI.LessThan(2.0))
evaluator = Nonlinear.Evaluator(model, Nonlinear.SparseReverseMode(), [x])
MOI.initialize(evaluator, [:Grad, :Jac, :Hess])
@test MOI.eval_objective(evaluator, [1.2]) == 1.2^2 + 1
g = [NaN]
MOI.eval_objective_gradient(evaluator, g, [1.2])
@test g == [2.4]
@test MOI.hessian_lagrangian_structure(evaluator) == [(1, 1), (1, 1)]
H = [NaN, NaN]
MOI.eval_hessian_lagrangian(evaluator, H, [1.2], 1.5, Float64[1.3])
@test H == [1.5, 1.3] .* [2.0, 2.0]
Hp = [NaN]
MOI.eval_hessian_lagrangian_product(
evaluator,
Hp,
[1.2],
[1.2],
1.5,
Float64[1.3],
)
@test Hp == [1.5 * 2.0 * 1.2 + 1.3 * 2.0 * 1.2]
return
end

function test_objective_quadratic_multivariate()
x = MOI.VariableIndex(1)
y = MOI.VariableIndex(2)
@@ -287,14 +315,13 @@ function test_hessian_sparsity_registered_function()
Nonlinear.set_objective(model, :(f($x, $z) + $y^2))
evaluator =
Nonlinear.Evaluator(model, Nonlinear.SparseReverseMode(), [x, y, z])
@test_broken :Hess in MOI.features_available(evaluator)
# TODO(odow): re-enable these tests when user-defined hessians are supported
# MOI.initialize(evaluator, [:Grad, :Jac, :Hess])
# @test MOI.hessian_lagrangian_structure(evaluator) ==
# [(1, 1), (2, 2), (3, 3), (3, 1)]
# H = fill(NaN, 4)
# MOI.eval_hessian_lagrangian(evaluator, H, rand(3), 1.5, Float64[])
# @test H == 1.5 .* [2.0, 2.0, 2.0, 0.0]
@test :Hess in MOI.features_available(evaluator)
MOI.initialize(evaluator, [:Grad, :Jac, :Hess])
@test MOI.hessian_lagrangian_structure(evaluator) ==
[(1, 1), (2, 2), (3, 3), (3, 1)]
H = fill(NaN, 4)
MOI.eval_hessian_lagrangian(evaluator, H, rand(3), 1.5, Float64[])
@test H == 1.5 .* [2.0, 2.0, 2.0, 0.0]
return
end

@@ -308,6 +335,7 @@ function test_hessian_sparsity_registered_rosenbrock()
return
end
function ∇²f(H, x...)
@assert size(H) == (2, 2)
H[1, 1] = 1200 * x[1]^2 - 400 * x[2] + 2
H[2, 1] = -400 * x[1]
H[2, 2] = 200.0
@@ -318,13 +346,43 @@ function test_hessian_sparsity_registered_rosenbrock()
Nonlinear.set_objective(model, :(rosenbrock($x, $y)))
evaluator =
Nonlinear.Evaluator(model, Nonlinear.SparseReverseMode(), [x, y])
@test_broken :Hess in MOI.features_available(evaluator)
# TODO(odow): re-enable these tests when user-defined hessians are supported
# MOI.initialize(evaluator, [:Grad, :Jac, :Hess])
# @test MOI.hessian_lagrangian_structure(evaluator) == [(1, 1), (2, 2), (2, 1)]
# H = fill(NaN, 3)
# MOI.eval_hessian_lagrangian(evaluator, H, [1.0, 1.0], 1.5, Float64[])
# @test H == 1.5 .* [802, 200, -400]
@test :Hess in MOI.features_available(evaluator)
MOI.initialize(evaluator, [:Grad, :Jac, :Hess])
@test MOI.hessian_lagrangian_structure(evaluator) ==
[(1, 1), (2, 2), (2, 1)]
H = fill(NaN, 3)
MOI.eval_hessian_lagrangian(evaluator, H, [1.0, 1.0], 1.5, Float64[])
@test H == 1.5 .* [802.0, 200.0, -400.0]
return
end

function test_hessian_registered_error()
x = MOI.VariableIndex(1)
y = MOI.VariableIndex(2)
f(x...) = (1 - x[1])^2 + 100 * (x[2] - x[1]^2)^2
function ∇f(g, x...)
g[1] = 400 * x[1]^3 - 400 * x[1] * x[2] + 2 * x[1] - 2
g[2] = 200 * (x[2] - x[1]^2)
return
end
function ∇²f(H, x...)
H[1, 1] = 1200 * x[1]^2 - 400 * x[2] + 2
# Wrong index! Should be [2, 1]
H[1, 2] = -400 * x[1]
H[2, 2] = 200.0
return
end
model = Nonlinear.Model()
Nonlinear.register_operator(model, :rosenbrock, 2, f, ∇f, ∇²f)
Nonlinear.set_objective(model, :(rosenbrock($x, $y)))
evaluator =
Nonlinear.Evaluator(model, Nonlinear.SparseReverseMode(), [x, y])
MOI.initialize(evaluator, [:Grad, :Jac, :Hess])
H = fill(NaN, 3)
@test_throws(
ErrorException("Unable to access upper-triangular component: (1, 2)"),
MOI.eval_hessian_lagrangian(evaluator, H, [1.0, 1.0], 1.5, Float64[]),
)
return
end

Expand Down