diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index 1fbced77d..374ada27d 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -11,8 +11,8 @@ jobs: strategy: matrix: version: - - "min" - - "lts" + # - "min" + # - "lts" - "1" os: - ubuntu-latest diff --git a/.github/workflows/Docs.yml b/.github/workflows/Docs.yml index ebf64bd72..943c995eb 100644 --- a/.github/workflows/Docs.yml +++ b/.github/workflows/Docs.yml @@ -19,7 +19,7 @@ jobs: with: version: '1' - name: Install dependencies - run: julia --project=docs/ -e 'using Pkg; Pkg.develop(PackageSpec(path=pwd())); Pkg.instantiate()' + run: julia --project=docs/ -e 'using Pkg; Pkg.instantiate()' - name: Build and deploy env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} # If authenticating with GitHub Actions token diff --git a/Project.toml b/Project.toml index cf261c942..db9d65c47 100644 --- a/Project.toml +++ b/Project.toml @@ -18,6 +18,10 @@ Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" [weakdeps] MathOptInterface = "b8f27783-ece8-5eb3-8dc8-9495eed66fee" +[sources] +LineSearches = { url = "https://github.com/devmotion/LineSearches.jl.git", rev = "dmw/jvp" } +NLSolversBase = { url = "https://github.com/devmotion/NLSolversBase.jl.git", rev = "dmw/jvp" } + [extensions] OptimMOIExt = "MathOptInterface" @@ -30,11 +34,11 @@ ExplicitImports = "1.13.2" FillArrays = "0.6.2, 0.7, 0.8, 0.9, 0.10, 0.11, 0.12, 0.13, 1" ForwardDiff = "0.10, 1" JET = "0.9, 0.10" -LineSearches = "7.5.1" +LineSearches = "7.6" LinearAlgebra = "<0.0.1, 1.6" MathOptInterface = "1.17" Measurements = "2.14.1" -NLSolversBase = "7.9.0" +NLSolversBase = "8" NaNMath = "0.3.2, 1" OptimTestProblems = "2.0.3" PositiveFactorizations = "0.2.2" diff --git a/docs/Project.toml b/docs/Project.toml index c8ea546db..2cfc7556c 100644 --- a/docs/Project.toml +++ b/docs/Project.toml @@ -12,5 +12,6 @@ SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" Documenter = "1" Literate = "2" -[sources.Optim] -path = ".." +[sources] +Optim = { path = ".." } +NLSolversBase = { url = "https://github.com/devmotion/NLSolversBase.jl.git", rev = "dmw/jvp" } diff --git a/docs/src/examples/ipnewton_basics.jl b/docs/src/examples/ipnewton_basics.jl index a8564957b..66f4f79ab 100644 --- a/docs/src/examples/ipnewton_basics.jl +++ b/docs/src/examples/ipnewton_basics.jl @@ -22,6 +22,7 @@ # constraint is unbounded from below or above respectively. using Optim, NLSolversBase #hide +import ADTypes #hide import NLSolversBase: clear! #hide # # Constrained optimization with `IPNewton` diff --git a/ext/OptimMOIExt.jl b/ext/OptimMOIExt.jl index 8f9147ef8..5c0c10fad 100644 --- a/ext/OptimMOIExt.jl +++ b/ext/OptimMOIExt.jl @@ -1,7 +1,7 @@ module OptimMOIExt using Optim -using Optim.LinearAlgebra: rmul! +using Optim.LinearAlgebra: rmul! import MathOptInterface as MOI function __init__() diff --git a/src/Manifolds.jl b/src/Manifolds.jl index b016dd629..5ff53aa2f 100644 --- a/src/Manifolds.jl +++ b/src/Manifolds.jl @@ -20,9 +20,8 @@ project_tangent(M::Manifold, x) = project_tangent!(M, similar(x), x) retract(M::Manifold, x) = retract!(M, copy(x)) # Fake objective function implementing a retraction -mutable struct ManifoldObjective{T<:NLSolversBase.AbstractObjective} <: - NLSolversBase.AbstractObjective - manifold::Manifold +struct ManifoldObjective{M<:Manifold,T<:AbstractObjective} <: AbstractObjective + manifold::M inner_obj::T end # TODO: is it safe here to call retract! and change x? @@ -43,6 +42,20 @@ function NLSolversBase.value_gradient!(obj::ManifoldObjective, x) return f_x, g_x end +# In general, we have to compute the gradient/Jacobian separately as it has to be projected +function NLSolversBase.jvp!(obj::ManifoldObjective, x, v) + xin = retract(obj.manifold, x) + g_x = gradient!(obj.inner_obj, xin) + project_tangent!(obj.manifold, g_x, xin) + return dot(g_x, v) +end +function NLSolversBase.value_jvp!(obj::ManifoldObjective, x, v) + xin = retract(obj.manifold, x) + f_x, g_x = value_gradient!(obj.inner_obj, xin) + project_tangent!(obj.manifold, g_x, xin) + return f_x, dot(g_x, v) +end + """Flat Euclidean space {R,C}^N, with projections equal to the identity.""" struct Flat <: Manifold end # all the functions below are no-ops, and therefore the generated code @@ -53,6 +66,10 @@ retract!(M::Flat, x) = x project_tangent(M::Flat, g, x) = g project_tangent!(M::Flat, g, x) = g +# Optimizations for `Flat` manifold +NLSolversBase.jvp!(obj::ManifoldObjective{Flat}, x, v) = NLSolversBase.jvp!(obj.inner_obj, x, v) +NLSolversBase.value_jvp!(obj::ManifoldObjective{Flat}, x, v) = NLSolversBase.value_jvp!(obj.inner_obj, x, v) + """Spherical manifold {|x| = 1}.""" struct Sphere <: Manifold end retract!(S::Sphere, x) = (x ./= norm(x)) diff --git a/src/Optim.jl b/src/Optim.jl index d77385456..0974751b5 100644 --- a/src/Optim.jl +++ b/src/Optim.jl @@ -16,6 +16,8 @@ documentation online at http://julianlsolvers.github.io/Optim.jl/stable/ . """ module Optim +import ADTypes + using PositiveFactorizations: Positive # for globalization strategy in Newton using LineSearches: LineSearches # for globalization strategy in Quasi-Newton algs @@ -35,7 +37,6 @@ using NLSolversBase: NonDifferentiable, OnceDifferentiable, TwiceDifferentiable, - TwiceDifferentiableHV, AbstractConstraints, ConstraintBounds, TwiceDifferentiableConstraints, diff --git a/src/api.jl b/src/api.jl index 467488ff4..71a1a3d01 100644 --- a/src/api.jl +++ b/src/api.jl @@ -104,7 +104,7 @@ f_calls(d::AbstractObjective) = NLSolversBase.f_calls(d) g_calls(r::OptimizationResults) = error("g_calls is not implemented for $(summary(r)).") g_calls(r::MultivariateOptimizationResults) = r.g_calls -g_calls(d::AbstractObjective) = NLSolversBase.g_calls(d) +g_calls(d::AbstractObjective) = NLSolversBase.g_calls(d) + NLSolversBase.jvp_calls(d) h_calls(r::OptimizationResults) = error("h_calls is not implemented for $(summary(r)).") h_calls(r::MultivariateOptimizationResults) = r.h_calls diff --git a/src/multivariate/optimize/interface.jl b/src/multivariate/optimize/interface.jl index 4642dd0e0..eaf91a4df 100644 --- a/src/multivariate/optimize/interface.jl +++ b/src/multivariate/optimize/interface.jl @@ -66,20 +66,6 @@ promote_objtype( inplace::Bool, f::InplaceObjective, ) = TwiceDifferentiable(f, x, real(zero(eltype(x)))) -promote_objtype( - method::SecondOrderOptimizer, - x, - autodiff::ADTypes.AbstractADType, - inplace::Bool, - f::NLSolversBase.InPlaceObjectiveFGHv, -) = TwiceDifferentiableHV(f, x) -promote_objtype( - method::SecondOrderOptimizer, - x, - autodiff::ADTypes.AbstractADType, - inplace::Bool, - f::NLSolversBase.InPlaceObjectiveFG_Hv, -) = TwiceDifferentiableHV(f, x) promote_objtype(method::SecondOrderOptimizer, x, autodiff::ADTypes.AbstractADType, inplace::Bool, f, g) = TwiceDifferentiable( f, diff --git a/src/multivariate/optimize/optimize.jl b/src/multivariate/optimize/optimize.jl index 6a3ce198f..a37fa6fc1 100644 --- a/src/multivariate/optimize/optimize.jl +++ b/src/multivariate/optimize/optimize.jl @@ -1,25 +1,23 @@ # Update function value, gradient and Hessian function update_fgh!(d, state, ::ZerothOrderOptimizer) - f_x = value!(d, state.x) + f_x = NLSolversBase.value!(d, state.x) state.f_x = f_x return nothing end function update_fgh!(d, state, method::FirstOrderOptimizer) - f_x, g_x = value_gradient!(d, state.x) + f_x, g_x = NLSolversBase.value_gradient!(d, state.x) + copyto!(state.g_x, g_x) if hasproperty(method, :manifold) - project_tangent!(method.manifold, g_x, state.x) + project_tangent!(method.manifold, state.g_x, state.x) end state.f_x = f_x - copyto!(state.g_x, g_x) return nothing end function update_fgh!(d, state, method::SecondOrderOptimizer) # Manifold optimization is currently not supported for second order optimization algorithms @assert !hasproperty(method, :manifold) - # TODO: Switch to `value_gradient_hessian!` when it becomes available - f_x, g_x = value_gradient!(d, state.x) - H_x = hessian!(d, state.x) + f_x, g_x, H_x = NLSolversBase.value_gradient_hessian!(d, state.x) state.f_x = f_x copyto!(state.g_x, g_x) copyto!(state.H_x, H_x) diff --git a/src/multivariate/solvers/constrained/fminbox.jl b/src/multivariate/solvers/constrained/fminbox.jl index d4a11128d..92d365705 100644 --- a/src/multivariate/solvers/constrained/fminbox.jl +++ b/src/multivariate/solvers/constrained/fminbox.jl @@ -13,6 +13,7 @@ end NLSolversBase.f_calls(obj::BarrierWrapper) = NLSolversBase.f_calls(obj.obj) NLSolversBase.g_calls(obj::BarrierWrapper) = NLSolversBase.g_calls(obj.obj) +NLSolversBase.jvp_calls(obj::BarrierWrapper) = NLSolversBase.jvp_calls(obj.obj) NLSolversBase.h_calls(obj::BarrierWrapper) = NLSolversBase.h_calls(obj.obj) NLSolversBase.hv_calls(obj::BarrierWrapper) = NLSolversBase.hv_calls(obj.obj) @@ -40,9 +41,9 @@ struct BoxBarrier{L,U} upper::U end function in_box(bb::BoxBarrier, x) - all(x -> x[1] >= x[2] && x[1] <= x[3], zip(x, bb.lower, bb.upper)) + all(x -> x[1] <= x[2] <= x[3], zip(bb.lower, x, bb.upper)) end -in_box(bw::BarrierWrapper, x) = in_box(bw.b, x) + # evaluates the value and gradient components comming from the log barrier function _barrier_term_value(x::T, l, u) where {T} dxl = x - l @@ -71,11 +72,15 @@ function _barrier_term_gradient(x::T, l, u) where {T} return g end +function _barrier_jvp(bb::BoxBarrier, x, v) + return sum(Broadcast.instantiate(Broadcast.broadcasted((xi, li, ui, vi) -> dot(_barrier_term_gradient(xi, li, ui), vi), x, bb.lower, bb.upper, v))) +end + # Wrappers function NLSolversBase.value_gradient!(bb::BarrierWrapper, x) bb.DFb .= _barrier_term_gradient.(x, bb.b.lower, bb.b.upper) bb.Fb = _barrier_value(bb.b, x) - if in_box(bb, x) + if in_box(bb.b, x) F, DF = value_gradient!(bb.obj, x) bb.DFtotal .= muladd.(bb.mu, bb.DFb, DF) bb.Ftotal = muladd(bb.mu, bb.Fb, F) @@ -87,7 +92,7 @@ function NLSolversBase.value_gradient!(bb::BarrierWrapper, x) end function NLSolversBase.value!(obj::BarrierWrapper, x) obj.Fb = _barrier_value(obj.b, x) - if in_box(obj, x) + if in_box(obj.b, x) F = value!(obj.obj, x) obj.Ftotal = muladd(obj.mu, obj.Fb, F) else @@ -97,7 +102,7 @@ function NLSolversBase.value!(obj::BarrierWrapper, x) end function NLSolversBase.value(obj::BarrierWrapper, x) Fb = _barrier_value(obj.b, x) - if in_box(obj, x) + if in_box(obj.b, x) return muladd(obj.mu, Fb, value(obj.obj, x)) else return obj.mu * Fb @@ -105,7 +110,7 @@ function NLSolversBase.value(obj::BarrierWrapper, x) end function NLSolversBase.gradient!(obj::BarrierWrapper, x) obj.DFb .= _barrier_term_gradient.(x, obj.b.lower, obj.b.upper) - if in_box(obj, x) + if in_box(obj.b, x) DF = gradient!(obj.obj, x) obj.DFtotal .= muladd.(obj.mu, obj.DFb, DF) else @@ -114,6 +119,29 @@ function NLSolversBase.gradient!(obj::BarrierWrapper, x) return obj.DFtotal end +function NLSolversBase.jvp!(obj::BarrierWrapper, x, v) + JVPb = _barrier_jvp(obj.b, x, v) + if in_box(obj.b, x) + JVP = NLSolversBase.jvp!(obj.obj, x, v) + return muladd(obj.mu, JVPb, JVP) + else + return obj.mu * JVPb + end +end +function NLSolversBase.value_jvp!(obj::BarrierWrapper, x, v) + JVPb = _barrier_jvp(obj.b, x, v) + obj.Fb = _barrier_value(obj.b, x) + if in_box(obj.b, x) + F, JVP = NLSolversBase.value_jvp!(obj.obj, x, v) + JVPtotal = muladd(obj.mu, JVPb, JVP) + obj.Ftotal = muladd(obj.mu, obj.Fb, F) + else + JVPtotal = obj.mu * JVPb + obj.Ftotal = obj.mu * obj.Fb + end + return obj.Ftotal, JVPtotal +end + function limits_box( x::AbstractArray{T}, d::AbstractArray{T}, @@ -274,7 +302,7 @@ function optimize( initial_x::AbstractArray, F::Fminbox = Fminbox(), options::Options = Options(); - inplace = true, + inplace::Bool = true, ) g! = inplace ? g : (G, x) -> copyto!(G, g(x)) @@ -499,12 +527,13 @@ function optimize( results = optimize(dfbox, x, _optimizer, options, state) stopped_by_callback = results.stopped_by.callback - dfbox.obj.f_calls[1] = 0 + # TODO: Reset everything? Add upstream API for resetting call counters? + dfbox.obj.f_calls = 0 if hasfield(typeof(dfbox.obj), :df_calls) - dfbox.obj.df_calls[1] = 0 + dfbox.obj.df_calls = 0 end if hasfield(typeof(dfbox.obj), :h_calls) - dfbox.obj.h_calls[1] = 0 + dfbox.obj.h_calls = 0 end # Compute function value and gradient (without barrier term) @@ -579,12 +608,13 @@ function optimize( resultsnew = optimize(dfbox, x, _optimizer, options, state) stopped_by_callback = resultsnew.stopped_by.callback append!(results, resultsnew) - dfbox.obj.f_calls[1] = 0 + # TODO: Reset everything? Add upstream API for resetting call counters? + dfbox.obj.f_calls = 0 if hasfield(typeof(dfbox.obj), :df_calls) - dfbox.obj.df_calls[1] = 0 + dfbox.obj.df_calls = 0 end if hasfield(typeof(dfbox.obj), :h_calls) - dfbox.obj.h_calls[1] = 0 + dfbox.obj.h_calls = 0 end # Compute function value and gradient (without barrier term) diff --git a/src/multivariate/solvers/constrained/ipnewton/ipnewton.jl b/src/multivariate/solvers/constrained/ipnewton/ipnewton.jl index 189f6a8c9..b07bba7b3 100644 --- a/src/multivariate/solvers/constrained/ipnewton/ipnewton.jl +++ b/src/multivariate/solvers/constrained/ipnewton/ipnewton.jl @@ -107,8 +107,7 @@ function initial_state( # Allocate fields for the objective function n = length(initial_x) - # TODO: Switch to `value_gradient_hessian!` - f_x, g_x, H_x = NLSolversBase.value_gradient_hessian!!(d, initial_x) + f_x, g_x, H_x = NLSolversBase.value_gradient_hessian!(d, initial_x) # More constraints constr_J = fill!(Matrix{T}(undef, mc, n), NaN) @@ -157,8 +156,7 @@ end function update_fgh!(d, constraints::TwiceDifferentiableConstraints, state, method::IPNewton) # Compute objective function, gradient and Hessian matrix - # TODO: Switch to `value_gradient_hessian!!` - f_x, g_x, H_x = NLSolversBase.value_gradient_hessian!!(d, state.x) + f_x, g_x, H_x = NLSolversBase.value_gradient_hessian!(d, state.x) copyto!(state.g_x, g_x) copyto!(state.H_x, H_x) state.f_x = f_x diff --git a/src/multivariate/solvers/first_order/accelerated_gradient_descent.jl b/src/multivariate/solvers/first_order/accelerated_gradient_descent.jl index 7e1ca1d53..6c5796cf6 100644 --- a/src/multivariate/solvers/first_order/accelerated_gradient_descent.jl +++ b/src/multivariate/solvers/first_order/accelerated_gradient_descent.jl @@ -43,12 +43,13 @@ function initial_state( ) initial_x = copy(initial_x) retract!(method.manifold, initial_x) - f_x, g_x = value_gradient!(d, initial_x) + f_x, g_x = NLSolversBase.value_gradient!(d, initial_x) + g_x = copy(g_x) project_tangent!(method.manifold, g_x, initial_x) AcceleratedGradientDescentState( initial_x, # Maintain current state in state.x - copy(g_x), # Maintain current gradient in state.g_x + g_x, # Maintain current gradient in state.g_x f_x, # Maintain current f in state.f_x fill!(similar(initial_x), NaN), # Maintain previous state in state.x_previous oftype(f_x, NaN), # Store previous f in state.f_x_previous diff --git a/src/multivariate/solvers/first_order/adam.jl b/src/multivariate/solvers/first_order/adam.jl index 6eb608210..9886e5920 100644 --- a/src/multivariate/solvers/first_order/adam.jl +++ b/src/multivariate/solvers/first_order/adam.jl @@ -62,7 +62,7 @@ function reset!(method::Adam, state::AdamState, obj, x) # Update function value and gradient copyto!(state.x, x) retract!(method.manifold, state.x) - f_x, g_x = value_gradient!(obj, state.x) + f_x, g_x = NLSolversBase.value_gradient!(obj, state.x) copyto!(state.g_x, g_x) project_tangent!(method.manifold, state.g_x, state.x) state.f_x = f_x @@ -88,7 +88,7 @@ function initial_state(method::Adam, ::Options, d, initial_x::AbstractArray) # Compute function value and gradient initial_x = copy(initial_x) retract!(method.manifold, initial_x) - f_x, g_x = value_gradient!(d, initial_x) + f_x, g_x = NLSolversBase.value_gradient!(d, initial_x) g_x = copy(g_x) project_tangent!(method.manifold, g_x, initial_x) diff --git a/src/multivariate/solvers/first_order/adamax.jl b/src/multivariate/solvers/first_order/adamax.jl index 678a683d4..9f16cf0b5 100644 --- a/src/multivariate/solvers/first_order/adamax.jl +++ b/src/multivariate/solvers/first_order/adamax.jl @@ -61,7 +61,7 @@ function reset!(method::AdaMax, state::AdaMaxState, obj, x) # Update function value and gradient copyto!(state.x, x) retract!(method.manifold, state.x) - f_x, g_x = value_gradient!(obj, state.x) + f_x, g_x = NLSolversBase.value_gradient!(obj, state.x) copyto!(state.g_x, g_x) project_tangent!(method.manifold, state.g_x, state.x) state.f_x = f_x @@ -87,7 +87,7 @@ function initial_state(method::AdaMax, options::Options, d, initial_x::AbstractA # Compute function value and gradient initial_x = copy(initial_x) retract!(method.manifold, initial_x) - f_x, g_x = value_gradient!(d, initial_x) + f_x, g_x = NLSolversBase.value_gradient!(d, initial_x) g_x = copy(g_x) project_tangent!(method.manifold, g_x, initial_x) diff --git a/src/multivariate/solvers/first_order/bfgs.jl b/src/multivariate/solvers/first_order/bfgs.jl index 9e6041b51..4e83b0ea9 100644 --- a/src/multivariate/solvers/first_order/bfgs.jl +++ b/src/multivariate/solvers/first_order/bfgs.jl @@ -72,11 +72,12 @@ end function reset!(method, state::BFGSState, obj, x) # Update function value and gradient - retract!(method.manifold, x) - f_x, g_x = value_gradient!(obj, x) - project_tangent!(method.manifold, g_x, x) - state.f_x = f_x + copyto!(state.x, x) + retract!(method.manifold, state.x) + f_x, g_x = NLSolversBase.value_gradient!(obj, state.x) copyto!(state.g_x, g_x) + project_tangent!(method.manifold, state.g_x, state.x) + state.f_x = f_x # Delete history fill!(state.x_previous, NaN) @@ -104,7 +105,8 @@ function initial_state(method::BFGS, ::Options, d, initial_x::AbstractArray) # Compute function value and gradient initial_x = copy(initial_x) retract!(method.manifold, initial_x) - f_x, g_x = value_gradient!(d, initial_x) + f_x, g_x = NLSolversBase.value_gradient!(d, initial_x) + g_x = copy(g_x) project_tangent!(method.manifold, g_x, initial_x) # Initialize approximation of inverse Hessian @@ -125,7 +127,7 @@ function initial_state(method::BFGS, ::Options, d, initial_x::AbstractArray) # Trace the history of states visited BFGSState( initial_x, # Maintain current state in state.x - copy(g_x), # Maintain current gradient in state.g_x + g_x, # Maintain current gradient in state.g_x f_x, # Maintain current f in state.f_x fill!(similar(initial_x), NaN), # Maintain previous state in state.x_previous fill!(similar(g_x), NaN), # Store previous gradient in state.g_x_previous @@ -164,7 +166,7 @@ function update_fgh!(d, state::BFGSState, method::BFGS) (; invH, dx, dg, u) = state # Update function value and gradient - f_x, g_x = value_gradient!(d, state.x) + f_x, g_x = NLSolversBase.value_gradient!(d, state.x) copyto!(state.g_x, g_x) project_tangent!(method.manifold, state.g_x, state.x) state.f_x = f_x diff --git a/src/multivariate/solvers/first_order/cg.jl b/src/multivariate/solvers/first_order/cg.jl index 99fd9d724..f3bfeb2d3 100644 --- a/src/multivariate/solvers/first_order/cg.jl +++ b/src/multivariate/solvers/first_order/cg.jl @@ -108,18 +108,18 @@ end function reset!(cg::ConjugateGradient, cgs::ConjugateGradientState, obj, x) copyto!(cgs.x, x) retract!(cg.manifold, cgs.x) - f_x, g_x = value_gradient!(obj, cgs.x) - project_tangent!(cg.manifold, g_x, cgs.x) - cgs.f_x = f_x + f_x, g_x = NLSolversBase.value_gradient!(obj, cgs.x) copyto!(cgs.g_x, g_x) + project_tangent!(cg.manifold, cgs.g_x, cgs.x) + cgs.f_x = f_x fill!(cgs.x_previous, NaN) cgs.f_x_previous = oftype(cgs.f_x_previous, NaN) fill!(cgs.g_x_previous, NaN) - _precondition!(cgs.pg, cg, x, g_x) + _precondition!(cgs.pg, cg, cgs.x, cgs.g_x) if cg.P !== nothing - project_tangent!(cg.manifold, cgs.pg, x) + project_tangent!(cg.manifold, cgs.pg, cgs.x) end cgs.s .= .-cgs.pg @@ -129,6 +129,7 @@ function initial_state(method::ConjugateGradient, ::Options, d, initial_x) initial_x = copy(initial_x) retract!(method.manifold, initial_x) f_x, g_x = value_gradient!(d, initial_x) + g_x = copy(g_x) project_tangent!(method.manifold, g_x, initial_x) # Could move this out? as a general check? @@ -152,7 +153,7 @@ function initial_state(method::ConjugateGradient, ::Options, d, initial_x) ConjugateGradientState( initial_x, # Maintain current state in state.x - copy(g_x), # Maintain current gradient in state.g + g_x, # Maintain current gradient in state.g f_x, # Maintain current f in state.f fill!(similar(initial_x), NaN), # Maintain previous state in state.x_previous fill!(similar(g_x), NaN), # Store previous gradient in state.g_x_previous @@ -179,10 +180,10 @@ function update_state!(d, state::ConjugateGradientState, method::ConjugateGradie retract!(method.manifold, state.x) # Update the function value and gradient - f_x, g_x = value_gradient!(d, state.x) - project_tangent!(method.manifold, g_x, state.x) - state.f_x = f_x + f_x, g_x = NLSolversBase.value_gradient!(d, state.x) copyto!(state.g_x, g_x) + project_tangent!(method.manifold, state.g_x, state.x) + state.f_x = f_x # Check sanity of function and gradient isfinite(f_x) || error(LazyString("Non-finite f(x) while optimizing (", f_x, ")")) @@ -200,7 +201,7 @@ function update_state!(d, state::ConjugateGradientState, method::ConjugateGradie _apply_precondprep(method, state.x) dPd = _inverse_precondition(method, state) etak = method.eta * real(dot(state.s, state.g_x_previous)) / dPd # New in HZ2013 - state.y .= g_x .- state.g_x_previous + state.y .= state.g_x .- state.g_x_previous ydots = real(dot(state.y, state.s)) copyto!(state.py, state.pg) # below, store pg - pg_previous in py # P already updated in _apply_precondprep above @@ -211,7 +212,7 @@ function update_state!(d, state::ConjugateGradientState, method::ConjugateGradie betak = ( real(dot(state.y, state.pg)) - - real(dot(state.y, state.py)) * real(dot(g_x, state.s)) / ydots + real(dot(state.y, state.py)) * real(dot(state.g_x, state.s)) / ydots ) / ydots # betak may be undefined if ydots is zero (may due to f not strongly convex or non-Wolfe linesearch) beta = NaNMath.max(betak, etak) # TODO: Set to zero if betak is NaN? diff --git a/src/multivariate/solvers/first_order/gradient_descent.jl b/src/multivariate/solvers/first_order/gradient_descent.jl index b999f2096..dec894129 100644 --- a/src/multivariate/solvers/first_order/gradient_descent.jl +++ b/src/multivariate/solvers/first_order/gradient_descent.jl @@ -53,9 +53,9 @@ function reset!(method::GradientDescent, state::GradientDescentState, obj, x) # Update function value and gradient copyto!(state.x, x) retract!(method.manifold, state.x) - f_x, g_x = value_gradient!(obj, state.x) + f_x, g_x = NLSolversBase.value_gradient!(obj, state.x) copyto!(state.g_x, g_x) - project_tangent!(method.manifold, g_x, state.x) + project_tangent!(method.manifold, state.g_x, state.x) state.f_x = f_x # Delete history @@ -74,7 +74,7 @@ function initial_state( # Compute function value and gradient initial_x = copy(initial_x) retract!(method.manifold, initial_x) - f_x, g_x = value_gradient!(d, initial_x) + f_x, g_x = NLSolversBase.value_gradient!(d, initial_x) g_x = copy(g_x) project_tangent!(method.manifold, g_x, initial_x) diff --git a/src/multivariate/solvers/first_order/l_bfgs.jl b/src/multivariate/solvers/first_order/l_bfgs.jl index 23aa175c7..65c0da81d 100644 --- a/src/multivariate/solvers/first_order/l_bfgs.jl +++ b/src/multivariate/solvers/first_order/l_bfgs.jl @@ -155,11 +155,12 @@ mutable struct LBFGSState{Tx,Tdx,Tdg,T,G} <: AbstractOptimizerState @add_linesearch_fields() end function reset!(method::LBFGS, state::LBFGSState, obj, x::AbstractArray) - retract!(method.manifold, x) - f_x, g_x = value_gradient!(obj, x) - project_tangent!(method.manifold, g_x, x) - state.f_x = f_x + copyto!(state.x, x) + retract!(method.manifold, state.x) + f_x, g_x = NLSolversBase.value_gradient!(obj, state.x) copyto!(state.g_x, g_x) + project_tangent!(method.manifold, state.g_x, state.x) + state.f_x = f_x fill!(state.x_previous, NaN) fill!(state.g_x_previous, NaN) @@ -173,11 +174,12 @@ function initial_state(method::LBFGS, options::Options, d, initial_x::AbstractAr T = real(eltype(initial_x)) initial_x = copy(initial_x) retract!(method.manifold, initial_x) - f_x, g_x = value_gradient!(d, initial_x) + f_x, g_x = NLSolversBase.value_gradient!(d, initial_x) + g_x = copy(g_x) project_tangent!(method.manifold, g_x, initial_x) LBFGSState( initial_x, # Maintain current state in state.x - copy(g_x), # Maintain current gradient in state.g_x + g_x, # Maintain current gradient in state.g_x f_x, # Main current f in state.f_x fill!(similar(initial_x), NaN), # Maintain previous state in state.x_previous fill!(similar(g_x), NaN), # Store previous gradient in state.g_x_previous @@ -200,9 +202,6 @@ function update_state!(d, state::LBFGSState, method::LBFGS) # Increment the number of steps we've had to perform state.pseudo_iteration += 1 - # TODO: Can this be removed? - project_tangent!(method.manifold, state.g_x, state.x) - # update the preconditioner _apply_precondprep(method, state.x) @@ -236,7 +235,7 @@ end function update_fgh!(d, state::LBFGSState, method::LBFGS) # Update function value and gradient - f_x, g_x = value_gradient!(d, state.x) + f_x, g_x = NLSolversBase.value_gradient!(d, state.x) copyto!(state.g_x, g_x) project_tangent!(method.manifold, state.g_x, state.x) state.f_x = f_x diff --git a/src/multivariate/solvers/first_order/momentum_gradient_descent.jl b/src/multivariate/solvers/first_order/momentum_gradient_descent.jl index 7b074e17c..8f7ab70e3 100644 --- a/src/multivariate/solvers/first_order/momentum_gradient_descent.jl +++ b/src/multivariate/solvers/first_order/momentum_gradient_descent.jl @@ -35,11 +35,12 @@ function initial_state(method::MomentumGradientDescent, ::Options, d, initial_x: initial_x = copy(initial_x) retract!(method.manifold, initial_x) f_x, g_x = value_gradient!(d, initial_x) + g_x = copy(g_x) project_tangent!(method.manifold, g_x, initial_x) MomentumGradientDescentState( initial_x, # Maintain current state in state.x - copy(g_x), # Maintain current gradient in state.g_x + g_x, # Maintain current gradient in state.g_x f_x, # Maintain current f in state.f_x copy(initial_x), # Maintain previous state in state.x_previous fill!(similar(initial_x), NaN), # Record momentum correction direction in state.x_momentum diff --git a/src/multivariate/solvers/first_order/ngmres.jl b/src/multivariate/solvers/first_order/ngmres.jl index 9c74b4695..595d23069 100644 --- a/src/multivariate/solvers/first_order/ngmres.jl +++ b/src/multivariate/solvers/first_order/ngmres.jl @@ -427,11 +427,10 @@ end function update_fgh!(d, state, method::AbstractNGMRES) # Update the function value and gradient - # TODO: do we need a retract! on state.x here? - f_x, g_x = value_gradient!(d, state.x) - project_tangent!(method.manifold, g_x, state.x) - state.f_x = f_x + f_x, g_x = NLSolversBase.value_gradient!(d, state.x) copyto!(state.g_x, g_x) + project_tangent!(method.manifold, state.g_x, state.x) + state.f_x = f_x if state.restart state.k = 0 diff --git a/src/multivariate/solvers/second_order/krylov_trust_region.jl b/src/multivariate/solvers/second_order/krylov_trust_region.jl index 30b1c164a..8e4ffea27 100644 --- a/src/multivariate/solvers/second_order/krylov_trust_region.jl +++ b/src/multivariate/solvers/second_order/krylov_trust_region.jl @@ -24,6 +24,7 @@ mutable struct KrylovTrustRegionState{T} <: AbstractOptimizerState g_x::Vector{T} x_previous::Vector{T} f_x_previous::T + x_cache::Vector{T} s::Vector{T} interior::Bool accept_step::Bool @@ -50,10 +51,11 @@ function initial_state(method::KrylovTrustRegion, options::Options, d, initial_x KrylovTrustRegionState( copy(initial_x), # Maintain current state in state.x f_x, # Maintain f of current state in state.f_x - g_x, # Maintain gradient of current state in state.g_x + copy(g_x), # Maintain gradient of current state in state.g_x copy(initial_x), # x_previous - zero(T), # f_x_previous - similar(initial_x), # Maintain current search direction in state.s + oftype(f_x, NaN), # f_x_previous + fill!(similar(initial_x), NaN), # In-place cache for `update_state!` + fill!(similar(initial_x), NaN), # Maintain current search direction in state.s true, # interior true, # accept step convert(T, method.initial_radius), @@ -104,7 +106,7 @@ end function cg_steihaug!( - objective::TwiceDifferentiableHV, + objective::TwiceDifferentiable, state::KrylovTrustRegionState{T}, method::KrylovTrustRegion, ) where {T} @@ -157,14 +159,16 @@ end function update_state!( - objective::TwiceDifferentiableHV, + objective::TwiceDifferentiable, state::KrylovTrustRegionState, method::KrylovTrustRegion, ) state.m_diff = cg_steihaug!(objective, state, method) @assert state.m_diff <= 0 - state.f_diff = value(objective, state.x .+ state.s) - state.f_x + state.x_cache .= state.x .+ state.s + f_x_cache = NLSolversBase.value!(objective, state.x_cache) + state.f_diff = f_x_cache - state.f_x state.rho = state.f_diff / state.m_diff state.interior = norm(state.s) < 0.9 * state.radius @@ -176,22 +180,22 @@ function update_state!( state.accept_step = state.rho > method.eta if state.accept_step - state.x .+= state.s + # Update history + copyto!(state.x_previous, state.x) + state.f_x_previous = state.f_x + + # Update state, function value and its gradient + copyto!(state.x, state.x_cache) + state.f_x = f_x_cache + g_x = gradient!(objective, state.x) + copyto!(state.g_x, g_x) end return false end - -function update_fgh!(objective, state::KrylovTrustRegionState, ::KrylovTrustRegion) - if state.accept_step - # Update the function value and gradient - state.f_x_previous = state.f_x - f_x, g_x = value_gradient!(objective, state.x) - state.f_x = f_x - copyto!(state.g_x, g_x) - end -end +# Function value and gradient are already updated in update_state! +update_fgh!(objective, state::KrylovTrustRegionState, ::KrylovTrustRegion) = nothing function assess_convergence(state::KrylovTrustRegionState, d, options::Options) if !state.accept_step diff --git a/src/multivariate/solvers/second_order/newton.jl b/src/multivariate/solvers/second_order/newton.jl index 20a4824ae..4184d5233 100644 --- a/src/multivariate/solvers/second_order/newton.jl +++ b/src/multivariate/solvers/second_order/newton.jl @@ -42,8 +42,7 @@ mutable struct NewtonState{Tx,Tg,TH,T,F<:Cholesky} <: AbstractOptimizerState end function initial_state(method::Newton, options, d, initial_x) - # TODO: Switch to `value_gradient_hessian!` - f_x, g_x, H_x = NLSolversBase.value_gradient_hessian!!(d, initial_x) + f_x, g_x, H_x = NLSolversBase.value_gradient_hessian!(d, initial_x) NewtonState( copy(initial_x), # Maintain current state in state.x diff --git a/src/multivariate/solvers/second_order/newton_trust_region.jl b/src/multivariate/solvers/second_order/newton_trust_region.jl index 55e50259e..a6eb3b4ac 100644 --- a/src/multivariate/solvers/second_order/newton_trust_region.jl +++ b/src/multivariate/solvers/second_order/newton_trust_region.jl @@ -333,8 +333,7 @@ function initial_state(method::NewtonTrustRegion, options, d, initial_x) interior = true lambda = NaN - # TODO: Switch to `value_gradient_hessian!` - f_x, g_x, H_x = NLSolversBase.value_gradient_hessian!!(d, initial_x) + f_x, g_x, H_x = NLSolversBase.value_gradient_hessian!(d, initial_x) NewtonTrustRegionState( copy(initial_x), # Maintain current state in state.x @@ -419,8 +418,7 @@ function update_state!(d, state::NewtonTrustRegionState, method::NewtonTrustRegi if method.use_fg copyto!(state.H_x, hessian!(d, state.x)) else - # TODO: Switch to `gradient_hessian!` - g_x, H_x = NLSolversBase.gradient_hessian!!(d, state.x) + g_x, H_x = NLSolversBase.gradient_hessian!(d, state.x) copyto!(state.g_x, g_x) copyto!(state.H_x, H_x) end diff --git a/src/utilities/perform_linesearch.jl b/src/utilities/perform_linesearch.jl index b944a21f8..4c04c48ba 100644 --- a/src/utilities/perform_linesearch.jl +++ b/src/utilities/perform_linesearch.jl @@ -47,27 +47,29 @@ function perform_linesearch!(state, method, d) # Guess an alpha method.alphaguess!(method.linesearch!, state, phi_0, dphi_0, d) - # Store current x and f(x) for next iteration - state.f_x_previous = phi_0 - copyto!(state.x_previous, state.x) - if hasproperty(state, :g_x_previous) - copyto!(state.g_x_previous, state.g_x) - end - # Perform line search; catch LineSearchException to allow graceful exit - try + lssuccess = try state.alpha, ϕalpha = method.linesearch!(d, state.x, state.s, state.alpha, state.x_ls, phi_0, dphi_0) - return true # lssuccess = true + true catch ex - if isa(ex, LineSearches.LineSearchException) + if ex isa LineSearches.LineSearchException state.alpha = ex.alpha # We shouldn't warn here, we should just carry it to the output # @warn("Linesearch failed, using alpha = $(state.alpha) and # exiting optimization.\nThe linesearch exited with message:\n$(ex.message)") - return false # lssuccess = false + false else - rethrow(ex) + rethrow() end end + + # Store current x and f(x) for next iteration + state.f_x_previous = phi_0 + copyto!(state.x_previous, state.x) + if hasproperty(state, :g_x_previous) + copyto!(state.g_x_previous, state.g_x) + end + + return lssuccess end diff --git a/test/general/api.jl b/test/general/api.jl index 7eec17492..0e11ed329 100644 --- a/test/general/api.jl +++ b/test/general/api.jl @@ -130,8 +130,8 @@ @test Optim.minimum(res) ≈ 1.2580194638225255 @test Optim.minimizer(res) ≈ [-0.116688, 0.0031153] rtol = 0.001 @test Optim.iterations(res) == 10 - @test Optim.f_calls(res) == 38 - @test Optim.g_calls(res) == 38 + @test Optim.f_calls(res) == 42 + @test Optim.g_calls(res) == 42 @test !Optim.converged(res) @test !Optim.x_converged(res) @test !Optim.f_converged(res) diff --git a/test/general/counter.jl b/test/general/counter.jl index 52fa2a152..60c4229aa 100644 --- a/test/general/counter.jl +++ b/test/general/counter.jl @@ -76,10 +76,12 @@ @test hcount == Optim.h_calls(res) end - # Need to define fg! and hv! for KrylovTrustRegion - fg!(out, x) = begin - g!(out, x) - f(x) + # define fg! and hv! for KrylovTrustRegion + fg!(F, G, x) = begin + if G !== nothing + g!(G, x) + end + return F === nothing ? nothing : f(x) end hv!(out, x, v) = begin n = length(x) @@ -92,7 +94,7 @@ fcounter(true) gcounter(true) hcounter(true) - df = Optim.TwiceDifferentiableHV(f, fg!, hv!, prob.initial_x) + df = Optim.TwiceDifferentiable(NLSolversBase.only_fg_and_hv!(fg!, hv!), prob.initial_x) res = Optim.optimize(df, prob.initial_x, solver) @test fcount == Optim.f_calls(res) @test gcount == Optim.g_calls(res) diff --git a/test/general/objective_types.jl b/test/general/objective_types.jl index 0d4474666..9542326dc 100644 --- a/test/general/objective_types.jl +++ b/test/general/objective_types.jl @@ -1,8 +1,5 @@ @testset "objective types" begin @testset "autodiff" begin - # Should throw, as :wah is not a proper autodiff choice - @test_throws ErrorException OnceDifferentiable(x -> x, rand(10); autodiff = :wah) - for T in (OnceDifferentiable, TwiceDifferentiable) odad1 = T(x -> 5.0, rand(1); autodiff = AutoFiniteDiff(; fdtype = Val(:central))) odad2 = T(x -> 5.0, rand(1); autodiff = AutoForwardDiff()) diff --git a/test/multivariate/solvers/constrained/ipnewton/constraints.jl b/test/multivariate/solvers/constrained/ipnewton/constraints.jl index a440855dd..601cae686 100644 --- a/test/multivariate/solvers/constrained/ipnewton/constraints.jl +++ b/test/multivariate/solvers/constrained/ipnewton/constraints.jl @@ -71,7 +71,7 @@ # Compare hand-computed gradient against that from automatic differentiation function check_autodiff(d, bounds, x, cfun::Function, bstate, μ) c = cfun(x) - J = Optim.NLSolversBase.ForwardDiff.jacobian(cfun, x) + J = ForwardDiff.jacobian(cfun, x) p = Optim.pack_vec(x, bstate) ftot! = (storage, p) -> Optim.lagrangian_fgvec!( @@ -90,8 +90,8 @@ pgrad = similar(p) ftot!(pgrad, p) chunksize = min(8, length(p)) - TD = Optim.NLSolversBase.ForwardDiff.Dual{ - Optim.ForwardDiff.Tag{Nothing,Float64}, + TD = ForwardDiff.Dual{ + ForwardDiff.Tag{Nothing,Float64}, eltype(p), chunksize, } @@ -100,7 +100,7 @@ pcmp = similar(p) ftot = p -> Optim.lagrangian_vec(p, d, bounds, xd, cfun, bstated, μ) #ForwardDiff.gradient!(pcmp, ftot, p, Optim.ForwardDiff.{chunksize}()) - Optim.NLSolversBase.ForwardDiff.gradient!(pcmp, ftot, p) + ForwardDiff.gradient!(pcmp, ftot, p) @test pcmp ≈ pgrad end # Basic setup (using two objectives, one equal to zero and the other a Gaussian) @@ -338,7 +338,7 @@ h[3, 3] += 2 * λ[2] * x[2] end c = cfun(x) - J = Optim.NLSolversBase.ForwardDiff.jacobian(cfun, x) + J = ForwardDiff.jacobian(cfun, x) Jtmp = similar(J) @test cJ!(Jtmp, x) ≈ J # just to check we did it right cbar = rand(length(c)) @@ -535,8 +535,8 @@ # TODO: How do we deal with the new Tags in ForwardDiff? # TD = ForwardDiff.Dual{chunksize, eltype(y)} - TD = Optim.NLSolversBase.ForwardDiff.Dual{ - Optim.NLSolversBase.ForwardDiff.Tag{Nothing,Float64}, + TD = ForwardDiff.Dual{ + ForwardDiff.Tag{Nothing,Float64}, eltype(p), chunksize, } @@ -554,7 +554,7 @@ #ϕd2 = αs->Optim.lagrangian_linefunc(αs, d, constraints, stated2) #ForwardDiff.gradient(ϕd, zeros(4)), ForwardDiff.hessian(ϕd2, zeros(4)) - Optim.NLSolversBase.ForwardDiff.gradient(ϕd, [0.0])#, ForwardDiff.hessian(ϕd2, [0.0]) + ForwardDiff.gradient(ϕd, [0.0])#, ForwardDiff.hessian(ϕd2, [0.0]) end F = 1000 d = TwiceDifferentiable( diff --git a/test/multivariate/solvers/constrained/ipnewton/interface.jl b/test/multivariate/solvers/constrained/ipnewton/interface.jl index 476d183a1..fa273b68d 100644 --- a/test/multivariate/solvers/constrained/ipnewton/interface.jl +++ b/test/multivariate/solvers/constrained/ipnewton/interface.jl @@ -26,7 +26,7 @@ end storage end function exponential_hessian!(storage, x) - Optim.NLSolversBase.ForwardDiff.hessian!(storage, exponential, x) + ForwardDiff.hessian!(storage, exponential, x) end function exponential_gradient(x) diff --git a/test/multivariate/solvers/second_order/krylov_trust_region.jl b/test/multivariate/solvers/second_order/krylov_trust_region.jl index 4b7922e1d..f9877e7d3 100644 --- a/test/multivariate/solvers/second_order/krylov_trust_region.jl +++ b/test/multivariate/solvers/second_order/krylov_trust_region.jl @@ -5,27 +5,27 @@ function f(x::Vector) (x[1] - 5.0)^4 end - - function fg!(g, x) - g[1] = 4.0 * (x[1] - 5.0)^3 - f(x) - end - - function fg2!(_f, g, x) - g === nothing || (g[1] = 4.0 * (x[1] - 5.0)^3) - _f === nothing || return f(x) + function fg!(_f, g, x) + if g !== nothing + g[1] = 4.0 * (x[1] - 5.0)^3 + end + return _f === nothing ? nothing : f(x) end - function hv!(Hv, x, v) Hv[1] = 12.0 * (x[1] - 5.0)^2 * v[1] end - - d = Optim.TwiceDifferentiableHV(f, fg!, hv!, [0.0]) - d2 = Optim.TwiceDifferentiableHV(NLSolversBase.only_fg_and_hv!(fg2!, hv!), [0.0]) - + d = TwiceDifferentiable(NLSolversBase.only_fg_and_hv!(fg!, hv!), [0.0]) result = Optim.optimize(d, [0.0], Optim.KrylovTrustRegion()) @test norm(Optim.minimizer(result) - [5.0]) < 0.01 + + function fgh!(f, g, H, x) + if H !== nothing + H[1, 1] = 12.0 * (x[1] - 5.0)^2 + end + return fg!(f, g, x) + end + d2 = TwiceDifferentiable(NLSolversBase.only_fgh!(fgh!), [0.0]) result = Optim.optimize(d2, [0.0], Optim.KrylovTrustRegion()) @test norm(Optim.minimizer(result) - [5.0]) < 0.01 end @@ -37,16 +37,19 @@ 0.5 * (x[1]^2 + eta * x[2]^2) end - function fg2!(storage::Vector, x::Vector) - storage[:] = [x[1], eta * x[2]] - f2(x) + function fg2!(_f, _g, x::Vector) + if _g !== nothing + _g[1] = x[1] + _g[2] = eta * x[2] + end + return _f === nothing ? nothing : f2(x) end function hv2!(Hv::Vector, x::Vector, v::Vector) - Hv[:] = [1.0 0.0; 0.0 eta] * v + return mul!(Hv, Diagonal([1.0, eta]), v) end - d2 = Optim.TwiceDifferentiableHV(f2, fg2!, hv2!, Float64[127, 921]) + d2 = Optim.TwiceDifferentiable(NLSolversBase.only_fg_and_hv!(fg2!, hv2!), Float64[127, 921]) result = Optim.optimize(d2, Float64[127, 921], Optim.KrylovTrustRegion()) @test Optim.g_converged(result) @@ -56,20 +59,21 @@ @testset "Stock test problems" begin for (name, prob) in MultivariateProblems.UnconstrainedProblems.examples if prob.istwicedifferentiable - hv!(storage::Vector, x::Vector, v::Vector) = begin - n = length(x) - H = Matrix{Float64}(undef, n, n) - MVP.hessian(prob)(H, x) - storage .= H * v + n = length(prob.initial_x) + hv! = let H = Matrix{Float64}(undef, n, n) + function (storage::Vector, x::Vector, v::Vector) + MVP.hessian(prob)(H, x) + return mul!(storage, H, v) + end end - fg!(g::Vector, x::Vector) = begin - MVP.gradient(prob)(g, x) - MVP.objective(prob)(x) + fg!(_f, _g, x::Vector) = begin + if _g !== nothing + MVP.gradient(prob)(_g, x) + end + return _f === nothing ? nothing : MVP.objective(prob)(x) end - ddf = Optim.TwiceDifferentiableHV( - MVP.objective(prob), - fg!, - hv!, + ddf = Optim.TwiceDifferentiable( + NLSolversBase.only_fg_and_hv!(fg!, hv!), prob.initial_x, ) result = Optim.optimize(ddf, prob.initial_x, Optim.KrylovTrustRegion()) diff --git a/test/univariate/dual.jl b/test/univariate/dual.jl index e74f02bc3..e10145082 100644 --- a/test/univariate/dual.jl +++ b/test/univariate/dual.jl @@ -1,5 +1,5 @@ @testset "Dual numbers" begin f(x, y) = x[1]^2 + y[1]^2 g(param) = Optim.minimum(optimize(x -> f(x, param), -1, 1)) - @test Optim.NLSolversBase.ForwardDiff.gradient(g, [1.0]) == [2.0] + @test ForwardDiff.gradient(g, [1.0]) == [2.0] end