diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml
index 1fbced77d..374ada27d 100644
--- a/.github/workflows/CI.yml
+++ b/.github/workflows/CI.yml
@@ -11,8 +11,8 @@ jobs:
     strategy:
       matrix:
         version:
-          - "min"
-          - "lts"
+          # - "min"
+          # - "lts"
           - "1"
         os:
           - ubuntu-latest
diff --git a/.github/workflows/Docs.yml b/.github/workflows/Docs.yml
index ebf64bd72..943c995eb 100644
--- a/.github/workflows/Docs.yml
+++ b/.github/workflows/Docs.yml
@@ -19,7 +19,7 @@ jobs:
         with:
           version: '1'
       - name: Install dependencies
-        run: julia --project=docs/ -e 'using Pkg; Pkg.develop(PackageSpec(path=pwd())); Pkg.instantiate()'
+        run: julia --project=docs/ -e 'using Pkg; Pkg.instantiate()'
       - name: Build and deploy
         env:
           GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} # If authenticating with GitHub Actions token
diff --git a/Project.toml b/Project.toml
index cf261c942..db9d65c47 100644
--- a/Project.toml
+++ b/Project.toml
@@ -18,6 +18,10 @@ Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
 [weakdeps]
 MathOptInterface = "b8f27783-ece8-5eb3-8dc8-9495eed66fee"
 
+[sources]
+LineSearches = { url = "https://github.com/devmotion/LineSearches.jl.git", rev = "dmw/jvp" }
+NLSolversBase = { url = "https://github.com/devmotion/NLSolversBase.jl.git", rev = "dmw/jvp" }
+
 [extensions]
 OptimMOIExt = "MathOptInterface"
 
@@ -30,11 +34,11 @@ ExplicitImports = "1.13.2"
 FillArrays = "0.6.2, 0.7, 0.8, 0.9, 0.10, 0.11, 0.12, 0.13, 1"
 ForwardDiff = "0.10, 1"
 JET = "0.9, 0.10"
-LineSearches = "7.5.1"
+LineSearches = "7.6"
 LinearAlgebra = "<0.0.1, 1.6"
 MathOptInterface = "1.17"
 Measurements = "2.14.1"
-NLSolversBase = "7.9.0"
+NLSolversBase = "8"
 NaNMath = "0.3.2, 1"
 OptimTestProblems = "2.0.3"
 PositiveFactorizations = "0.2.2"
diff --git a/docs/Project.toml b/docs/Project.toml
index c8ea546db..2cfc7556c 100644
--- a/docs/Project.toml
+++ b/docs/Project.toml
@@ -12,5 +12,6 @@ SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
 Documenter = "1"
 Literate = "2"
 
-[sources.Optim]
-path = ".."
+[sources]
+Optim = { path = ".." }
+NLSolversBase = { url = "https://github.com/devmotion/NLSolversBase.jl.git", rev = "dmw/jvp" }
diff --git a/docs/src/examples/ipnewton_basics.jl b/docs/src/examples/ipnewton_basics.jl
index a8564957b..66f4f79ab 100644
--- a/docs/src/examples/ipnewton_basics.jl
+++ b/docs/src/examples/ipnewton_basics.jl
@@ -22,6 +22,7 @@
 # constraint is unbounded from below or above respectively.
 
 using Optim, NLSolversBase #hide
+import ADTypes #hide
 import NLSolversBase: clear! #hide
 
 # # Constrained optimization with `IPNewton`
diff --git a/ext/OptimMOIExt.jl b/ext/OptimMOIExt.jl
index 8f9147ef8..5c0c10fad 100644
--- a/ext/OptimMOIExt.jl
+++ b/ext/OptimMOIExt.jl
@@ -1,7 +1,7 @@
 module OptimMOIExt
 
 using Optim
-using Optim.LinearAlgebra: rmul! 
+using Optim.LinearAlgebra: rmul!
 import MathOptInterface as MOI
 
 function __init__()
diff --git a/src/Manifolds.jl b/src/Manifolds.jl
index b016dd629..5ff53aa2f 100644
--- a/src/Manifolds.jl
+++ b/src/Manifolds.jl
@@ -20,9 +20,8 @@ project_tangent(M::Manifold, x) = project_tangent!(M, similar(x), x)
 retract(M::Manifold, x) = retract!(M, copy(x))
 
 # Fake objective function implementing a retraction
-mutable struct ManifoldObjective{T<:NLSolversBase.AbstractObjective} <:
-               NLSolversBase.AbstractObjective
-    manifold::Manifold
+struct ManifoldObjective{M<:Manifold,T<:AbstractObjective} <: AbstractObjective
+    manifold::M
     inner_obj::T
 end
 # TODO: is it safe here to call retract! and change x?
@@ -43,6 +42,20 @@ function NLSolversBase.value_gradient!(obj::ManifoldObjective, x)
     return f_x, g_x
 end
 
+# In general, we have to compute the gradient/Jacobian separately as it has to be projected
+function NLSolversBase.jvp!(obj::ManifoldObjective, x, v)
+    xin = retract(obj.manifold, x)
+    g_x = gradient!(obj.inner_obj, xin)
+    project_tangent!(obj.manifold, g_x, xin)
+    return dot(g_x, v)
+end
+function NLSolversBase.value_jvp!(obj::ManifoldObjective, x, v)
+    xin = retract(obj.manifold, x)
+    f_x, g_x = value_gradient!(obj.inner_obj, xin)
+    project_tangent!(obj.manifold, g_x, xin)
+    return f_x, dot(g_x, v)
+end
+
 """Flat Euclidean space {R,C}^N, with projections equal to the identity."""
 struct Flat <: Manifold end
 # all the functions below are no-ops, and therefore the generated code
@@ -53,6 +66,10 @@ retract!(M::Flat, x) = x
 project_tangent(M::Flat, g, x) = g
 project_tangent!(M::Flat, g, x) = g
 
+# Optimizations for `Flat` manifold
+NLSolversBase.jvp!(obj::ManifoldObjective{Flat}, x, v) = NLSolversBase.jvp!(obj.inner_obj, x, v)
+NLSolversBase.value_jvp!(obj::ManifoldObjective{Flat}, x, v) = NLSolversBase.value_jvp!(obj.inner_obj, x, v)
+
 """Spherical manifold {|x| = 1}."""
 struct Sphere <: Manifold end
 retract!(S::Sphere, x) = (x ./= norm(x))
diff --git a/src/Optim.jl b/src/Optim.jl
index d77385456..0974751b5 100644
--- a/src/Optim.jl
+++ b/src/Optim.jl
@@ -16,6 +16,8 @@ documentation online at http://julianlsolvers.github.io/Optim.jl/stable/ .
 """
 module Optim
 
+import ADTypes
+
 using PositiveFactorizations: Positive # for globalization strategy in Newton
 
 using LineSearches: LineSearches # for globalization strategy in Quasi-Newton algs
@@ -35,7 +37,6 @@ using NLSolversBase:
     NonDifferentiable,
     OnceDifferentiable,
     TwiceDifferentiable,
-    TwiceDifferentiableHV,
     AbstractConstraints,
     ConstraintBounds,
     TwiceDifferentiableConstraints,
diff --git a/src/api.jl b/src/api.jl
index 467488ff4..71a1a3d01 100644
--- a/src/api.jl
+++ b/src/api.jl
@@ -104,7 +104,7 @@ f_calls(d::AbstractObjective) = NLSolversBase.f_calls(d)
 
 g_calls(r::OptimizationResults) = error("g_calls is not implemented for $(summary(r)).")
 g_calls(r::MultivariateOptimizationResults) = r.g_calls
-g_calls(d::AbstractObjective) = NLSolversBase.g_calls(d)
+g_calls(d::AbstractObjective) = NLSolversBase.g_calls(d) + NLSolversBase.jvp_calls(d)
 
 h_calls(r::OptimizationResults) = error("h_calls is not implemented for $(summary(r)).")
 h_calls(r::MultivariateOptimizationResults) = r.h_calls
diff --git a/src/multivariate/optimize/interface.jl b/src/multivariate/optimize/interface.jl
index 4642dd0e0..eaf91a4df 100644
--- a/src/multivariate/optimize/interface.jl
+++ b/src/multivariate/optimize/interface.jl
@@ -66,20 +66,6 @@ promote_objtype(
     inplace::Bool,
     f::InplaceObjective,
 ) = TwiceDifferentiable(f, x, real(zero(eltype(x))))
-promote_objtype(
-    method::SecondOrderOptimizer,
-    x,
-    autodiff::ADTypes.AbstractADType,
-    inplace::Bool,
-    f::NLSolversBase.InPlaceObjectiveFGHv,
-) = TwiceDifferentiableHV(f, x)
-promote_objtype(
-    method::SecondOrderOptimizer,
-    x,
-    autodiff::ADTypes.AbstractADType,
-    inplace::Bool,
-    f::NLSolversBase.InPlaceObjectiveFG_Hv,
-) = TwiceDifferentiableHV(f, x)
 promote_objtype(method::SecondOrderOptimizer, x, autodiff::ADTypes.AbstractADType, inplace::Bool, f, g) =
     TwiceDifferentiable(
         f,
diff --git a/src/multivariate/optimize/optimize.jl b/src/multivariate/optimize/optimize.jl
index 6a3ce198f..a37fa6fc1 100644
--- a/src/multivariate/optimize/optimize.jl
+++ b/src/multivariate/optimize/optimize.jl
@@ -1,25 +1,23 @@
 # Update function value, gradient and Hessian
 function update_fgh!(d, state, ::ZerothOrderOptimizer)
-    f_x = value!(d, state.x)
+    f_x = NLSolversBase.value!(d, state.x)
     state.f_x = f_x
     return nothing
 end
 function update_fgh!(d, state, method::FirstOrderOptimizer)
-    f_x, g_x = value_gradient!(d, state.x)
+    f_x, g_x = NLSolversBase.value_gradient!(d, state.x)
+    copyto!(state.g_x, g_x)
     if hasproperty(method, :manifold)
-        project_tangent!(method.manifold, g_x, state.x)
+        project_tangent!(method.manifold, state.g_x, state.x)
     end
     state.f_x = f_x
-    copyto!(state.g_x, g_x)
     return nothing
 end
 function update_fgh!(d, state, method::SecondOrderOptimizer)
     # Manifold optimization is currently not supported for second order optimization algorithms
     @assert !hasproperty(method, :manifold)
 
-    # TODO: Switch to `value_gradient_hessian!` when it becomes available
-    f_x, g_x = value_gradient!(d, state.x)
-    H_x = hessian!(d, state.x)
+    f_x, g_x, H_x = NLSolversBase.value_gradient_hessian!(d, state.x)
     state.f_x = f_x
     copyto!(state.g_x, g_x)
     copyto!(state.H_x, H_x)
diff --git a/src/multivariate/solvers/constrained/fminbox.jl b/src/multivariate/solvers/constrained/fminbox.jl
index d4a11128d..92d365705 100644
--- a/src/multivariate/solvers/constrained/fminbox.jl
+++ b/src/multivariate/solvers/constrained/fminbox.jl
@@ -13,6 +13,7 @@ end
 
 NLSolversBase.f_calls(obj::BarrierWrapper) = NLSolversBase.f_calls(obj.obj)
 NLSolversBase.g_calls(obj::BarrierWrapper) = NLSolversBase.g_calls(obj.obj)
+NLSolversBase.jvp_calls(obj::BarrierWrapper) = NLSolversBase.jvp_calls(obj.obj)
 NLSolversBase.h_calls(obj::BarrierWrapper) = NLSolversBase.h_calls(obj.obj)
 NLSolversBase.hv_calls(obj::BarrierWrapper) = NLSolversBase.hv_calls(obj.obj)
 
@@ -40,9 +41,9 @@ struct BoxBarrier{L,U}
     upper::U
 end
 function in_box(bb::BoxBarrier, x)
-    all(x -> x[1] >= x[2] && x[1] <= x[3], zip(x, bb.lower, bb.upper))
+    all(x -> x[1] <= x[2] <= x[3], zip(bb.lower, x, bb.upper))
 end
-in_box(bw::BarrierWrapper, x) = in_box(bw.b, x)
+
 # evaluates the value and gradient components comming from the log barrier
 function _barrier_term_value(x::T, l, u) where {T}
     dxl = x - l
@@ -71,11 +72,15 @@ function _barrier_term_gradient(x::T, l, u) where {T}
     return g
 end
 
+function _barrier_jvp(bb::BoxBarrier, x, v)
+    return sum(Broadcast.instantiate(Broadcast.broadcasted((xi, li, ui, vi) -> dot(_barrier_term_gradient(xi, li, ui), vi), x, bb.lower, bb.upper, v)))
+end
+
 # Wrappers
 function NLSolversBase.value_gradient!(bb::BarrierWrapper, x)
     bb.DFb .= _barrier_term_gradient.(x, bb.b.lower, bb.b.upper)
     bb.Fb = _barrier_value(bb.b, x)
-    if in_box(bb, x)
+    if in_box(bb.b, x)
         F, DF = value_gradient!(bb.obj, x)
         bb.DFtotal .= muladd.(bb.mu, bb.DFb, DF)
         bb.Ftotal = muladd(bb.mu, bb.Fb, F)
@@ -87,7 +92,7 @@ function NLSolversBase.value_gradient!(bb::BarrierWrapper, x)
 end
 function NLSolversBase.value!(obj::BarrierWrapper, x)
     obj.Fb = _barrier_value(obj.b, x)
-    if in_box(obj, x)
+    if in_box(obj.b, x)
         F = value!(obj.obj, x)
         obj.Ftotal = muladd(obj.mu, obj.Fb, F)
     else
@@ -97,7 +102,7 @@ function NLSolversBase.value!(obj::BarrierWrapper, x)
 end
 function NLSolversBase.value(obj::BarrierWrapper, x)
     Fb = _barrier_value(obj.b, x)
-    if in_box(obj, x)
+    if in_box(obj.b, x)
         return muladd(obj.mu, Fb, value(obj.obj, x))
     else
         return obj.mu * Fb
@@ -105,7 +110,7 @@ function NLSolversBase.value(obj::BarrierWrapper, x)
 end
 function NLSolversBase.gradient!(obj::BarrierWrapper, x)
     obj.DFb .= _barrier_term_gradient.(x, obj.b.lower, obj.b.upper)
-    if in_box(obj, x)
+    if in_box(obj.b, x)
         DF = gradient!(obj.obj, x)
         obj.DFtotal .= muladd.(obj.mu, obj.DFb, DF)
     else
@@ -114,6 +119,29 @@ function NLSolversBase.gradient!(obj::BarrierWrapper, x)
     return obj.DFtotal
 end
 
+function NLSolversBase.jvp!(obj::BarrierWrapper, x, v)
+    JVPb = _barrier_jvp(obj.b, x, v)
+    if in_box(obj.b, x)
+        JVP = NLSolversBase.jvp!(obj.obj, x, v)
+        return muladd(obj.mu, JVPb, JVP)
+    else
+        return obj.mu * JVPb
+    end
+end
+function NLSolversBase.value_jvp!(obj::BarrierWrapper, x, v)
+    JVPb = _barrier_jvp(obj.b, x, v)
+    obj.Fb = _barrier_value(obj.b, x)
+    if in_box(obj.b, x)
+        F, JVP = NLSolversBase.value_jvp!(obj.obj, x, v)
+        JVPtotal = muladd(obj.mu, JVPb, JVP)
+        obj.Ftotal = muladd(obj.mu, obj.Fb, F)
+    else
+        JVPtotal = obj.mu * JVPb
+        obj.Ftotal = obj.mu * obj.Fb
+    end
+    return obj.Ftotal, JVPtotal
+end
+
 function limits_box(
     x::AbstractArray{T},
     d::AbstractArray{T},
@@ -274,7 +302,7 @@ function optimize(
     initial_x::AbstractArray,
     F::Fminbox = Fminbox(),
     options::Options = Options();
-    inplace = true,
+    inplace::Bool = true,
 )
 
     g! = inplace ? g : (G, x) -> copyto!(G, g(x))
@@ -499,12 +527,13 @@ function optimize(
 
     results = optimize(dfbox, x, _optimizer, options, state)
     stopped_by_callback = results.stopped_by.callback
-    dfbox.obj.f_calls[1] = 0
+    # TODO: Reset everything? Add upstream API for resetting call counters?
+    dfbox.obj.f_calls = 0
     if hasfield(typeof(dfbox.obj), :df_calls)
-        dfbox.obj.df_calls[1] = 0
+        dfbox.obj.df_calls = 0
     end
     if hasfield(typeof(dfbox.obj), :h_calls)
-        dfbox.obj.h_calls[1] = 0
+        dfbox.obj.h_calls = 0
     end
 
     # Compute function value and gradient (without barrier term)
@@ -579,12 +608,13 @@ function optimize(
             resultsnew = optimize(dfbox, x, _optimizer, options, state)
             stopped_by_callback = resultsnew.stopped_by.callback
             append!(results, resultsnew)
-            dfbox.obj.f_calls[1] = 0
+            # TODO: Reset everything? Add upstream API for resetting call counters?
+            dfbox.obj.f_calls = 0
             if hasfield(typeof(dfbox.obj), :df_calls)
-                dfbox.obj.df_calls[1] = 0
+                dfbox.obj.df_calls = 0
             end
             if hasfield(typeof(dfbox.obj), :h_calls)
-                dfbox.obj.h_calls[1] = 0
+                dfbox.obj.h_calls = 0
             end
 
             # Compute function value and gradient (without barrier term)
diff --git a/src/multivariate/solvers/constrained/ipnewton/ipnewton.jl b/src/multivariate/solvers/constrained/ipnewton/ipnewton.jl
index 189f6a8c9..b07bba7b3 100644
--- a/src/multivariate/solvers/constrained/ipnewton/ipnewton.jl
+++ b/src/multivariate/solvers/constrained/ipnewton/ipnewton.jl
@@ -107,8 +107,7 @@ function initial_state(
     # Allocate fields for the objective function
     n = length(initial_x)
 
-    # TODO: Switch to `value_gradient_hessian!`
-    f_x, g_x, H_x = NLSolversBase.value_gradient_hessian!!(d, initial_x)
+    f_x, g_x, H_x = NLSolversBase.value_gradient_hessian!(d, initial_x)
 
     # More constraints
     constr_J = fill!(Matrix{T}(undef, mc, n), NaN)
@@ -157,8 +156,7 @@ end
 
 function update_fgh!(d, constraints::TwiceDifferentiableConstraints, state, method::IPNewton)
     # Compute objective function, gradient and Hessian matrix
-    # TODO: Switch to `value_gradient_hessian!!`
-    f_x, g_x, H_x = NLSolversBase.value_gradient_hessian!!(d, state.x)
+    f_x, g_x, H_x = NLSolversBase.value_gradient_hessian!(d, state.x)
     copyto!(state.g_x, g_x)
     copyto!(state.H_x, H_x)
     state.f_x = f_x
diff --git a/src/multivariate/solvers/first_order/accelerated_gradient_descent.jl b/src/multivariate/solvers/first_order/accelerated_gradient_descent.jl
index 7e1ca1d53..6c5796cf6 100644
--- a/src/multivariate/solvers/first_order/accelerated_gradient_descent.jl
+++ b/src/multivariate/solvers/first_order/accelerated_gradient_descent.jl
@@ -43,12 +43,13 @@ function initial_state(
 )
     initial_x = copy(initial_x)
     retract!(method.manifold, initial_x)
-    f_x, g_x = value_gradient!(d, initial_x)
+    f_x, g_x = NLSolversBase.value_gradient!(d, initial_x)
+    g_x = copy(g_x)
     project_tangent!(method.manifold, g_x, initial_x)
 
     AcceleratedGradientDescentState(
         initial_x, # Maintain current state in state.x
-        copy(g_x), # Maintain current gradient in state.g_x
+        g_x, # Maintain current gradient in state.g_x
         f_x, # Maintain current f in state.f_x
         fill!(similar(initial_x), NaN), # Maintain previous state in state.x_previous
         oftype(f_x, NaN), # Store previous f in state.f_x_previous
diff --git a/src/multivariate/solvers/first_order/adam.jl b/src/multivariate/solvers/first_order/adam.jl
index 6eb608210..9886e5920 100644
--- a/src/multivariate/solvers/first_order/adam.jl
+++ b/src/multivariate/solvers/first_order/adam.jl
@@ -62,7 +62,7 @@ function reset!(method::Adam, state::AdamState, obj, x)
     # Update function value and gradient
     copyto!(state.x, x)
     retract!(method.manifold, state.x)
-    f_x, g_x = value_gradient!(obj, state.x)
+    f_x, g_x = NLSolversBase.value_gradient!(obj, state.x)
     copyto!(state.g_x, g_x)
     project_tangent!(method.manifold, state.g_x, state.x)
     state.f_x = f_x
@@ -88,7 +88,7 @@ function initial_state(method::Adam, ::Options, d, initial_x::AbstractArray)
     # Compute function value and gradient
     initial_x = copy(initial_x)
     retract!(method.manifold, initial_x)
-    f_x, g_x = value_gradient!(d, initial_x)
+    f_x, g_x = NLSolversBase.value_gradient!(d, initial_x)
     g_x = copy(g_x)
     project_tangent!(method.manifold, g_x, initial_x)
 
diff --git a/src/multivariate/solvers/first_order/adamax.jl b/src/multivariate/solvers/first_order/adamax.jl
index 678a683d4..9f16cf0b5 100644
--- a/src/multivariate/solvers/first_order/adamax.jl
+++ b/src/multivariate/solvers/first_order/adamax.jl
@@ -61,7 +61,7 @@ function reset!(method::AdaMax, state::AdaMaxState, obj, x)
     # Update function value and gradient
     copyto!(state.x, x)
     retract!(method.manifold, state.x)
-    f_x, g_x = value_gradient!(obj, state.x)
+    f_x, g_x = NLSolversBase.value_gradient!(obj, state.x)
     copyto!(state.g_x, g_x)
     project_tangent!(method.manifold, state.g_x, state.x)
     state.f_x = f_x
@@ -87,7 +87,7 @@ function initial_state(method::AdaMax, options::Options, d, initial_x::AbstractA
     # Compute function value and gradient
     initial_x = copy(initial_x)
     retract!(method.manifold, initial_x)
-    f_x, g_x = value_gradient!(d, initial_x)
+    f_x, g_x = NLSolversBase.value_gradient!(d, initial_x)
     g_x = copy(g_x)
     project_tangent!(method.manifold, g_x, initial_x)
 
diff --git a/src/multivariate/solvers/first_order/bfgs.jl b/src/multivariate/solvers/first_order/bfgs.jl
index 9e6041b51..4e83b0ea9 100644
--- a/src/multivariate/solvers/first_order/bfgs.jl
+++ b/src/multivariate/solvers/first_order/bfgs.jl
@@ -72,11 +72,12 @@ end
 
 function reset!(method, state::BFGSState, obj, x)
     # Update function value and gradient
-    retract!(method.manifold, x)
-    f_x, g_x = value_gradient!(obj, x)
-    project_tangent!(method.manifold, g_x, x)
-    state.f_x = f_x
+    copyto!(state.x, x)
+    retract!(method.manifold, state.x)
+    f_x, g_x = NLSolversBase.value_gradient!(obj, state.x)
     copyto!(state.g_x, g_x)
+    project_tangent!(method.manifold, state.g_x, state.x)
+    state.f_x = f_x
 
     # Delete history
     fill!(state.x_previous, NaN)
@@ -104,7 +105,8 @@ function initial_state(method::BFGS, ::Options, d, initial_x::AbstractArray)
     # Compute function value and gradient
     initial_x = copy(initial_x)
     retract!(method.manifold, initial_x)
-    f_x, g_x = value_gradient!(d, initial_x)
+    f_x, g_x = NLSolversBase.value_gradient!(d, initial_x)
+    g_x = copy(g_x)
     project_tangent!(method.manifold, g_x, initial_x)
 
     # Initialize approximation of inverse Hessian
@@ -125,7 +127,7 @@ function initial_state(method::BFGS, ::Options, d, initial_x::AbstractArray)
     # Trace the history of states visited
     BFGSState(
         initial_x, # Maintain current state in state.x
-        copy(g_x), # Maintain current gradient in state.g_x
+        g_x, # Maintain current gradient in state.g_x
         f_x, # Maintain current f in state.f_x
         fill!(similar(initial_x), NaN), # Maintain previous state in state.x_previous
         fill!(similar(g_x), NaN), # Store previous gradient in state.g_x_previous
@@ -164,7 +166,7 @@ function update_fgh!(d, state::BFGSState, method::BFGS)
     (; invH, dx, dg, u) = state
     
     # Update function value and gradient
-    f_x, g_x = value_gradient!(d, state.x)
+    f_x, g_x = NLSolversBase.value_gradient!(d, state.x)
     copyto!(state.g_x, g_x)
     project_tangent!(method.manifold, state.g_x, state.x)
     state.f_x = f_x
diff --git a/src/multivariate/solvers/first_order/cg.jl b/src/multivariate/solvers/first_order/cg.jl
index 99fd9d724..f3bfeb2d3 100644
--- a/src/multivariate/solvers/first_order/cg.jl
+++ b/src/multivariate/solvers/first_order/cg.jl
@@ -108,18 +108,18 @@ end
 function reset!(cg::ConjugateGradient, cgs::ConjugateGradientState, obj, x)
     copyto!(cgs.x, x)
     retract!(cg.manifold, cgs.x)
-    f_x, g_x = value_gradient!(obj, cgs.x)
-    project_tangent!(cg.manifold, g_x, cgs.x)
-    cgs.f_x = f_x
+    f_x, g_x = NLSolversBase.value_gradient!(obj, cgs.x)
     copyto!(cgs.g_x, g_x)
+    project_tangent!(cg.manifold, cgs.g_x, cgs.x)
+    cgs.f_x = f_x
 
     fill!(cgs.x_previous, NaN)
     cgs.f_x_previous = oftype(cgs.f_x_previous, NaN)
     fill!(cgs.g_x_previous, NaN)
 
-    _precondition!(cgs.pg, cg, x, g_x)
+    _precondition!(cgs.pg, cg, cgs.x, cgs.g_x)
     if cg.P !== nothing
-        project_tangent!(cg.manifold, cgs.pg, x)
+        project_tangent!(cg.manifold, cgs.pg, cgs.x)
     end
     cgs.s .= .-cgs.pg
 
@@ -129,6 +129,7 @@ function initial_state(method::ConjugateGradient, ::Options, d, initial_x)
     initial_x = copy(initial_x)
     retract!(method.manifold, initial_x)
     f_x, g_x = value_gradient!(d, initial_x)
+    g_x = copy(g_x)
     project_tangent!(method.manifold, g_x, initial_x)
 
     # Could move this out? as a general check?
@@ -152,7 +153,7 @@ function initial_state(method::ConjugateGradient, ::Options, d, initial_x)
 
     ConjugateGradientState(
         initial_x, # Maintain current state in state.x
-        copy(g_x), # Maintain current gradient in state.g
+        g_x, # Maintain current gradient in state.g
         f_x, # Maintain current f in state.f
         fill!(similar(initial_x), NaN), # Maintain previous state in state.x_previous
         fill!(similar(g_x), NaN), # Store previous gradient in state.g_x_previous
@@ -179,10 +180,10 @@ function update_state!(d, state::ConjugateGradientState, method::ConjugateGradie
     retract!(method.manifold, state.x)
 
     # Update the function value and gradient
-    f_x, g_x = value_gradient!(d, state.x)
-    project_tangent!(method.manifold, g_x, state.x)
-    state.f_x = f_x
+    f_x, g_x = NLSolversBase.value_gradient!(d, state.x)
     copyto!(state.g_x, g_x)
+    project_tangent!(method.manifold, state.g_x, state.x)
+    state.f_x = f_x
 
     # Check sanity of function and gradient
     isfinite(f_x) || error(LazyString("Non-finite f(x) while optimizing (", f_x, ")"))
@@ -200,7 +201,7 @@ function update_state!(d, state::ConjugateGradientState, method::ConjugateGradie
     _apply_precondprep(method, state.x)
     dPd = _inverse_precondition(method, state)
     etak = method.eta * real(dot(state.s, state.g_x_previous)) / dPd # New in HZ2013
-    state.y .= g_x .- state.g_x_previous
+    state.y .= state.g_x .- state.g_x_previous
     ydots = real(dot(state.y, state.s))
     copyto!(state.py, state.pg)        # below, store pg - pg_previous in py
     # P already updated in _apply_precondprep above
@@ -211,7 +212,7 @@ function update_state!(d, state::ConjugateGradientState, method::ConjugateGradie
     betak =
         (
             real(dot(state.y, state.pg)) -
-            real(dot(state.y, state.py)) * real(dot(g_x, state.s)) / ydots
+            real(dot(state.y, state.py)) * real(dot(state.g_x, state.s)) / ydots
         ) / ydots
     # betak may be undefined if ydots is zero (may due to f not strongly convex or non-Wolfe linesearch)
     beta = NaNMath.max(betak, etak) # TODO: Set to zero if betak is NaN?
diff --git a/src/multivariate/solvers/first_order/gradient_descent.jl b/src/multivariate/solvers/first_order/gradient_descent.jl
index b999f2096..dec894129 100644
--- a/src/multivariate/solvers/first_order/gradient_descent.jl
+++ b/src/multivariate/solvers/first_order/gradient_descent.jl
@@ -53,9 +53,9 @@ function reset!(method::GradientDescent, state::GradientDescentState, obj, x)
     # Update function value and gradient
     copyto!(state.x, x)
     retract!(method.manifold, state.x)
-    f_x, g_x = value_gradient!(obj, state.x)
+    f_x, g_x = NLSolversBase.value_gradient!(obj, state.x)
     copyto!(state.g_x, g_x)
-    project_tangent!(method.manifold, g_x, state.x)
+    project_tangent!(method.manifold, state.g_x, state.x)
     state.f_x = f_x
 
     # Delete history
@@ -74,7 +74,7 @@ function initial_state(
     # Compute function value and gradient
     initial_x = copy(initial_x)
     retract!(method.manifold, initial_x)
-    f_x, g_x = value_gradient!(d, initial_x)
+    f_x, g_x = NLSolversBase.value_gradient!(d, initial_x)
     g_x = copy(g_x)
     project_tangent!(method.manifold, g_x, initial_x)
 
diff --git a/src/multivariate/solvers/first_order/l_bfgs.jl b/src/multivariate/solvers/first_order/l_bfgs.jl
index 23aa175c7..65c0da81d 100644
--- a/src/multivariate/solvers/first_order/l_bfgs.jl
+++ b/src/multivariate/solvers/first_order/l_bfgs.jl
@@ -155,11 +155,12 @@ mutable struct LBFGSState{Tx,Tdx,Tdg,T,G} <: AbstractOptimizerState
     @add_linesearch_fields()
 end
 function reset!(method::LBFGS, state::LBFGSState, obj, x::AbstractArray)
-    retract!(method.manifold, x)
-    f_x, g_x = value_gradient!(obj, x)
-    project_tangent!(method.manifold, g_x, x)
-    state.f_x = f_x
+    copyto!(state.x, x)
+    retract!(method.manifold, state.x)
+    f_x, g_x = NLSolversBase.value_gradient!(obj, state.x)
     copyto!(state.g_x, g_x)
+    project_tangent!(method.manifold, state.g_x, state.x)
+    state.f_x = f_x
 
     fill!(state.x_previous, NaN)
     fill!(state.g_x_previous, NaN)
@@ -173,11 +174,12 @@ function initial_state(method::LBFGS, options::Options, d, initial_x::AbstractAr
     T = real(eltype(initial_x))
     initial_x = copy(initial_x)
     retract!(method.manifold, initial_x)
-    f_x, g_x = value_gradient!(d, initial_x)
+    f_x, g_x = NLSolversBase.value_gradient!(d, initial_x)
+    g_x = copy(g_x)
     project_tangent!(method.manifold, g_x, initial_x)
     LBFGSState(
         initial_x, # Maintain current state in state.x
-        copy(g_x), # Maintain current gradient in state.g_x
+        g_x, # Maintain current gradient in state.g_x
         f_x, # Main current f in state.f_x
         fill!(similar(initial_x), NaN), # Maintain previous state in state.x_previous
         fill!(similar(g_x), NaN), # Store previous gradient in state.g_x_previous
@@ -200,9 +202,6 @@ function update_state!(d, state::LBFGSState, method::LBFGS)
     # Increment the number of steps we've had to perform
     state.pseudo_iteration += 1
 
-    # TODO: Can this be removed?
-    project_tangent!(method.manifold, state.g_x, state.x)
-
     # update the preconditioner
     _apply_precondprep(method, state.x)
 
@@ -236,7 +235,7 @@ end
 
 function update_fgh!(d, state::LBFGSState, method::LBFGS)
     # Update function value and gradient
-    f_x, g_x = value_gradient!(d, state.x)
+    f_x, g_x = NLSolversBase.value_gradient!(d, state.x)
     copyto!(state.g_x, g_x)
     project_tangent!(method.manifold, state.g_x, state.x)
     state.f_x = f_x
diff --git a/src/multivariate/solvers/first_order/momentum_gradient_descent.jl b/src/multivariate/solvers/first_order/momentum_gradient_descent.jl
index 7b074e17c..8f7ab70e3 100644
--- a/src/multivariate/solvers/first_order/momentum_gradient_descent.jl
+++ b/src/multivariate/solvers/first_order/momentum_gradient_descent.jl
@@ -35,11 +35,12 @@ function initial_state(method::MomentumGradientDescent, ::Options, d, initial_x:
     initial_x = copy(initial_x)
     retract!(method.manifold, initial_x)
     f_x, g_x = value_gradient!(d, initial_x)
+    g_x = copy(g_x)
     project_tangent!(method.manifold, g_x, initial_x)
 
     MomentumGradientDescentState(
         initial_x, # Maintain current state in state.x
-        copy(g_x), # Maintain current gradient in state.g_x
+        g_x, # Maintain current gradient in state.g_x
         f_x, # Maintain current f in state.f_x 
         copy(initial_x), # Maintain previous state in state.x_previous
         fill!(similar(initial_x), NaN), # Record momentum correction direction in state.x_momentum
diff --git a/src/multivariate/solvers/first_order/ngmres.jl b/src/multivariate/solvers/first_order/ngmres.jl
index 9c74b4695..595d23069 100644
--- a/src/multivariate/solvers/first_order/ngmres.jl
+++ b/src/multivariate/solvers/first_order/ngmres.jl
@@ -427,11 +427,10 @@ end
 
 function update_fgh!(d, state, method::AbstractNGMRES)
     # Update the function value and gradient
-    # TODO: do we need a retract! on state.x here?
-    f_x, g_x = value_gradient!(d, state.x)
-    project_tangent!(method.manifold, g_x, state.x)
-    state.f_x = f_x
+    f_x, g_x = NLSolversBase.value_gradient!(d, state.x)
     copyto!(state.g_x, g_x)
+    project_tangent!(method.manifold, state.g_x, state.x)
+    state.f_x = f_x
 
     if state.restart
         state.k = 0
diff --git a/src/multivariate/solvers/second_order/krylov_trust_region.jl b/src/multivariate/solvers/second_order/krylov_trust_region.jl
index 30b1c164a..8e4ffea27 100644
--- a/src/multivariate/solvers/second_order/krylov_trust_region.jl
+++ b/src/multivariate/solvers/second_order/krylov_trust_region.jl
@@ -24,6 +24,7 @@ mutable struct KrylovTrustRegionState{T} <: AbstractOptimizerState
     g_x::Vector{T}
     x_previous::Vector{T}
     f_x_previous::T
+    x_cache::Vector{T}
     s::Vector{T}
     interior::Bool
     accept_step::Bool
@@ -50,10 +51,11 @@ function initial_state(method::KrylovTrustRegion, options::Options, d, initial_x
     KrylovTrustRegionState(
         copy(initial_x),    # Maintain current state in state.x
         f_x,                # Maintain f of current state in state.f_x
-        g_x,                # Maintain gradient of current state in state.g_x
+        copy(g_x),          # Maintain gradient of current state in state.g_x
         copy(initial_x), # x_previous
-        zero(T),            # f_x_previous
-        similar(initial_x), # Maintain current search direction in state.s
+        oftype(f_x, NaN),   # f_x_previous
+        fill!(similar(initial_x), NaN), # In-place cache for `update_state!`
+        fill!(similar(initial_x), NaN), # Maintain current search direction in state.s
         true,               # interior
         true,               # accept step
         convert(T, method.initial_radius),
@@ -104,7 +106,7 @@ end
 
 
 function cg_steihaug!(
-    objective::TwiceDifferentiableHV,
+    objective::TwiceDifferentiable,
     state::KrylovTrustRegionState{T},
     method::KrylovTrustRegion,
 ) where {T}
@@ -157,14 +159,16 @@ end
 
 
 function update_state!(
-    objective::TwiceDifferentiableHV,
+    objective::TwiceDifferentiable,
     state::KrylovTrustRegionState,
     method::KrylovTrustRegion,
 )
     state.m_diff = cg_steihaug!(objective, state, method)
     @assert state.m_diff <= 0
 
-    state.f_diff = value(objective, state.x .+ state.s) - state.f_x
+    state.x_cache .= state.x .+ state.s
+    f_x_cache = NLSolversBase.value!(objective, state.x_cache)
+    state.f_diff = f_x_cache - state.f_x
     state.rho = state.f_diff / state.m_diff
     state.interior = norm(state.s) < 0.9 * state.radius
 
@@ -176,22 +180,22 @@ function update_state!(
 
     state.accept_step = state.rho > method.eta
     if state.accept_step
-        state.x .+= state.s
+        # Update history
+        copyto!(state.x_previous, state.x)
+        state.f_x_previous = state.f_x
+
+        # Update state, function value and its gradient
+        copyto!(state.x, state.x_cache)
+        state.f_x = f_x_cache
+        g_x = gradient!(objective, state.x)
+        copyto!(state.g_x, g_x)
     end
 
     return false
 end
 
-
-function update_fgh!(objective, state::KrylovTrustRegionState, ::KrylovTrustRegion)
-    if state.accept_step
-        # Update the function value and gradient
-        state.f_x_previous = state.f_x
-        f_x, g_x = value_gradient!(objective, state.x)
-        state.f_x = f_x
-        copyto!(state.g_x, g_x)
-    end
-end
+# Function value and gradient are already updated in update_state!
+update_fgh!(objective, state::KrylovTrustRegionState, ::KrylovTrustRegion) = nothing
 
 function assess_convergence(state::KrylovTrustRegionState, d, options::Options)
     if !state.accept_step
diff --git a/src/multivariate/solvers/second_order/newton.jl b/src/multivariate/solvers/second_order/newton.jl
index 20a4824ae..4184d5233 100644
--- a/src/multivariate/solvers/second_order/newton.jl
+++ b/src/multivariate/solvers/second_order/newton.jl
@@ -42,8 +42,7 @@ mutable struct NewtonState{Tx,Tg,TH,T,F<:Cholesky} <: AbstractOptimizerState
 end
 
 function initial_state(method::Newton, options, d, initial_x)
-    # TODO: Switch to `value_gradient_hessian!`
-    f_x, g_x, H_x = NLSolversBase.value_gradient_hessian!!(d, initial_x)
+    f_x, g_x, H_x = NLSolversBase.value_gradient_hessian!(d, initial_x)
 
     NewtonState(
         copy(initial_x), # Maintain current state in state.x
diff --git a/src/multivariate/solvers/second_order/newton_trust_region.jl b/src/multivariate/solvers/second_order/newton_trust_region.jl
index 55e50259e..a6eb3b4ac 100644
--- a/src/multivariate/solvers/second_order/newton_trust_region.jl
+++ b/src/multivariate/solvers/second_order/newton_trust_region.jl
@@ -333,8 +333,7 @@ function initial_state(method::NewtonTrustRegion, options, d, initial_x)
     interior = true
     lambda = NaN
 
-    # TODO: Switch to `value_gradient_hessian!`
-    f_x, g_x, H_x = NLSolversBase.value_gradient_hessian!!(d, initial_x)
+    f_x, g_x, H_x = NLSolversBase.value_gradient_hessian!(d, initial_x)
 
     NewtonTrustRegionState(
         copy(initial_x), # Maintain current state in state.x
@@ -419,8 +418,7 @@ function update_state!(d, state::NewtonTrustRegionState, method::NewtonTrustRegi
         if method.use_fg
             copyto!(state.H_x, hessian!(d, state.x))
         else
-            # TODO: Switch to `gradient_hessian!`
-            g_x, H_x = NLSolversBase.gradient_hessian!!(d, state.x)
+            g_x, H_x = NLSolversBase.gradient_hessian!(d, state.x)
             copyto!(state.g_x, g_x)
             copyto!(state.H_x, H_x)
         end
diff --git a/src/utilities/perform_linesearch.jl b/src/utilities/perform_linesearch.jl
index b944a21f8..4c04c48ba 100644
--- a/src/utilities/perform_linesearch.jl
+++ b/src/utilities/perform_linesearch.jl
@@ -47,27 +47,29 @@ function perform_linesearch!(state, method, d)
     # Guess an alpha
     method.alphaguess!(method.linesearch!, state, phi_0, dphi_0, d)
 
-    # Store current x and f(x) for next iteration
-    state.f_x_previous = phi_0
-    copyto!(state.x_previous, state.x)
-    if hasproperty(state, :g_x_previous)
-        copyto!(state.g_x_previous, state.g_x)
-    end
-
     # Perform line search; catch LineSearchException to allow graceful exit
-    try
+    lssuccess = try
         state.alpha, ϕalpha =
             method.linesearch!(d, state.x, state.s, state.alpha, state.x_ls, phi_0, dphi_0)
-        return true # lssuccess = true
+        true
     catch ex
-        if isa(ex, LineSearches.LineSearchException)
+        if ex isa LineSearches.LineSearchException
             state.alpha = ex.alpha
             # We shouldn't warn here, we should just carry it to the output
             # @warn("Linesearch failed, using alpha = $(state.alpha) and
             # exiting optimization.\nThe linesearch exited with message:\n$(ex.message)")
-            return false # lssuccess = false
+            false
         else
-            rethrow(ex)
+            rethrow()
         end
     end
+
+    # Store current x and f(x) for next iteration
+    state.f_x_previous = phi_0
+    copyto!(state.x_previous, state.x)
+    if hasproperty(state, :g_x_previous)
+        copyto!(state.g_x_previous, state.g_x)
+    end
+
+    return lssuccess
 end
diff --git a/test/general/api.jl b/test/general/api.jl
index 7eec17492..0e11ed329 100644
--- a/test/general/api.jl
+++ b/test/general/api.jl
@@ -130,8 +130,8 @@
     @test Optim.minimum(res) ≈ 1.2580194638225255
     @test Optim.minimizer(res) ≈ [-0.116688, 0.0031153] rtol = 0.001
     @test Optim.iterations(res) == 10
-    @test Optim.f_calls(res) == 38
-    @test Optim.g_calls(res) == 38
+    @test Optim.f_calls(res) == 42
+    @test Optim.g_calls(res) == 42
     @test !Optim.converged(res)
     @test !Optim.x_converged(res)
     @test !Optim.f_converged(res)
diff --git a/test/general/counter.jl b/test/general/counter.jl
index 52fa2a152..60c4229aa 100644
--- a/test/general/counter.jl
+++ b/test/general/counter.jl
@@ -76,10 +76,12 @@
         @test hcount == Optim.h_calls(res)
     end
 
-    # Need to define fg! and hv! for KrylovTrustRegion
-    fg!(out, x) = begin
-        g!(out, x)
-        f(x)
+    # define fg! and hv! for KrylovTrustRegion
+    fg!(F, G, x) = begin
+        if G !== nothing
+            g!(G, x)
+        end
+        return F === nothing ? nothing : f(x)
     end
     hv!(out, x, v) = begin
         n = length(x)
@@ -92,7 +94,7 @@
         fcounter(true)
         gcounter(true)
         hcounter(true)
-        df = Optim.TwiceDifferentiableHV(f, fg!, hv!, prob.initial_x)
+        df = Optim.TwiceDifferentiable(NLSolversBase.only_fg_and_hv!(fg!, hv!), prob.initial_x)
         res = Optim.optimize(df, prob.initial_x, solver)
         @test fcount == Optim.f_calls(res)
         @test gcount == Optim.g_calls(res)
diff --git a/test/general/objective_types.jl b/test/general/objective_types.jl
index 0d4474666..9542326dc 100644
--- a/test/general/objective_types.jl
+++ b/test/general/objective_types.jl
@@ -1,8 +1,5 @@
 @testset "objective types" begin
     @testset "autodiff" begin
-        # Should throw, as :wah is not a proper autodiff choice
-        @test_throws ErrorException OnceDifferentiable(x -> x, rand(10); autodiff = :wah)
-
         for T in (OnceDifferentiable, TwiceDifferentiable)
             odad1 = T(x -> 5.0, rand(1); autodiff = AutoFiniteDiff(; fdtype = Val(:central)))
             odad2 = T(x -> 5.0, rand(1); autodiff = AutoForwardDiff())
diff --git a/test/multivariate/solvers/constrained/ipnewton/constraints.jl b/test/multivariate/solvers/constrained/ipnewton/constraints.jl
index a440855dd..601cae686 100644
--- a/test/multivariate/solvers/constrained/ipnewton/constraints.jl
+++ b/test/multivariate/solvers/constrained/ipnewton/constraints.jl
@@ -71,7 +71,7 @@
         # Compare hand-computed gradient against that from automatic differentiation
         function check_autodiff(d, bounds, x, cfun::Function, bstate, μ)
             c = cfun(x)
-            J = Optim.NLSolversBase.ForwardDiff.jacobian(cfun, x)
+            J = ForwardDiff.jacobian(cfun, x)
             p = Optim.pack_vec(x, bstate)
             ftot! =
                 (storage, p) -> Optim.lagrangian_fgvec!(
@@ -90,8 +90,8 @@
             pgrad = similar(p)
             ftot!(pgrad, p)
             chunksize = min(8, length(p))
-            TD = Optim.NLSolversBase.ForwardDiff.Dual{
-                Optim.ForwardDiff.Tag{Nothing,Float64},
+            TD = ForwardDiff.Dual{
+                ForwardDiff.Tag{Nothing,Float64},
                 eltype(p),
                 chunksize,
             }
@@ -100,7 +100,7 @@
             pcmp = similar(p)
             ftot = p -> Optim.lagrangian_vec(p, d, bounds, xd, cfun, bstated, μ)
             #ForwardDiff.gradient!(pcmp, ftot, p, Optim.ForwardDiff.{chunksize}())
-            Optim.NLSolversBase.ForwardDiff.gradient!(pcmp, ftot, p)
+            ForwardDiff.gradient!(pcmp, ftot, p)
             @test pcmp ≈ pgrad
         end
         # Basic setup (using two objectives, one equal to zero and the other a Gaussian)
@@ -338,7 +338,7 @@
             h[3, 3] += 2 * λ[2] * x[2]
         end
         c = cfun(x)
-        J = Optim.NLSolversBase.ForwardDiff.jacobian(cfun, x)
+        J = ForwardDiff.jacobian(cfun, x)
         Jtmp = similar(J)
         @test cJ!(Jtmp, x) ≈ J  # just to check we did it right
         cbar = rand(length(c))
@@ -535,8 +535,8 @@
             # TODO: How do we deal with the new Tags in ForwardDiff?
 
             # TD = ForwardDiff.Dual{chunksize, eltype(y)}
-            TD = Optim.NLSolversBase.ForwardDiff.Dual{
-                Optim.NLSolversBase.ForwardDiff.Tag{Nothing,Float64},
+            TD = ForwardDiff.Dual{
+                ForwardDiff.Tag{Nothing,Float64},
                 eltype(p),
                 chunksize,
             }
@@ -554,7 +554,7 @@
             #ϕd2 = αs->Optim.lagrangian_linefunc(αs, d, constraints, stated2)
 
             #ForwardDiff.gradient(ϕd, zeros(4)), ForwardDiff.hessian(ϕd2, zeros(4))
-            Optim.NLSolversBase.ForwardDiff.gradient(ϕd, [0.0])#, ForwardDiff.hessian(ϕd2, [0.0])
+            ForwardDiff.gradient(ϕd, [0.0])#, ForwardDiff.hessian(ϕd2, [0.0])
         end
         F = 1000
         d = TwiceDifferentiable(
diff --git a/test/multivariate/solvers/constrained/ipnewton/interface.jl b/test/multivariate/solvers/constrained/ipnewton/interface.jl
index 476d183a1..fa273b68d 100644
--- a/test/multivariate/solvers/constrained/ipnewton/interface.jl
+++ b/test/multivariate/solvers/constrained/ipnewton/interface.jl
@@ -26,7 +26,7 @@ end
         storage
     end
     function exponential_hessian!(storage, x)
-        Optim.NLSolversBase.ForwardDiff.hessian!(storage, exponential, x)
+        ForwardDiff.hessian!(storage, exponential, x)
     end
 
     function exponential_gradient(x)
diff --git a/test/multivariate/solvers/second_order/krylov_trust_region.jl b/test/multivariate/solvers/second_order/krylov_trust_region.jl
index 4b7922e1d..f9877e7d3 100644
--- a/test/multivariate/solvers/second_order/krylov_trust_region.jl
+++ b/test/multivariate/solvers/second_order/krylov_trust_region.jl
@@ -5,27 +5,27 @@
         function f(x::Vector)
             (x[1] - 5.0)^4
         end
-
-        function fg!(g, x)
-            g[1] = 4.0 * (x[1] - 5.0)^3
-            f(x)
-        end
-
-        function fg2!(_f, g, x)
-            g === nothing || (g[1] = 4.0 * (x[1] - 5.0)^3)
-            _f === nothing || return f(x)
+        function fg!(_f, g, x)
+            if g !== nothing
+                g[1] = 4.0 * (x[1] - 5.0)^3
+            end
+            return _f === nothing ? nothing : f(x)
         end
 
-
         function hv!(Hv, x, v)
             Hv[1] = 12.0 * (x[1] - 5.0)^2 * v[1]
         end
-
-        d = Optim.TwiceDifferentiableHV(f, fg!, hv!, [0.0])
-        d2 = Optim.TwiceDifferentiableHV(NLSolversBase.only_fg_and_hv!(fg2!, hv!), [0.0])
-
+        d = TwiceDifferentiable(NLSolversBase.only_fg_and_hv!(fg!, hv!), [0.0])
         result = Optim.optimize(d, [0.0], Optim.KrylovTrustRegion())
         @test norm(Optim.minimizer(result) - [5.0]) < 0.01
+
+        function fgh!(f, g, H, x)
+            if H !== nothing
+                H[1, 1] = 12.0 * (x[1] - 5.0)^2
+            end
+            return fg!(f, g, x)
+        end
+        d2 = TwiceDifferentiable(NLSolversBase.only_fgh!(fgh!), [0.0])
         result = Optim.optimize(d2, [0.0], Optim.KrylovTrustRegion())
         @test norm(Optim.minimizer(result) - [5.0]) < 0.01
     end
@@ -37,16 +37,19 @@
             0.5 * (x[1]^2 + eta * x[2]^2)
         end
 
-        function fg2!(storage::Vector, x::Vector)
-            storage[:] = [x[1], eta * x[2]]
-            f2(x)
+        function fg2!(_f, _g, x::Vector)
+            if _g !== nothing
+                _g[1] = x[1]
+                _g[2] = eta * x[2]
+            end
+            return _f === nothing ? nothing : f2(x)
         end
 
         function hv2!(Hv::Vector, x::Vector, v::Vector)
-            Hv[:] = [1.0 0.0; 0.0 eta] * v
+            return mul!(Hv, Diagonal([1.0, eta]), v)
         end
 
-        d2 = Optim.TwiceDifferentiableHV(f2, fg2!, hv2!, Float64[127, 921])
+        d2 = Optim.TwiceDifferentiable(NLSolversBase.only_fg_and_hv!(fg2!, hv2!), Float64[127, 921])
 
         result = Optim.optimize(d2, Float64[127, 921], Optim.KrylovTrustRegion())
         @test Optim.g_converged(result)
@@ -56,20 +59,21 @@
     @testset "Stock test problems" begin
         for (name, prob) in MultivariateProblems.UnconstrainedProblems.examples
             if prob.istwicedifferentiable
-                hv!(storage::Vector, x::Vector, v::Vector) = begin
-                    n = length(x)
-                    H = Matrix{Float64}(undef, n, n)
-                    MVP.hessian(prob)(H, x)
-                    storage .= H * v
+                n = length(prob.initial_x)
+                hv! = let H = Matrix{Float64}(undef, n, n)
+                    function (storage::Vector, x::Vector, v::Vector)
+                        MVP.hessian(prob)(H, x)
+                        return mul!(storage, H, v)
+                    end
                 end
-                fg!(g::Vector, x::Vector) = begin
-                    MVP.gradient(prob)(g, x)
-                    MVP.objective(prob)(x)
+                fg!(_f, _g, x::Vector) = begin
+                    if _g !== nothing
+                        MVP.gradient(prob)(_g, x)
+                    end
+                    return _f === nothing ? nothing : MVP.objective(prob)(x)
                 end
-                ddf = Optim.TwiceDifferentiableHV(
-                    MVP.objective(prob),
-                    fg!,
-                    hv!,
+                ddf = Optim.TwiceDifferentiable(
+                    NLSolversBase.only_fg_and_hv!(fg!, hv!),
                     prob.initial_x,
                 )
                 result = Optim.optimize(ddf, prob.initial_x, Optim.KrylovTrustRegion())
diff --git a/test/univariate/dual.jl b/test/univariate/dual.jl
index e74f02bc3..e10145082 100644
--- a/test/univariate/dual.jl
+++ b/test/univariate/dual.jl
@@ -1,5 +1,5 @@
 @testset "Dual numbers" begin
     f(x, y) = x[1]^2 + y[1]^2
     g(param) = Optim.minimum(optimize(x -> f(x, param), -1, 1))
-    @test Optim.NLSolversBase.ForwardDiff.gradient(g, [1.0]) == [2.0]
+    @test ForwardDiff.gradient(g, [1.0]) == [2.0]
 end