"""Trust-region optimization."""
import math

import numpy as np
import scipy.linalg

from .optimize import (_check_unknown_options, wrap_function, _status_message,
                       OptimizeResult)
""" Base/abstract class defining the quadratic model for trust-region minimization. Child classes must implement the ``solve`` method.
Values of the objective function, jacobian and hessian (if provided) at the current iterate ``x`` are evaluated on demand and then stored as attributes ``fun``, ``jac``, ``hess``. """

    def __init__(self, x, fun, jac, hess=None, hessp=None):
        self._x = x
        self._f = None
        self._g = None
        self._h = None
        self._g_mag = None
        self._cauchy_point = None
        self._newton_point = None
        self._fun = fun
        self._jac = jac
        self._hess = hess
        self._hessp = hessp

    def __call__(self, p):
        # Evaluate the quadratic model m(p) = f + g.T p + 0.5 * p.T B p
        # at the step p.
        return self.fun + np.dot(self.jac, p) + 0.5 * np.dot(p, self.hessp(p))

    @property
    def fun(self):
        """Value of objective function at current iteration."""
        if self._f is None:
            self._f = self._fun(self._x)
        return self._f

    @property
    def jac(self):
        """Value of Jacobian of objective function at current iteration."""
        if self._g is None:
            self._g = self._jac(self._x)
        return self._g

    @property
    def hess(self):
        """Value of Hessian of objective function at current iteration."""
        if self._h is None:
            self._h = self._hess(self._x)
        return self._h

    def hessp(self, p):
        """Hessian-vector product at the current iterate."""
        if self._hessp is not None:
            return self._hessp(self._x, p)
        else:
            return np.dot(self.hess, p)

    @property
    def jac_mag(self):
        """Magnitude of Jacobian of objective function at current iteration."""
        if self._g_mag is None:
            self._g_mag = scipy.linalg.norm(self.jac)
        return self._g_mag
""" Solve the scalar quadratic equation ||z + t d|| == trust_radius. This is like a line-sphere intersection. Return the two values of t, sorted from low to high. """ a = np.dot(d, d) b = 2 * np.dot(z, d) c = np.dot(z, z) - trust_radius**2 sqrt_discriminant = math.sqrt(b*b - 4*a*c)
# The following calculation is mathematically # equivalent to: # ta = (-b - sqrt_discriminant) / (2*a) # tb = (-b + sqrt_discriminant) / (2*a) # but produce smaller round off errors. # Look at Matrix Computation p.97 # for a better justification. aux = b + math.copysign(sqrt_discriminant, b) ta = -aux / (2*a) tb = -2*c / aux return sorted([ta, tb])
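
    # Hypothetical usage sketch (not from the original source): a child
    # class's ``solve`` would typically use this helper to clip a search
    # direction to the trust-region boundary, e.g.:
    #
    #     ta, tb = self.get_boundaries_intersections(z, d, trust_radius)
    #     p_boundary = z + tb * d  # boundary point in the direction of d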

    def solve(self, trust_radius):
        raise NotImplementedError('The solve method should be implemented by '
                                  'the child class')
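

# A minimal sketch (hypothetical, not part of the original module) of a
# concrete subproblem: ``solve`` returns the Cauchy point, the minimizer
# of the quadratic model along the steepest-descent direction within the
# trust region (Nocedal & Wright, Numerical Optimization, eqs. 4.11-4.12).
# The production solvers (dogleg, Newton-CG, etc.) live in the sibling
# ``_trustregion_*`` modules.
class _ExampleCauchyPointSubproblem(BaseQuadraticSubproblem):
    def solve(self, trust_radius):
        g = self.jac
        gBg = np.dot(g, self.hessp(g))
        if gBg <= 0:
            # Non-positive curvature along -g: step all the way to the
            # boundary of the trust region.
            tau = 1.0
        else:
            tau = min(self.jac_mag**3 / (trust_radius * gBg), 1.0)
        p = -tau * (trust_radius / self.jac_mag) * g
        # The step lies on the boundary exactly when tau reaches 1.
        return p, tau == 1.0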


def _minimize_trust_region(fun, x0, args=(), jac=None, hess=None, hessp=None,
                           subproblem=None, initial_trust_radius=1.0,
                           max_trust_radius=1000.0, eta=0.15, gtol=1e-4,
                           maxiter=None, disp=False, return_all=False,
                           callback=None, inexact=True, **unknown_options):
    """
    Minimization of a scalar function of one or more variables using a
    trust-region algorithm.

    Options for the trust-region algorithm are:
        initial_trust_radius : float
            Initial trust radius.
        max_trust_radius : float
            Never propose steps that are longer than this value.
        eta : float
            Trust region related acceptance stringency for proposed steps.
        gtol : float
            Gradient norm must be less than ``gtol`` before successful
            termination.
        maxiter : int
            Maximum number of iterations to perform.
        disp : bool
            If True, print convergence message.
        inexact : bool
            Accuracy to solve subproblems. If True, requires fewer nonlinear
            iterations but more vector products. Only effective for method
            trust-krylov.

    This function is called by the `minimize` function.
    It is not supposed to be called directly.
    """
    _check_unknown_options(unknown_options)

    if jac is None:
        raise ValueError('Jacobian is currently required for trust-region '
                         'methods')
    if hess is None and hessp is None:
        raise ValueError('Either the Hessian or the Hessian-vector product '
                         'is currently required for trust-region methods')
    if subproblem is None:
        raise ValueError('A subproblem solving strategy is required for '
                         'trust-region methods')
    if not (0 <= eta < 0.25):
        raise ValueError('invalid acceptance stringency')
    if max_trust_radius <= 0:
        raise ValueError('the max trust radius must be positive')
    if initial_trust_radius <= 0:
        raise ValueError('the initial trust radius must be positive')
    if initial_trust_radius >= max_trust_radius:
        raise ValueError('the initial trust radius must be less than the '
                         'max trust radius')

    # force the initial guess into a nice format
    x0 = np.asarray(x0).flatten()

    # Wrap the functions, for a couple of reasons.
    # This tracks how many times they have been called
    # and it automatically passes the args.
    nfun, fun = wrap_function(fun, args)
    njac, jac = wrap_function(jac, args)
    nhess, hess = wrap_function(hess, args)
    nhessp, hessp = wrap_function(hessp, args)

    # limit the number of iterations
    if maxiter is None:
        maxiter = len(x0) * 200

    # init the search status
    warnflag = 0

    # initialize the search
    trust_radius = initial_trust_radius
    x = x0
    if return_all:
        allvecs = [x]
    m = subproblem(x, fun, jac, hess, hessp)
    k = 0
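
    # The loop below is the classical trust-region scheme (cf. Algorithm 4.1
    # in Nocedal & Wright, Numerical Optimization): solve the subproblem
    # within the current radius, compare the actual reduction of the
    # objective against the reduction predicted by the model, shrink or
    # grow the radius accordingly, and accept the step only when the
    # agreement is good enough (rho > eta).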

    # search for the function min
    # do not even start if the gradient is small enough
    while m.jac_mag >= gtol:

        # Solve the sub-problem.
        # This gives us the proposed step relative to the current position
        # and it tells us whether the proposed step
        # has reached the trust region boundary or not.
        try:
            p, hits_boundary = m.solve(trust_radius)
        except np.linalg.LinAlgError:
            warnflag = 3
            break

        # calculate the predicted value at the proposed point
        predicted_value = m(p)

        # define the local approximation at the proposed point
        x_proposed = x + p
        m_proposed = subproblem(x_proposed, fun, jac, hess, hessp)

        # evaluate the ratio defined in equation (4.4)
        actual_reduction = m.fun - m_proposed.fun
        predicted_reduction = m.fun - predicted_value
        if predicted_reduction <= 0:
            warnflag = 2
            break
        rho = actual_reduction / predicted_reduction
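
        # The ratio compares the reduction actually achieved with the
        # reduction the quadratic model predicted:
        #     rho = (f(x) - f(x+p)) / (m(0) - m(p))
        # (equation (4.4) appears to refer to Nocedal & Wright, Numerical
        # Optimization). Values near 1 mean the model is trustworthy near
        # the current iterate.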

        # update the trust radius according to the actual/predicted ratio
        if rho < 0.25:
            trust_radius *= 0.25
        elif rho > 0.75 and hits_boundary:
            trust_radius = min(2 * trust_radius, max_trust_radius)

        # if the ratio is high enough then accept the proposed step
        if rho > eta:
            x = x_proposed
            m = m_proposed

        # append the best guess, call back, increment the iteration count
        if return_all:
            allvecs.append(np.copy(x))
        if callback is not None:
            callback(np.copy(x))
        k += 1

        # check if the gradient is small enough to stop
        if m.jac_mag < gtol:
            warnflag = 0
            break

        # check if we have looked at enough iterations
        if k >= maxiter:
            warnflag = 1
            break

    # print some stuff if requested
    status_messages = (
        _status_message['success'],
        _status_message['maxiter'],
        'A bad approximation caused failure to predict improvement.',
        'A linalg error occurred, such as a non-psd Hessian.',
    )
    if disp:
        if warnflag == 0:
            print(status_messages[warnflag])
        else:
            print('Warning: ' + status_messages[warnflag])
        print("         Current function value: %f" % m.fun)
        print("         Iterations: %d" % k)
        print("         Function evaluations: %d" % nfun[0])
        print("         Gradient evaluations: %d" % njac[0])
        print("         Hessian evaluations: %d" % nhess[0])

    result = OptimizeResult(x=x, success=(warnflag == 0), status=warnflag,
                            fun=m.fun, jac=m.jac, nfev=nfun[0], njev=njac[0],
                            nhev=nhess[0], nit=k,
                            message=status_messages[warnflag])

    if hess is not None:
        result['hess'] = m.hess

    if return_all:
        result['allvecs'] = allvecs

    return result
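

# Usage sketch (illustrative, not part of this module): this private solver
# is normally reached through the public ``scipy.optimize.minimize``
# interface rather than called directly, e.g.:
#
#     from scipy.optimize import minimize, rosen, rosen_der, rosen_hess
#     res = minimize(rosen, x0=[2.0, 2.0], method='trust-ncg',
#                    jac=rosen_der, hess=rosen_hess,
#                    options={'gtol': 1e-6, 'initial_trust_radius': 1.0})
#     print(res.x)  # approaches the minimizer [1.0, 1.0]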