Coverage for /usr/lib/python3/dist-packages/scipy/optimize/_hessian_update

100

101

102

103

"""Hessian update strategies for quasi-Newton optimization methods."""

from __future__ import division, print_function, absolute_import

import numpy as np

from numpy.linalg import norm

from scipy.linalg import get_blas_funcs

from warnings import warn

__all__ = ['HessianUpdateStrategy', 'BFGS', 'SR1']

class HessianUpdateStrategy(object):

"""Interface for implementing Hessian update strategies.

Many optimization methods make use of Hessian (or inverse Hessian)

approximations, such as the quasi-Newton methods BFGS, SR1, L-BFGS.

Some of these approximations, however, do not actually need to store

the entire matrix or can compute the internal matrix product with a

given vector in a very efficiently manner. This class serves as an

abstract interface between the optimization algorithm and the

quasi-Newton update strategies, giving freedom of implementation

to store and update the internal matrix as efficiently as possible.

Different choices of initialization and update procedure will result

in different quasi-Newton strategies.

Four methods should be implemented in derived classes: ``initialize``,

``update``, ``dot`` and ``get_matrix``.

Notes

-----

Any instance of a class that implements this interface,

can be accepted by the method ``minimize`` and used by

the compatible solvers to approximate the Hessian (or

inverse Hessian) used by the optimization algorithms.

"""

def initialize(self, n, approx_type):

"""Initialize internal matrix.

Allocate internal memory for storing and updating

the Hessian or its inverse.

Parameters

----------

n : int

Problem dimension.

approx_type : {'hess', 'inv_hess'}

Selects either the Hessian or the inverse Hessian.

When set to 'hess' the Hessian will be stored and updated.

When set to 'inv_hess' its inverse will be used instead.

"""

raise NotImplementedError("The method ``initialize(n, approx_type)``"

" is not implemented.")

def update(self, delta_x, delta_grad):

"""Update internal matrix.

Update Hessian matrix or its inverse (depending on how 'approx_type'

is defined) using information about the last evaluated points.

Parameters

----------

delta_x : ndarray

The difference between two points the gradient

function have been evaluated at: ``delta_x = x2 - x1``.

delta_grad : ndarray

The difference between the gradients:

``delta_grad = grad(x2) - grad(x1)``.

"""

raise NotImplementedError("The method ``update(delta_x, delta_grad)``"

" is not implemented.")

def dot(self, p):

"""Compute the product of the internal matrix with the given vector.

Parameters

----------

p : array_like

1-d array representing a vector.

Returns

-------

Hp : array

1-d represents the result of multiplying the approximation matrix

by vector p.

"""

raise NotImplementedError("The method ``dot(p)``"

" is not implemented.")

def get_matrix(self):

"""Return current internal matrix.

Returns

-------

H : ndarray, shape (n, n)

Dense matrix containing either the Hessian

or its inverse (depending on how 'approx_type'

is defined).

"""

raise NotImplementedError("The method ``get_matrix(p)``"

" is not implemented.")

class FullHessianUpdateStrategy(HessianUpdateStrategy):

"""Hessian update strategy with full dimensional internal representation.

"""

_syr = get_blas_funcs('syr', dtype='d') # Symmetric rank 1 update

_syr2 = get_blas_funcs('syr2', dtype='d') # Symmetric rank 2 update

# Symmetric matrix-vector product

_symv = get_blas_funcs('symv', dtype='d')

def __init__(self, init_scale='auto'):

self.init_scale = init_scale

# Until initialize is called we can't really use the class,

# so it makes sense to set everything to None.

self.first_iteration = None

self.approx_type = None

self.B = None

self.H = None

def initialize(self, n, approx_type):

"""Initialize internal matrix.

Allocate internal memory for storing and updating

the Hessian or its inverse.

Parameters

----------

n : int

Problem dimension.

approx_type : {'hess', 'inv_hess'}

Selects either the Hessian or the inverse Hessian.

When set to 'hess' the Hessian will be stored and updated.

When set to 'inv_hess' its inverse will be used instead.

"""

self.first_iteration = True

self.n = n

self.approx_type = approx_type

if approx_type not in ('hess', 'inv_hess'):

raise ValueError("`approx_type` must be 'hess' or 'inv_hess'.")

# Create matrix

if self.approx_type == 'hess':

self.B = np.eye(n, dtype=float)

else:

self.H = np.eye(n, dtype=float)

def _auto_scale(self, delta_x, delta_grad):

# Heuristic to scale matrix at first iteration.

# Described in Nocedal and Wright "Numerical Optimization"

# p.143 formula (6.20).

s_norm2 = np.dot(delta_x, delta_x)

y_norm2 = np.dot(delta_grad, delta_grad)

ys = np.abs(np.dot(delta_grad, delta_x))

if ys == 0.0 or y_norm2 == 0 or s_norm2 == 0:

return 1

if self.approx_type == 'hess':

return y_norm2 / ys

else:

return ys / y_norm2

def _update_implementation(self, delta_x, delta_grad):

raise NotImplementedError("The method ``_update_implementation``"

" is not implemented.")

def update(self, delta_x, delta_grad):

"""Update internal matrix.

Update Hessian matrix or its inverse (depending on how 'approx_type'

is defined) using information about the last evaluated points.

Parameters

----------

delta_x : ndarray

The difference between two points the gradient

function have been evaluated at: ``delta_x = x2 - x1``.

delta_grad : ndarray

The difference between the gradients:

``delta_grad = grad(x2) - grad(x1)``.

"""

if np.all(delta_x == 0.0):

return

if np.all(delta_grad == 0.0):

warn('delta_grad == 0.0. Check if the approximated '

'function is linear. If the function is linear '

'better results can be obtained by defining the '

'Hessian as zero instead of using quasi-Newton '

'approximations.', UserWarning)

return

if self.first_iteration:

# Get user specific scale

if self.init_scale == "auto":

scale = self._auto_scale(delta_x, delta_grad)

else:

scale = float(self.init_scale)

# Scale initial matrix with ``scale * np.eye(n)``

if self.approx_type == 'hess':

self.B *= scale

else:

self.H *= scale

self.first_iteration = False

self._update_implementation(delta_x, delta_grad)

def dot(self, p):

"""Compute the product of the internal matrix with the given vector.

Parameters

----------

p : array_like

1-d array representing a vector.

Returns

-------

Hp : array

1-d represents the result of multiplying the approximation matrix

by vector p.

"""

if self.approx_type == 'hess':

return self._symv(1, self.B, p)

else:

return self._symv(1, self.H, p)

def get_matrix(self):

"""Return the current internal matrix.

Returns

-------

M : ndarray, shape (n, n)

Dense matrix containing either the Hessian or its inverse

(depending on how `approx_type` was defined).

"""

if self.approx_type == 'hess':

M = np.copy(self.B)

else:

M = np.copy(self.H)

li = np.tril_indices_from(M, k=-1)

M[li] = M.T[li]

return M

class BFGS(FullHessianUpdateStrategy):

"""Broyden-Fletcher-Goldfarb-Shanno (BFGS) Hessian update strategy.

Parameters

----------

exception_strategy : {'skip_update', 'damp_update'}, optional

Define how to proceed when the curvature condition is violated.

Set it to 'skip_update' to just skip the update. Or, alternatively,

set it to 'damp_update' to interpolate between the actual BFGS

result and the unmodified matrix. Both exceptions strategies

are explained in [1]_, p.536-537.

min_curvature : float

This number, scaled by a normalization factor, defines the

minimum curvature ``dot(delta_grad, delta_x)`` allowed to go

unaffected by the exception strategy. By default is equal to

1e-8 when ``exception_strategy = 'skip_update'`` and equal

to 0.2 when ``exception_strategy = 'damp_update'``.

init_scale : {float, 'auto'}

Matrix scale at first iteration. At the first

iteration the Hessian matrix or its inverse will be initialized

with ``init_scale*np.eye(n)``, where ``n`` is the problem dimension.

Set it to 'auto' in order to use an automatic heuristic for choosing

the initial scale. The heuristic is described in [1]_, p.143.

By default uses 'auto'.

Notes

-----

The update is based on the description in [1]_, p.140.

References

----------

.. [1] Nocedal, Jorge, and Stephen J. Wright. "Numerical optimization"

Second Edition (2006).

"""

def __init__(self, exception_strategy='skip_update', min_curvature=None,

init_scale='auto'):

if exception_strategy == 'skip_update':

if min_curvature is not None:

self.min_curvature = min_curvature

else:

self.min_curvature = 1e-8

elif exception_strategy == 'damp_update':

if min_curvature is not None:

self.min_curvature = min_curvature

else:

self.min_curvature = 0.2

else:

raise ValueError("`exception_strategy` must be 'skip_update' "

"or 'damp_update'.")

super(BFGS, self).__init__(init_scale)

self.exception_strategy = exception_strategy

def _update_inverse_hessian(self, ys, Hy, yHy, s):

"""Update the inverse Hessian matrix.

BFGS update using the formula:

``H <- H + ((H*y).T*y + s.T*y)/(s.T*y)^2 * (s*s.T)

- 1/(s.T*y) * ((H*y)*s.T + s*(H*y).T)``

where ``s = delta_x`` and ``y = delta_grad``. This formula is

equivalent to (6.17) in [1]_ written in a more efficient way

for implementation.

References

----------

.. [1] Nocedal, Jorge, and Stephen J. Wright. "Numerical optimization"

Second Edition (2006).

"""

self.H = self._syr2(-1.0 / ys, s, Hy, a=self.H)

self.H = self._syr((ys+yHy)/ys**2, s, a=self.H)

def _update_hessian(self, ys, Bs, sBs, y):

"""Update the Hessian matrix.

BFGS update using the formula:

``B <- B - (B*s)*(B*s).T/s.T*(B*s) + y*y^T/s.T*y``

where ``s`` is short for ``delta_x`` and ``y`` is short

for ``delta_grad``. Formula (6.19) in [1]_.

References

----------

.. [1] Nocedal, Jorge, and Stephen J. Wright. "Numerical optimization"

Second Edition (2006).

"""

self.B = self._syr(1.0 / ys, y, a=self.B)

self.B = self._syr(-1.0 / sBs, Bs, a=self.B)

def _update_implementation(self, delta_x, delta_grad):

# Auxiliary variables w and z

if self.approx_type == 'hess':

w = delta_x

z = delta_grad

else:

w = delta_grad

z = delta_x

# Do some common operations

wz = np.dot(w, z)

Mw = self.dot(w)

wMw = Mw.dot(w)

# Guarantee that wMw > 0 by reinitializing matrix.

# While this is always true in exact arithmetics,

# indefinite matrix may appear due to roundoff errors.

if wMw <= 0.0:

scale = self._auto_scale(delta_x, delta_grad)

# Reinitialize matrix

if self.approx_type == 'hess':

self.B = scale * np.eye(self.n, dtype=float)

else:

self.H = scale * np.eye(self.n, dtype=float)

# Do common operations for new matrix

Mw = self.dot(w)

wMw = Mw.dot(w)

# Check if curvature condition is violated

if wz <= self.min_curvature * wMw:

# If the option 'skip_update' is set

# we just skip the update when the condion

# is violated.

if self.exception_strategy == 'skip_update':

return

# If the option 'damp_update' is set we

# interpolate between the actual BFGS

# result and the unmodified matrix.

elif self.exception_strategy == 'damp_update':

update_factor = (1-self.min_curvature) / (1 - wz/wMw)

z = update_factor*z + (1-update_factor)*Mw

wz = np.dot(w, z)

# Update matrix

if self.approx_type == 'hess':

self._update_hessian(wz, Mw, wMw, z)

else:

self._update_inverse_hessian(wz, Mw, wMw, z)

class SR1(FullHessianUpdateStrategy):

"""Symmetric-rank-1 Hessian update strategy.

Parameters

----------

min_denominator : float

This number, scaled by a normalization factor,

defines the minimum denominator magnitude allowed

in the update. When the condition is violated we skip

the update. By default uses ``1e-8``.

init_scale : {float, 'auto'}, optional

Matrix scale at first iteration. At the first

iteration the Hessian matrix or its inverse will be initialized

with ``init_scale*np.eye(n)``, where ``n`` is the problem dimension.

Set it to 'auto' in order to use an automatic heuristic for choosing

the initial scale. The heuristic is described in [1]_, p.143.

By default uses 'auto'.

Notes

-----

The update is based on the description in [1]_, p.144-146.

References

----------

.. [1] Nocedal, Jorge, and Stephen J. Wright. "Numerical optimization"

Second Edition (2006).

"""

def __init__(self, min_denominator=1e-8, init_scale='auto'):

self.min_denominator = min_denominator

super(SR1, self).__init__(init_scale)

def _update_implementation(self, delta_x, delta_grad):

# Auxiliary variables w and z

if self.approx_type == 'hess':

w = delta_x

z = delta_grad

else:

w = delta_grad

z = delta_x

# Do some common operations

Mw = self.dot(w)

z_minus_Mw = z - Mw

denominator = np.dot(w, z_minus_Mw)

# If the denominator is too small

# we just skip the update.

if np.abs(denominator) <= self.min_denominator*norm(w)*norm(z_minus_Mw):

return

# Update matrix

if self.approx_type == 'hess':

self.B = self._syr(1/denominator, z_minus_Mw, a=self.B)

else:

self.H = self._syr(1/denominator, z_minus_Mw, a=self.H)

Coverage for /usr/lib/python3/dist-packages/scipy/optimize/_hessian_update_strategy.py : 31%

127 statements 40 run 87 missing 7 excluded