"""Nearly exact trust-region optimization subproblem."""
cho_solve)
'estimate_smallest_singular_value', 'singular_leading_submatrix', 'IterativeSubproblem']
**trust_region_options): """ Minimization of scalar function of one or more variables using a nearly exact trust-region algorithm.
Options ------- initial_tr_radius : float Initial trust-region radius. max_tr_radius : float Maximum value of the trust-region radius. No steps that are longer than this value will be proposed. eta : float Trust region related acceptance stringency for proposed steps. gtol : float Gradient norm must be less than ``gtol`` before successful termination. """
if jac is None: raise ValueError('Jacobian is required for trust region ' 'exact minimization.') if hess is None: raise ValueError('Hessian matrix is required for trust region ' 'exact minimization.') return _minimize_trust_region(fun, x0, args=args, jac=jac, hess=hess, subproblem=IterativeSubproblem, **trust_region_options)
"""Given upper triangular matrix ``U`` estimate the smallest singular value and the correspondent right singular vector in O(n**2) operations.
Parameters ---------- U : ndarray Square upper triangular matrix.
Returns ------- s_min : float Estimated smallest singular value of the provided matrix. z_min : ndarray Estimatied right singular vector.
Notes ----- The procedure is based on [1]_ and is done in two steps. First it finds a vector ``e`` with components selected from {+1, -1} such that the solution ``w`` from the system ``U.T w = e`` is as large as possible. Next it estimate ``U v = w``. The smallest singular value is close to ``norm(w)/norm(v)`` and the right singular vector is close to ``v/norm(v)``.
The estimation will be better more ill-conditioned is the matrix.
References ---------- .. [1] Cline, A. K., Moler, C. B., Stewart, G. W., Wilkinson, J. H. An estimate for the condition number of a matrix. 1979. SIAM Journal on Numerical Analysis, 16(2), 368-375. """
U = np.atleast_2d(U) m, n = U.shape
if m != n: raise ValueError("A square triangular matrix should be provided.")
# A vector `e` with components selected from {+1, -1} # is selected so that the solution `w` to the system # `U.T w = e` is as large as possible. Implementation # based on algorithm 3.5.1, p. 142, from reference [2] # adapted for lower triangular matrix.
p = np.zeros(n) w = np.empty(n)
# Implemented according to: Golub, G. H., Van Loan, C. F. (2013). # "Matrix computations". Forth Edition. JHU press. pp. 140-142. for k in range(n): wp = (1-p[k]) / U.T[k, k] wm = (-1-p[k]) / U.T[k, k] pp = p[k+1:] + U.T[k+1:, k]*wp pm = p[k+1:] + U.T[k+1:, k]*wm
if abs(wp) + norm(pp, 1) >= abs(wm) + norm(pm, 1): w[k] = wp p[k+1:] = pp else: w[k] = wm p[k+1:] = pm
# The system `U v = w` is solved using backward substitution. v = solve_triangular(U, w)
v_norm = norm(v) w_norm = norm(w)
# Smallest singular value s_min = w_norm / v_norm
# Associated vector z_min = v / v_norm
return s_min, z_min
""" Given a square matrix ``H`` compute upper and lower bounds for its eigenvalues (Gregoshgorin Bounds). Defined ref. [1].
References ---------- .. [1] Conn, A. R., Gould, N. I., & Toint, P. L. Trust region methods. 2000. Siam. pp. 19. """
H_diag = np.diag(H) H_diag_abs = np.abs(H_diag) H_row_sums = np.sum(np.abs(H), axis=1) lb = np.min(H_diag + H_diag_abs - H_row_sums) ub = np.max(H_diag - H_diag_abs + H_row_sums)
return lb, ub
""" Compute term that makes the leading ``k`` by ``k`` submatrix from ``A`` singular.
Parameters ---------- A : ndarray Symmetric matrix that is not positive definite. U : ndarray Upper triangular matrix resulting of an incomplete Cholesky decomposition of matrix ``A``. k : int Positive integer such that the leading k by k submatrix from `A` is the first non-positive definite leading submatrix.
Returns ------- delta : float Amount that should be added to the element (k, k) of the leading k by k submatrix of ``A`` to make it singular. v : ndarray A vector such that ``v.T B v = 0``. Where B is the matrix A after ``delta`` is added to its element (k, k). """
# Compute delta delta = np.sum(U[:k-1, k-1]**2) - A[k-1, k-1]
n = len(A)
# Inicialize v v = np.zeros(n) v[k-1] = 1
# Compute the remaining values of v by solving a triangular system. if k != 1: v[:k-1] = solve_triangular(U[:k-1, :k-1], -U[:k-1, k-1])
return delta, v
"""Quadratic subproblem solved by nearly exact iterative method.
Notes ----- This subproblem solver was based on [1]_, [2]_ and [3]_, which implement similar algorithms. The algorithm is basically that of [1]_ but ideas from [2]_ and [3]_ were also used.
References ---------- .. [1] A.R. Conn, N.I. Gould, and P.L. Toint, "Trust region methods", Siam, pp. 169-200, 2000. .. [2] J. Nocedal and S. Wright, "Numerical optimization", Springer Science & Business Media. pp. 83-91, 2006. .. [3] J.J. More and D.C. Sorensen, "Computing a trust region step", SIAM Journal on Scientific and Statistical Computing, vol. 4(3), pp. 553-572, 1983. """
# UPDATE_COEFF appears in reference [1]_ # in formula 7.3.14 (p. 190) named as "theta". # As recommended there it value is fixed in 0.01.
k_easy=0.1, k_hard=0.2):
super(IterativeSubproblem, self).__init__(x, fun, jac, hess)
# When the trust-region shrinks in two consecutive # calculations (``tr_radius < previous_tr_radius``) # the lower bound ``lambda_lb`` may be reused, # facilitating the convergence. To indicate no # previous value is known at first ``previous_tr_radius`` # is set to -1 and ``lambda_lb`` to None. self.previous_tr_radius = -1 self.lambda_lb = None
self.niter = 0
# ``k_easy`` and ``k_hard`` are parameters used # to determine the stop criteria to the iterative # subproblem solver. Take a look at pp. 194-197 # from reference _[1] for a more detailed description. self.k_easy = k_easy self.k_hard = k_hard
# Get Lapack function for cholesky decomposition. # The implemented Scipy wrapper does not return # the incomplete factorization needed by the method. self.cholesky, = get_lapack_funcs(('potrf',), (self.hess,))
# Get info about Hessian self.dimension = len(self.hess) self.hess_gershgorin_lb,\ self.hess_gershgorin_ub = gershgorin_bounds(self.hess) self.hess_inf = norm(self.hess, np.Inf) self.hess_fro = norm(self.hess, 'fro')
# A constant such that for vectors smaler than that # backward substituition is not reliable. It was stabilished # based on Golub, G. H., Van Loan, C. F. (2013). # "Matrix computations". Forth Edition. JHU press., p.165. self.CLOSE_TO_ZERO = self.dimension * self.EPS * self.hess_inf
"""Given a trust radius, return a good initial guess for the damping factor, the lower bound and the upper bound. The values were chosen accordingly to the guidelines on section 7.3.8 (p. 192) from [1]_. """
# Upper bound for the damping factor lambda_ub = max(0, self.jac_mag/tr_radius + min(-self.hess_gershgorin_lb, self.hess_fro, self.hess_inf))
# Lower bound for the damping factor lambda_lb = max(0, -min(self.hess.diagonal()), self.jac_mag/tr_radius - min(self.hess_gershgorin_ub, self.hess_fro, self.hess_inf))
# Improve bounds with previous info if tr_radius < self.previous_tr_radius: lambda_lb = max(self.lambda_lb, lambda_lb)
# Initial guess for the damping factor if lambda_lb == 0: lambda_initial = 0 else: lambda_initial = max(np.sqrt(lambda_lb * lambda_ub), lambda_lb + self.UPDATE_COEFF*(lambda_ub-lambda_lb))
return lambda_initial, lambda_lb, lambda_ub
"""Solve quadratic subproblem"""
lambda_current, lambda_lb, lambda_ub = self._initial_values(tr_radius) n = self.dimension hits_boundary = True already_factorized = False self.niter = 0
while True:
# Compute Cholesky factorization if already_factorized: already_factorized = False else: H = self.hess+lambda_current*np.eye(n) U, info = self.cholesky(H, lower=False, overwrite_a=False, clean=True)
self.niter += 1
# Check if factorization succeeded if info == 0 and self.jac_mag > self.CLOSE_TO_ZERO: # Successful factorization
# Solve `U.T U p = s` p = cho_solve((U, False), -self.jac)
p_norm = norm(p)
# Check for interior convergence if p_norm <= tr_radius and lambda_current == 0: hits_boundary = False break
# Solve `U.T w = p` w = solve_triangular(U, p, trans='T')
w_norm = norm(w)
# Compute Newton step accordingly to # formula (4.44) p.87 from ref [2]_. delta_lambda = (p_norm/w_norm)**2 * (p_norm-tr_radius)/tr_radius lambda_new = lambda_current + delta_lambda
if p_norm < tr_radius: # Inside boundary s_min, z_min = estimate_smallest_singular_value(U)
ta, tb = self.get_boundaries_intersections(p, z_min, tr_radius)
# Choose `step_len` with the smallest magnitude. # The reason for this choice is explained at # ref [3]_, p. 6 (Immediately before the formula # for `tau`). step_len = min([ta, tb], key=abs)
# Compute the quadratic term (p.T*H*p) quadratic_term = np.dot(p, np.dot(H, p))
# Check stop criteria relative_error = (step_len**2 * s_min**2) / (quadratic_term + lambda_current*tr_radius**2) if relative_error <= self.k_hard: p += step_len * z_min break
# Update uncertanty bounds lambda_ub = lambda_current lambda_lb = max(lambda_lb, lambda_current - s_min**2)
# Compute Cholesky factorization H = self.hess + lambda_new*np.eye(n) c, info = self.cholesky(H, lower=False, overwrite_a=False, clean=True)
# Check if the factorization have succeeded # if info == 0: # Successful factorization # Update damping factor lambda_current = lambda_new already_factorized = True else: # Unsuccessful factorization # Update uncertanty bounds lambda_lb = max(lambda_lb, lambda_new)
# Update damping factor lambda_current = max(np.sqrt(lambda_lb * lambda_ub), lambda_lb + self.UPDATE_COEFF*(lambda_ub-lambda_lb))
else: # Outside boundary # Check stop criteria relative_error = abs(p_norm - tr_radius) / tr_radius if relative_error <= self.k_easy: break
# Update uncertanty bounds lambda_lb = lambda_current
# Update damping factor lambda_current = lambda_new
elif info == 0 and self.jac_mag <= self.CLOSE_TO_ZERO: # jac_mag very close to zero
# Check for interior convergence if lambda_current == 0: p = np.zeros(n) hits_boundary = False break
s_min, z_min = estimate_smallest_singular_value(U) step_len = tr_radius
# Check stop criteria if step_len**2 * s_min**2 <= self.k_hard * lambda_current * tr_radius**2: p = step_len * z_min break
# Update uncertanty bounds lambda_ub = lambda_current lambda_lb = max(lambda_lb, lambda_current - s_min**2)
# Update damping factor lambda_current = max(np.sqrt(lambda_lb * lambda_ub), lambda_lb + self.UPDATE_COEFF*(lambda_ub-lambda_lb))
else: # Unsuccessful factorization
# Compute auxiliary terms delta, v = singular_leading_submatrix(H, U, info) v_norm = norm(v)
# Update uncertanty interval lambda_lb = max(lambda_lb, lambda_current + delta/v_norm**2)
# Update damping factor lambda_current = max(np.sqrt(lambda_lb * lambda_ub), lambda_lb + self.UPDATE_COEFF*(lambda_ub-lambda_lb))
self.lambda_lb = lambda_lb self.lambda_current = lambda_current self.previous_tr_radius = tr_radius
return p, hits_boundary |