1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21

22

23

24

25

26

27

28

29

30

31

32

33

34

35

36

37

38

39

40

41

42

43

44

45

46

47

48

49

50

51

52

53

54

55

56

57

58

59

60

61

62

63

64

65

66

67

68

69

70

71

72

73

74

75

76

77

78

79

80

81

82

83

84

85

86

87

88

89

90

91

92

93

94

95

96

97

98

99

100

101

102

103

104

105

106

107

108

109

110

111

112

113

114

115

116

117

118

119

120

121

122

123

124

125

126

127

128

129

130

131

132

133

134

135

136

137

138

139

140

141

142

143

144

145

146

147

148

149

150

151

152

153

154

155

156

157

158

159

160

161

162

163

164

165

166

167

168

169

170

171

172

173

174

175

176

177

178

179

180

181

182

183

184

185

186

187

188

189

190

191

192

193

194

195

196

197

198

199

200

201

202

203

204

205

206

207

208

209

210

211

212

213

214

215

216

217

218

219

220

221

222

223

224

225

226

227

228

229

230

231

232

233

234

235

236

237

238

239

240

241

242

243

244

245

246

247

248

249

250

251

252

253

254

255

256

257

258

259

260

261

262

263

264

265

266

267

268

269

270

271

272

273

274

275

276

277

278

279

280

281

282

283

284

285

286

287

288

289

290

291

292

293

294

295

296

297

298

299

300

301

302

303

304

305

306

307

308

309

310

311

312

313

314

315

316

317

318

319

320

321

322

323

324

325

326

327

328

329

330

331

332

333

334

335

336

337

338

339

340

341

342

343

344

345

346

347

348

349

350

351

352

353

354

355

356

357

358

359

360

361

362

363

364

365

366

367

368

369

370

371

372

373

374

375

376

377

378

379

380

381

382

383

384

385

386

387

388

389

390

391

392

393

394

395

396

397

398

399

400

401

402

403

404

405

406

407

408

409

410

411

412

413

414

415

416

417

418

419

420

421

422

423

424

425

426

427

428

429

430

431

432

"""Nearly exact trust-region optimization subproblem.""" 

from __future__ import division, print_function, absolute_import 

 

import numpy as np 

from scipy.linalg import (norm, get_lapack_funcs, solve_triangular, 

cho_solve) 

from ._trustregion import (_minimize_trust_region, BaseQuadraticSubproblem) 

 

__all__ = ['_minimize_trustregion_exact', 

'estimate_smallest_singular_value', 

'singular_leading_submatrix', 

'IterativeSubproblem'] 

 

 

def _minimize_trustregion_exact(fun, x0, args=(), jac=None, hess=None, 

**trust_region_options): 

""" 

Minimization of scalar function of one or more variables using 

a nearly exact trust-region algorithm. 

 

Options 

------- 

initial_tr_radius : float 

Initial trust-region radius. 

max_tr_radius : float 

Maximum value of the trust-region radius. No steps that are longer 

than this value will be proposed. 

eta : float 

Trust region related acceptance stringency for proposed steps. 

gtol : float 

Gradient norm must be less than ``gtol`` before successful 

termination. 

""" 

 

if jac is None: 

raise ValueError('Jacobian is required for trust region ' 

'exact minimization.') 

if hess is None: 

raise ValueError('Hessian matrix is required for trust region ' 

'exact minimization.') 

return _minimize_trust_region(fun, x0, args=args, jac=jac, hess=hess, 

subproblem=IterativeSubproblem, 

**trust_region_options) 

 

 

def estimate_smallest_singular_value(U): 

"""Given upper triangular matrix ``U`` estimate the smallest singular 

value and the correspondent right singular vector in O(n**2) operations. 

 

Parameters 

---------- 

U : ndarray 

Square upper triangular matrix. 

 

Returns 

------- 

s_min : float 

Estimated smallest singular value of the provided matrix. 

z_min : ndarray 

Estimatied right singular vector. 

 

Notes 

----- 

The procedure is based on [1]_ and is done in two steps. First it finds 

a vector ``e`` with components selected from {+1, -1} such that the 

solution ``w`` from the system ``U.T w = e`` is as large as possible. 

Next it estimate ``U v = w``. The smallest singular value is close 

to ``norm(w)/norm(v)`` and the right singular vector is close 

to ``v/norm(v)``. 

 

The estimation will be better more ill-conditioned is the matrix. 

 

References 

---------- 

.. [1] Cline, A. K., Moler, C. B., Stewart, G. W., Wilkinson, J. H. 

An estimate for the condition number of a matrix. 1979. 

SIAM Journal on Numerical Analysis, 16(2), 368-375. 

""" 

 

U = np.atleast_2d(U) 

m, n = U.shape 

 

if m != n: 

raise ValueError("A square triangular matrix should be provided.") 

 

# A vector `e` with components selected from {+1, -1} 

# is selected so that the solution `w` to the system 

# `U.T w = e` is as large as possible. Implementation 

# based on algorithm 3.5.1, p. 142, from reference [2] 

# adapted for lower triangular matrix. 

 

p = np.zeros(n) 

w = np.empty(n) 

 

# Implemented according to: Golub, G. H., Van Loan, C. F. (2013). 

# "Matrix computations". Forth Edition. JHU press. pp. 140-142. 

for k in range(n): 

wp = (1-p[k]) / U.T[k, k] 

wm = (-1-p[k]) / U.T[k, k] 

pp = p[k+1:] + U.T[k+1:, k]*wp 

pm = p[k+1:] + U.T[k+1:, k]*wm 

 

if abs(wp) + norm(pp, 1) >= abs(wm) + norm(pm, 1): 

w[k] = wp 

p[k+1:] = pp 

else: 

w[k] = wm 

p[k+1:] = pm 

 

# The system `U v = w` is solved using backward substitution. 

v = solve_triangular(U, w) 

 

v_norm = norm(v) 

w_norm = norm(w) 

 

# Smallest singular value 

s_min = w_norm / v_norm 

 

# Associated vector 

z_min = v / v_norm 

 

return s_min, z_min 

 

 

def gershgorin_bounds(H): 

""" 

Given a square matrix ``H`` compute upper 

and lower bounds for its eigenvalues (Gregoshgorin Bounds). 

Defined ref. [1]. 

 

References 

---------- 

.. [1] Conn, A. R., Gould, N. I., & Toint, P. L. 

Trust region methods. 2000. Siam. pp. 19. 

""" 

 

H_diag = np.diag(H) 

H_diag_abs = np.abs(H_diag) 

H_row_sums = np.sum(np.abs(H), axis=1) 

lb = np.min(H_diag + H_diag_abs - H_row_sums) 

ub = np.max(H_diag - H_diag_abs + H_row_sums) 

 

return lb, ub 

 

 

def singular_leading_submatrix(A, U, k): 

""" 

Compute term that makes the leading ``k`` by ``k`` 

submatrix from ``A`` singular. 

 

Parameters 

---------- 

A : ndarray 

Symmetric matrix that is not positive definite. 

U : ndarray 

Upper triangular matrix resulting of an incomplete 

Cholesky decomposition of matrix ``A``. 

k : int 

Positive integer such that the leading k by k submatrix from 

`A` is the first non-positive definite leading submatrix. 

 

Returns 

------- 

delta : float 

Amount that should be added to the element (k, k) of the 

leading k by k submatrix of ``A`` to make it singular. 

v : ndarray 

A vector such that ``v.T B v = 0``. Where B is the matrix A after 

``delta`` is added to its element (k, k). 

""" 

 

# Compute delta 

delta = np.sum(U[:k-1, k-1]**2) - A[k-1, k-1] 

 

n = len(A) 

 

# Inicialize v 

v = np.zeros(n) 

v[k-1] = 1 

 

# Compute the remaining values of v by solving a triangular system. 

if k != 1: 

v[:k-1] = solve_triangular(U[:k-1, :k-1], -U[:k-1, k-1]) 

 

return delta, v 

 

 

class IterativeSubproblem(BaseQuadraticSubproblem): 

"""Quadratic subproblem solved by nearly exact iterative method. 

 

Notes 

----- 

This subproblem solver was based on [1]_, [2]_ and [3]_, 

which implement similar algorithms. The algorithm is basically 

that of [1]_ but ideas from [2]_ and [3]_ were also used. 

 

References 

---------- 

.. [1] A.R. Conn, N.I. Gould, and P.L. Toint, "Trust region methods", 

Siam, pp. 169-200, 2000. 

.. [2] J. Nocedal and S. Wright, "Numerical optimization", 

Springer Science & Business Media. pp. 83-91, 2006. 

.. [3] J.J. More and D.C. Sorensen, "Computing a trust region step", 

SIAM Journal on Scientific and Statistical Computing, vol. 4(3), 

pp. 553-572, 1983. 

""" 

 

# UPDATE_COEFF appears in reference [1]_ 

# in formula 7.3.14 (p. 190) named as "theta". 

# As recommended there it value is fixed in 0.01. 

UPDATE_COEFF = 0.01 

 

EPS = np.finfo(float).eps 

 

def __init__(self, x, fun, jac, hess, hessp=None, 

k_easy=0.1, k_hard=0.2): 

 

super(IterativeSubproblem, self).__init__(x, fun, jac, hess) 

 

# When the trust-region shrinks in two consecutive 

# calculations (``tr_radius < previous_tr_radius``) 

# the lower bound ``lambda_lb`` may be reused, 

# facilitating the convergence. To indicate no 

# previous value is known at first ``previous_tr_radius`` 

# is set to -1 and ``lambda_lb`` to None. 

self.previous_tr_radius = -1 

self.lambda_lb = None 

 

self.niter = 0 

 

# ``k_easy`` and ``k_hard`` are parameters used 

# to determine the stop criteria to the iterative 

# subproblem solver. Take a look at pp. 194-197 

# from reference _[1] for a more detailed description. 

self.k_easy = k_easy 

self.k_hard = k_hard 

 

# Get Lapack function for cholesky decomposition. 

# The implemented Scipy wrapper does not return 

# the incomplete factorization needed by the method. 

self.cholesky, = get_lapack_funcs(('potrf',), (self.hess,)) 

 

# Get info about Hessian 

self.dimension = len(self.hess) 

self.hess_gershgorin_lb,\ 

self.hess_gershgorin_ub = gershgorin_bounds(self.hess) 

self.hess_inf = norm(self.hess, np.Inf) 

self.hess_fro = norm(self.hess, 'fro') 

 

# A constant such that for vectors smaler than that 

# backward substituition is not reliable. It was stabilished 

# based on Golub, G. H., Van Loan, C. F. (2013). 

# "Matrix computations". Forth Edition. JHU press., p.165. 

self.CLOSE_TO_ZERO = self.dimension * self.EPS * self.hess_inf 

 

def _initial_values(self, tr_radius): 

"""Given a trust radius, return a good initial guess for 

the damping factor, the lower bound and the upper bound. 

The values were chosen accordingly to the guidelines on 

section 7.3.8 (p. 192) from [1]_. 

""" 

 

# Upper bound for the damping factor 

lambda_ub = max(0, self.jac_mag/tr_radius + min(-self.hess_gershgorin_lb, 

self.hess_fro, 

self.hess_inf)) 

 

# Lower bound for the damping factor 

lambda_lb = max(0, -min(self.hess.diagonal()), 

self.jac_mag/tr_radius - min(self.hess_gershgorin_ub, 

self.hess_fro, 

self.hess_inf)) 

 

# Improve bounds with previous info 

if tr_radius < self.previous_tr_radius: 

lambda_lb = max(self.lambda_lb, lambda_lb) 

 

# Initial guess for the damping factor 

if lambda_lb == 0: 

lambda_initial = 0 

else: 

lambda_initial = max(np.sqrt(lambda_lb * lambda_ub), 

lambda_lb + self.UPDATE_COEFF*(lambda_ub-lambda_lb)) 

 

return lambda_initial, lambda_lb, lambda_ub 

 

def solve(self, tr_radius): 

"""Solve quadratic subproblem""" 

 

lambda_current, lambda_lb, lambda_ub = self._initial_values(tr_radius) 

n = self.dimension 

hits_boundary = True 

already_factorized = False 

self.niter = 0 

 

while True: 

 

# Compute Cholesky factorization 

if already_factorized: 

already_factorized = False 

else: 

H = self.hess+lambda_current*np.eye(n) 

U, info = self.cholesky(H, lower=False, 

overwrite_a=False, 

clean=True) 

 

self.niter += 1 

 

# Check if factorization succeeded 

if info == 0 and self.jac_mag > self.CLOSE_TO_ZERO: 

# Successful factorization 

 

# Solve `U.T U p = s` 

p = cho_solve((U, False), -self.jac) 

 

p_norm = norm(p) 

 

# Check for interior convergence 

if p_norm <= tr_radius and lambda_current == 0: 

hits_boundary = False 

break 

 

# Solve `U.T w = p` 

w = solve_triangular(U, p, trans='T') 

 

w_norm = norm(w) 

 

# Compute Newton step accordingly to 

# formula (4.44) p.87 from ref [2]_. 

delta_lambda = (p_norm/w_norm)**2 * (p_norm-tr_radius)/tr_radius 

lambda_new = lambda_current + delta_lambda 

 

if p_norm < tr_radius: # Inside boundary 

s_min, z_min = estimate_smallest_singular_value(U) 

 

ta, tb = self.get_boundaries_intersections(p, z_min, 

tr_radius) 

 

# Choose `step_len` with the smallest magnitude. 

# The reason for this choice is explained at 

# ref [3]_, p. 6 (Immediately before the formula 

# for `tau`). 

step_len = min([ta, tb], key=abs) 

 

# Compute the quadratic term (p.T*H*p) 

quadratic_term = np.dot(p, np.dot(H, p)) 

 

# Check stop criteria 

relative_error = (step_len**2 * s_min**2) / (quadratic_term + lambda_current*tr_radius**2) 

if relative_error <= self.k_hard: 

p += step_len * z_min 

break 

 

# Update uncertanty bounds 

lambda_ub = lambda_current 

lambda_lb = max(lambda_lb, lambda_current - s_min**2) 

 

# Compute Cholesky factorization 

H = self.hess + lambda_new*np.eye(n) 

c, info = self.cholesky(H, lower=False, 

overwrite_a=False, 

clean=True) 

 

# Check if the factorization have succeeded 

# 

if info == 0: # Successful factorization 

# Update damping factor 

lambda_current = lambda_new 

already_factorized = True 

else: # Unsuccessful factorization 

# Update uncertanty bounds 

lambda_lb = max(lambda_lb, lambda_new) 

 

# Update damping factor 

lambda_current = max(np.sqrt(lambda_lb * lambda_ub), 

lambda_lb + self.UPDATE_COEFF*(lambda_ub-lambda_lb)) 

 

else: # Outside boundary 

# Check stop criteria 

relative_error = abs(p_norm - tr_radius) / tr_radius 

if relative_error <= self.k_easy: 

break 

 

# Update uncertanty bounds 

lambda_lb = lambda_current 

 

# Update damping factor 

lambda_current = lambda_new 

 

elif info == 0 and self.jac_mag <= self.CLOSE_TO_ZERO: 

# jac_mag very close to zero 

 

# Check for interior convergence 

if lambda_current == 0: 

p = np.zeros(n) 

hits_boundary = False 

break 

 

s_min, z_min = estimate_smallest_singular_value(U) 

step_len = tr_radius 

 

# Check stop criteria 

if step_len**2 * s_min**2 <= self.k_hard * lambda_current * tr_radius**2: 

p = step_len * z_min 

break 

 

# Update uncertanty bounds 

lambda_ub = lambda_current 

lambda_lb = max(lambda_lb, lambda_current - s_min**2) 

 

# Update damping factor 

lambda_current = max(np.sqrt(lambda_lb * lambda_ub), 

lambda_lb + self.UPDATE_COEFF*(lambda_ub-lambda_lb)) 

 

else: # Unsuccessful factorization 

 

# Compute auxiliary terms 

delta, v = singular_leading_submatrix(H, U, info) 

v_norm = norm(v) 

 

# Update uncertanty interval 

lambda_lb = max(lambda_lb, lambda_current + delta/v_norm**2) 

 

# Update damping factor 

lambda_current = max(np.sqrt(lambda_lb * lambda_ub), 

lambda_lb + self.UPDATE_COEFF*(lambda_ub-lambda_lb)) 

 

self.lambda_lb = lambda_lb 

self.lambda_current = lambda_current 

self.previous_tr_radius = tr_radius 

 

return p, hits_boundary