"""Trust-region optimization.""" 

from __future__ import division, print_function, absolute_import 

 

import math 

 

import numpy as np 

import scipy.linalg 

from .optimize import (_check_unknown_options, wrap_function, _status_message, 

OptimizeResult) 

 

__all__ = [] 

 

 

class BaseQuadraticSubproblem(object):
    """
    Base/abstract class defining the quadratic model for trust-region
    minimization. Child classes must implement the ``solve`` method.

    Values of the objective function, Jacobian and Hessian (if provided) at
    the current iterate ``x`` are evaluated on demand and then stored as
    attributes ``fun``, ``jac``, ``hess``.
    """

    def __init__(self, x, fun, jac, hess=None, hessp=None):
        self._x = x
        self._f = None
        self._g = None
        self._h = None
        self._g_mag = None
        self._cauchy_point = None
        self._newton_point = None
        self._fun = fun
        self._jac = jac
        self._hess = hess
        self._hessp = hessp

    def __call__(self, p):
        # Evaluate the quadratic model m(p) = f + g.p + 0.5*p.Bp at step p.
        return self.fun + np.dot(self.jac, p) + 0.5 * np.dot(p, self.hessp(p))

    @property
    def fun(self):
        """Value of objective function at current iteration."""
        if self._f is None:
            self._f = self._fun(self._x)
        return self._f

    @property
    def jac(self):
        """Value of Jacobian of objective function at current iteration."""
        if self._g is None:
            self._g = self._jac(self._x)
        return self._g

    @property
    def hess(self):
        """Value of Hessian of objective function at current iteration."""
        if self._h is None:
            self._h = self._hess(self._x)
        return self._h

    def hessp(self, p):
        # Hessian-vector product: use the user-supplied callable if given,
        # otherwise fall back to multiplying by the full Hessian.
        if self._hessp is not None:
            return self._hessp(self._x, p)
        else:
            return np.dot(self.hess, p)

    @property
    def jac_mag(self):
        """Magnitude of Jacobian of objective function at current iteration."""
        if self._g_mag is None:
            self._g_mag = scipy.linalg.norm(self.jac)
        return self._g_mag

    def get_boundaries_intersections(self, z, d, trust_radius):
        """
        Solve the scalar quadratic equation ||z + t d|| == trust_radius.
        This is like a line-sphere intersection.
        Return the two values of t, sorted from low to high.
        """
        a = np.dot(d, d)
        b = 2 * np.dot(z, d)
        c = np.dot(z, z) - trust_radius**2
        sqrt_discriminant = math.sqrt(b*b - 4*a*c)

        # The following calculation is mathematically equivalent to:
        #   ta = (-b - sqrt_discriminant) / (2*a)
        #   tb = (-b + sqrt_discriminant) / (2*a)
        # but produces smaller round-off errors. See Golub and Van Loan,
        # Matrix Computations, p. 97, for a better justification.
        aux = b + math.copysign(sqrt_discriminant, b)
        ta = -aux / (2*a)
        tb = -2*c / aux
        return sorted([ta, tb])
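
    # Illustrative note (not part of the original module): subproblem
    # solvers typically use the helper above to clip a step to the trust
    # boundary by taking the larger root, along the lines of
    #     ta, tb = self.get_boundaries_intersections(z, d, trust_radius)
    #     p_boundary = z + tb * d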

 

    def solve(self, trust_radius):
        raise NotImplementedError('The solve method should be implemented by '
                                  'the child class')
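

# Illustrative sketch (not part of the original module): a minimal child
# class showing the contract ``solve`` must fulfil -- return a step ``p``
# and a flag saying whether ``p`` reaches the trust-region boundary.  This
# hypothetical example steps to the Cauchy point, the minimizer of the
# quadratic model along the steepest-descent direction inside the region;
# the production solvers (dogleg, CG-Steihaug, nearly exact) follow the
# same contract.
class CauchyPointSubproblem(BaseQuadraticSubproblem):
    """Hypothetical example subproblem stepping to the Cauchy point."""

    def solve(self, trust_radius):
        # The driver only calls solve while ||g|| >= gtol > 0, so the
        # divisions below are safe.
        g = self.jac
        g_mag = self.jac_mag
        gBg = np.dot(g, self.hessp(g))
        # Minimize m(-t*g) over t >= 0 subject to ||t*g|| <= trust_radius.
        if gBg > 0:
            tau = min(1.0, g_mag**3 / (trust_radius * gBg))
        else:
            # Nonpositive curvature along -g: the model keeps decreasing
            # all the way to the boundary.
            tau = 1.0
        p = -tau * (trust_radius / g_mag) * g
        hits_boundary = (tau == 1.0)
        return p, hits_boundary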

 

 

def _minimize_trust_region(fun, x0, args=(), jac=None, hess=None, hessp=None,
                           subproblem=None, initial_trust_radius=1.0,
                           max_trust_radius=1000.0, eta=0.15, gtol=1e-4,
                           maxiter=None, disp=False, return_all=False,
                           callback=None, inexact=True, **unknown_options):
""" 

Minimization of scalar function of one or more variables using a 

trust-region algorithm. 

 

Options for the trust-region algorithm are: 

initial_trust_radius : float 

Initial trust radius. 

max_trust_radius : float 

Never propose steps that are longer than this value. 

eta : float 

Trust region related acceptance stringency for proposed steps. 

gtol : float 

Gradient norm must be less than `gtol` 

before successful termination. 

maxiter : int 

Maximum number of iterations to perform. 

disp : bool 

If True, print convergence message. 

inexact : bool 

Accuracy to solve subproblems. If True requires less nonlinear 

iterations, but more vector products. Only effective for method 

trust-krylov. 

 

This function is called by the `minimize` function. 

It is not supposed to be called directly. 
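
    Examples
    --------
    This driver is reached through `scipy.optimize.minimize`; for example,
    ``method='trust-ncg'`` pairs it with a conjugate-gradient (Steihaug)
    subproblem solver:

    >>> from scipy.optimize import minimize, rosen, rosen_der, rosen_hess
    >>> res = minimize(rosen, [1.3, 0.7], method='trust-ncg',
    ...                jac=rosen_der, hess=rosen_hess)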

""" 

    _check_unknown_options(unknown_options)

    if jac is None:
        raise ValueError('Jacobian is currently required for trust-region '
                         'methods')
    if hess is None and hessp is None:
        raise ValueError('Either the Hessian or the Hessian-vector product '
                         'is currently required for trust-region methods')
    if subproblem is None:
        raise ValueError('A subproblem solving strategy is required for '
                         'trust-region methods')
    if not (0 <= eta < 0.25):
        raise ValueError('invalid acceptance stringency')
    if max_trust_radius <= 0:
        raise ValueError('the max trust radius must be positive')
    if initial_trust_radius <= 0:
        raise ValueError('the initial trust radius must be positive')
    if initial_trust_radius >= max_trust_radius:
        raise ValueError('the initial trust radius must be less than the '
                         'max trust radius')

    # force the initial guess into a nice format
    x0 = np.asarray(x0).flatten()

    # Wrap the functions, for a couple of reasons.
    # This tracks how many times they have been called
    # and it automatically passes the args.
    nfun, fun = wrap_function(fun, args)
    njac, jac = wrap_function(jac, args)
    nhess, hess = wrap_function(hess, args)
    nhessp, hessp = wrap_function(hessp, args)

    # limit the number of iterations
    if maxiter is None:
        maxiter = len(x0)*200

    # init the search status
    warnflag = 0

    # initialize the search
    trust_radius = initial_trust_radius
    x = x0
    if return_all:
        allvecs = [x]
    m = subproblem(x, fun, jac, hess, hessp)
    k = 0

    # search for the function min
    # do not even start if the gradient is small enough
    while m.jac_mag >= gtol:

        # Solve the sub-problem.
        # This gives us the proposed step relative to the current position
        # and it tells us whether the proposed step
        # has reached the trust region boundary or not.
        try:
            p, hits_boundary = m.solve(trust_radius)
        except np.linalg.LinAlgError:
            warnflag = 3
            break

        # calculate the predicted value at the proposed point
        predicted_value = m(p)

        # define the local approximation at the proposed point
        x_proposed = x + p
        m_proposed = subproblem(x_proposed, fun, jac, hess, hessp)

        # evaluate the ratio defined in equation (4.4)
        actual_reduction = m.fun - m_proposed.fun
        predicted_reduction = m.fun - predicted_value
        if predicted_reduction <= 0:
            warnflag = 2
            break
        rho = actual_reduction / predicted_reduction
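
        # Explanatory note: rho near 1 means the quadratic model predicted
        # the actual reduction well, so the trust region may grow; a small
        # or negative rho means the model is unreliable at this radius, so
        # the region shrinks and the step may be rejected.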

 

        # update the trust radius according to the actual/predicted ratio
        if rho < 0.25:
            trust_radius *= 0.25
        elif rho > 0.75 and hits_boundary:
            trust_radius = min(2*trust_radius, max_trust_radius)

        # if the ratio is high enough then accept the proposed step
        if rho > eta:
            x = x_proposed
            m = m_proposed

        # append the best guess, call back, increment the iteration count
        if return_all:
            allvecs.append(np.copy(x))
        if callback is not None:
            callback(np.copy(x))
        k += 1

        # check if the gradient is small enough to stop
        if m.jac_mag < gtol:
            warnflag = 0
            break

        # check if we have looked at enough iterations
        if k >= maxiter:
            warnflag = 1
            break

 

    # print some stuff if requested
    status_messages = (
        _status_message['success'],
        _status_message['maxiter'],
        'A bad approximation caused failure to predict improvement.',
        'A linalg error occurred, such as a non-psd Hessian.',
    )
    if disp:
        if warnflag == 0:
            print(status_messages[warnflag])
        else:
            print('Warning: ' + status_messages[warnflag])
        print("         Current function value: %f" % m.fun)
        print("         Iterations: %d" % k)
        print("         Function evaluations: %d" % nfun[0])
        print("         Gradient evaluations: %d" % njac[0])
        print("         Hessian evaluations: %d" % nhess[0])

 

    result = OptimizeResult(x=x, success=(warnflag == 0), status=warnflag,
                            fun=m.fun, jac=m.jac, nfev=nfun[0], njev=njac[0],
                            nhev=nhess[0], nit=k,
                            message=status_messages[warnflag])

    if hess is not None:
        result['hess'] = m.hess

    if return_all:
        result['allvecs'] = allvecs

    return result