""" 

Dogleg algorithm with rectangular trust regions for least-squares minimization.

The description of the algorithm can be found in [Voglis]_. The algorithm
performs trust-region iterations, but the trust regions are rectangular rather
than the conventional ellipsoids. The intersection of a trust region with the
initial feasible region (a box) is again a rectangle, so on each iteration a
bound-constrained quadratic subproblem is solved.

 

The quadratic subproblem is solved by the well-known dogleg approach: the
function is minimized along a piecewise-linear "dogleg" path ([NumOpt]_,
Chapter 4). If the Jacobian is not rank-deficient, the function decreases
along this path, and the optimization amounts to following the path as long
as the point stays within the bounds. For safety, a constrained Cauchy step
(along the anti-gradient) is used in rank-deficient cases; in such situations
convergence might be slow.

 

If during the iterations some variable hits its initial bound and the
corresponding component of the anti-gradient points outside the feasible
region, then the next dogleg step cannot make progress along that variable.
Such variables satisfy the first-order optimality conditions and are excluded
before the next dogleg step is computed.

 

The Gauss-Newton step can be computed exactly with `numpy.linalg.lstsq` (for
dense Jacobian matrices) or approximately with the iterative procedure
`scipy.sparse.linalg.lsmr` (for dense and sparse matrices, or for a Jacobian
provided as a LinearOperator). The second option allows solving very large
problems (up to a couple of million residuals on a regular PC), provided the
Jacobian matrix is sufficiently sparse. Note, however, that dogbox is not
well suited to problems with a large number of constraints, because variables
are excluded and re-included on each iteration (the required number of
function evaluations might be high, or the accuracy of the solution poor);
its large-scale usage is therefore probably limited to unconstrained problems.

 

References 

---------- 

.. [Voglis] C. Voglis and I. E. Lagaris, "A Rectangular Trust Region Dogleg
            Approach for Unconstrained and Bound Constrained Nonlinear
            Optimization", WSEAS International Conference on Applied
            Mathematics, Corfu, Greece, 2004.
.. [NumOpt] J. Nocedal and S. J. Wright, "Numerical Optimization",
            2nd edition.
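
Examples
--------
This module is not meant to be used directly; the solver is selected through
the public `scipy.optimize.least_squares` interface. A minimal, purely
illustrative sketch (the residual function and its numbers are hypothetical):

>>> import numpy as np
>>> from scipy.optimize import least_squares
>>> def residuals(p):
...     t = np.linspace(0, 1, 10)
...     return p[0] * np.exp(p[1] * t) - np.exp(0.8 * t)
>>> res = least_squares(residuals, [2.0, 0.1],
...                     bounds=([0.0, 0.0], [10.0, 5.0]),
...                     method='dogbox')

For large sparse problems, ``tr_solver='lsmr'`` can additionally be passed to
`least_squares` so that the Gauss-Newton step is computed iteratively.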

""" 

from __future__ import division, print_function, absolute_import 

 

import numpy as np 

from numpy.linalg import lstsq, norm 

 

from scipy.sparse.linalg import LinearOperator, aslinearoperator, lsmr 

from scipy.optimize import OptimizeResult 

from scipy._lib.six import string_types 

 

from .common import ( 

step_size_to_bound, in_bounds, update_tr_radius, evaluate_quadratic, 

build_quadratic_1d, minimize_quadratic_1d, compute_grad, 

compute_jac_scale, check_termination, scale_for_robust_loss_function, 

print_header_nonlinear, print_iteration_nonlinear) 

 

 

def lsmr_operator(Jop, d, active_set): 

"""Compute LinearOperator to use in LSMR by dogbox algorithm. 

 

`active_set` mask is used to excluded active variables from computations 

of matrix-vector products. 

""" 

m, n = Jop.shape 

 

def matvec(x): 

x_free = x.ravel().copy() 

x_free[active_set] = 0 
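        # Active components have just been zeroed so they contribute nothing
        # to the product; d applies the variable scaling.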

        return Jop.matvec(x_free * d)

 

def rmatvec(x): 

r = d * Jop.rmatvec(x) 

r[active_set] = 0 

return r 

 

return LinearOperator((m, n), matvec=matvec, rmatvec=rmatvec, dtype=float) 

 

 

def find_intersection(x, tr_bounds, lb, ub): 

"""Find intersection of trust-region bounds and initial bounds. 

 

Returns 

------- 

lb_total, ub_total : ndarray with shape of x 

Lower and upper bounds of the intersection region. 

orig_l, orig_u : ndarray of bool with shape of x 

True means that an original bound is taken as a corresponding bound 

in the intersection region. 

tr_l, tr_u : ndarray of bool with shape of x 

True means that a trust-region bound is taken as a corresponding bound 

in the intersection region. 

""" 

lb_centered = lb - x 

ub_centered = ub - x 

 

lb_total = np.maximum(lb_centered, -tr_bounds) 

ub_total = np.minimum(ub_centered, tr_bounds) 

 

orig_l = np.equal(lb_total, lb_centered) 

orig_u = np.equal(ub_total, ub_centered) 

 

tr_l = np.equal(lb_total, -tr_bounds) 

tr_u = np.equal(ub_total, tr_bounds) 

 

return lb_total, ub_total, orig_l, orig_u, tr_l, tr_u 

 

 

def dogleg_step(x, newton_step, g, a, b, tr_bounds, lb, ub): 

"""Find dogleg step in a rectangular region. 

 

Returns 

------- 

step : ndarray, shape (n,) 

Computed dogleg step. 

bound_hits : ndarray of int, shape (n,) 

Each component shows whether a corresponding variable hits the 

initial bound after the step is taken: 

* 0 - a variable doesn't hit the bound. 

* -1 - lower bound is hit. 

* 1 - upper bound is hit. 

tr_hit : bool 

Whether the step hit the boundary of the trust-region. 

""" 

lb_total, ub_total, orig_l, orig_u, tr_l, tr_u = find_intersection( 

x, tr_bounds, lb, ub 

) 

bound_hits = np.zeros_like(x, dtype=int) 

 

if in_bounds(newton_step, lb_total, ub_total): 

return newton_step, bound_hits, False 

 

to_bounds, _ = step_size_to_bound(np.zeros_like(x), -g, lb_total, ub_total) 

 

    # The classical dogleg algorithm would check whether the Cauchy step fits
    # into the bounds and simply return its constrained version if not. But in
    # a rectangular trust region it makes sense to try to improve the
    # constrained Cauchy step too, so we don't distinguish these two cases.
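    # a and b are the coefficients of the 1-D quadratic model along -g, and
    # to_bounds is the largest step along -g that stays feasible;
    # minimize_quadratic_1d returns the minimizing step length on that interval.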

 

cauchy_step = -minimize_quadratic_1d(a, b, 0, to_bounds)[0] * g 

 

step_diff = newton_step - cauchy_step 

step_size, hits = step_size_to_bound(cauchy_step, step_diff, 

lb_total, ub_total) 

bound_hits[(hits < 0) & orig_l] = -1 

bound_hits[(hits > 0) & orig_u] = 1 

tr_hit = np.any((hits < 0) & tr_l | (hits > 0) & tr_u) 

 

return cauchy_step + step_size * step_diff, bound_hits, tr_hit 

 

 

def dogbox(fun, jac, x0, f0, J0, lb, ub, ftol, xtol, gtol, max_nfev, x_scale, 

loss_function, tr_solver, tr_options, verbose): 

f = f0 

f_true = f.copy() 

nfev = 1 

 

J = J0 

njev = 1 

 

if loss_function is not None: 

rho = loss_function(f) 

cost = 0.5 * np.sum(rho[0]) 

J, f = scale_for_robust_loss_function(J, f, rho) 

else: 

cost = 0.5 * np.dot(f, f) 

 

g = compute_grad(J, f) 

 

jac_scale = isinstance(x_scale, string_types) and x_scale == 'jac' 

if jac_scale: 

scale, scale_inv = compute_jac_scale(J) 

else: 

scale, scale_inv = x_scale, 1 / x_scale 
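    # The trust-region radius Delta is measured in scaled variables; start
    # from the inf-norm of the scaled x0 and fall back to 1 if x0 is zero.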

 

Delta = norm(x0 * scale_inv, ord=np.inf) 

if Delta == 0: 

Delta = 1.0 

 

on_bound = np.zeros_like(x0, dtype=int) 

on_bound[np.equal(x0, lb)] = -1 

on_bound[np.equal(x0, ub)] = 1 

 

x = x0 

step = np.empty_like(x0) 

 

if max_nfev is None: 

max_nfev = x0.size * 100 

 

termination_status = None 

iteration = 0 

step_norm = None 

actual_reduction = None 

 

if verbose == 2: 

print_header_nonlinear() 

 

while True: 
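        # A variable is considered active when it sits on a bound and the
        # anti-gradient points out of the feasible region (e.g. at a lower
        # bound with g > 0); active variables are excluded from the step.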

active_set = on_bound * g < 0 

free_set = ~active_set 

 

g_free = g[free_set] 

g_full = g.copy() 

g[active_set] = 0 

 

g_norm = norm(g, ord=np.inf) 

if g_norm < gtol: 

termination_status = 1 

 

if verbose == 2: 

print_iteration_nonlinear(iteration, nfev, cost, actual_reduction, 

step_norm, g_norm) 

 

if termination_status is not None or nfev == max_nfev: 

break 

 

x_free = x[free_set] 

lb_free = lb[free_set] 

ub_free = ub[free_set] 

scale_free = scale[free_set] 

 

        # Compute the (Gauss-)Newton step and build the quadratic model
        # used for the Cauchy step.

if tr_solver == 'exact': 

J_free = J[:, free_set] 

newton_step = lstsq(J_free, -f, rcond=-1)[0] 

 

# Coefficients for the quadratic model along the anti-gradient. 

a, b = build_quadratic_1d(J_free, g_free, -g_free) 

elif tr_solver == 'lsmr': 

Jop = aslinearoperator(J) 

 

            # We compute the lsmr step in scaled variables and then transform
            # it back to the original variables. If lsmr returned the exact
            # least-squares solution, this would be equivalent to doing no
            # transformation at all, but in practice it works better this way.

 

            # We pass active_set to make the computations behave as if we had
            # selected the free subset of J's columns, but without doing any
            # actual slicing, which is expensive for sparse matrices and
            # impossible for a LinearOperator.

 

lsmr_op = lsmr_operator(Jop, scale, active_set) 

newton_step = -lsmr(lsmr_op, f, **tr_options)[0][free_set] 

newton_step *= scale_free 

 

# Components of g for active variables were zeroed, so this call 

# is correct and equivalent to using J_free and g_free. 

a, b = build_quadratic_1d(Jop, g, -g) 

 

actual_reduction = -1.0 

while actual_reduction <= 0 and nfev < max_nfev: 

tr_bounds = Delta * scale_free 

 

step_free, on_bound_free, tr_hit = dogleg_step( 

x_free, newton_step, g_free, a, b, tr_bounds, lb_free, ub_free) 

 

step.fill(0.0) 

step[free_set] = step_free 

 

if tr_solver == 'exact': 

predicted_reduction = -evaluate_quadratic(J_free, g_free, 

step_free) 

elif tr_solver == 'lsmr': 

predicted_reduction = -evaluate_quadratic(Jop, g, step) 

 

x_new = x + step 

f_new = fun(x_new) 

nfev += 1 

 

step_h_norm = norm(step * scale_inv, ord=np.inf) 
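            # If the new residual is not finite, shrink the trust region and
            # retry from the same point.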

 

if not np.all(np.isfinite(f_new)): 

Delta = 0.25 * step_h_norm 

continue 

 

# Usual trust-region step quality estimation. 

if loss_function is not None: 

cost_new = loss_function(f_new, cost_only=True) 

else: 

cost_new = 0.5 * np.dot(f_new, f_new) 

actual_reduction = cost - cost_new 

 

Delta, ratio = update_tr_radius( 

Delta, actual_reduction, predicted_reduction, 

step_h_norm, tr_hit 

) 

 

step_norm = norm(step) 

termination_status = check_termination( 

actual_reduction, cost, step_norm, norm(x), ratio, ftol, xtol) 

 

if termination_status is not None: 

break 

 

if actual_reduction > 0: 

on_bound[free_set] = on_bound_free 

 

x = x_new 

# Set variables exactly at the boundary. 

mask = on_bound == -1 

x[mask] = lb[mask] 

mask = on_bound == 1 

x[mask] = ub[mask] 

 

f = f_new 

f_true = f.copy() 

 

cost = cost_new 

 

J = jac(x, f) 

njev += 1 

 

if loss_function is not None: 

rho = loss_function(f) 

J, f = scale_for_robust_loss_function(J, f, rho) 

 

g = compute_grad(J, f) 

 

if jac_scale: 

scale, scale_inv = compute_jac_scale(J, scale_inv) 

else: 

step_norm = 0 

actual_reduction = 0 

 

iteration += 1 

 

if termination_status is None: 

termination_status = 0 

 

return OptimizeResult( 

x=x, cost=cost, fun=f_true, jac=J, grad=g_full, optimality=g_norm, 

active_mask=on_bound, nfev=nfev, njev=njev, status=termination_status)