Coverage for /usr/local/lib/python3.11/dist-packages/grond/optimisers/highscore/optimiser.py: 82%

499 statements  

« prev     ^ index     » next       coverage.py v6.5.0, created at 2023-10-26 16:25 +0000

1from __future__ import print_function 

2import math 

3import os.path as op 

4import os 

5import logging 

6import time 

7import numpy as num 

8from collections import OrderedDict 

9 

10from pyrocko.guts import StringChoice, Int, Float, Object, List, load_all 

11from pyrocko.guts_array import Array 

12 

13from grond.meta import GrondError, Forbidden, has_get_plot_classes, Path 

14from grond import problems 

15from grond.problems.base import ModelHistory 

16from grond.optimisers.base import Optimiser, OptimiserConfig, BadProblem, \ 

17 OptimiserStatus 

18 

19guts_prefix = 'grond' 

20 

21logger = logging.getLogger('grond.optimisers.highscore.optimiser') 

22 

23 

24def nextpow2(i): 

25 return 2**int(math.ceil(math.log(i)/math.log(2.))) 

26 

27 

28def excentricity_compensated_probabilities(xs, sbx, factor): 

29 inonflat = num.where(sbx != 0.0)[0] 

30 scale = num.zeros_like(sbx) 

31 scale[inonflat] = 1.0 / (sbx[inonflat] * (factor if factor != 0. else 1.0)) 

32 distances_sqr_all = num.sum( 

33 ((xs[num.newaxis, :, :] - xs[:, num.newaxis, :]) * 

34 scale[num.newaxis, num.newaxis, :])**2, axis=2) 

35 probabilities = 1.0 / num.sum(distances_sqr_all < 1.0, axis=1) 

36 # print(num.sort(num.sum(distances_sqr_all < 1.0, axis=1))) 

37 probabilities /= num.sum(probabilities) 

38 return probabilities 

39 

40 

41def excentricity_compensated_choice(xs, sbx, factor, rstate): 

42 probabilities = excentricity_compensated_probabilities( 

43 xs, sbx, factor) 

44 r = rstate.random_sample() 

45 ichoice = num.searchsorted(num.cumsum(probabilities), r) 

46 ichoice = min(ichoice, xs.shape[0]-1) 

47 return ichoice 

48 

49 

50def local_std(xs): 

51 ssbx = num.sort(xs, axis=0) 

52 dssbx = num.diff(ssbx, axis=0) 

53 mdssbx = num.median(dssbx, axis=0) 

54 return mdssbx * dssbx.shape[0] / 2.6 

55 

56 

57class SamplerDistributionChoice(StringChoice): 

58 choices = ['multivariate_normal', 'normal'] 

59 

60 

61class StandardDeviationEstimatorChoice(StringChoice): 

62 choices = [ 

63 'median_density_single_chain', 

64 'standard_deviation_all_chains', 

65 'standard_deviation_single_chain'] 

66 

67 

68class SamplerStartingPointChoice(StringChoice): 

69 choices = ['excentricity_compensated', 'random', 'mean'] 

70 

71 

72class BootstrapTypeChoice(StringChoice): 

73 choices = ['bayesian', 'classic'] 

74 

75 

76def fnone(i): 

77 return i if i is not None else -1 

78 

79 

80class Sample(Object): 

81 

82 '''Sample model with context about how it was generated.''' 

83 

84 model = Array.T(shape=(None,), dtype=float, serialize_as='list') 

85 iphase = Int.T(optional=True) 

86 ichain_base = Int.T(optional=True) 

87 ilink_base = Int.T(optional=True) 

88 imodel_base = Int.T(optional=True) 

89 

90 def preconstrain(self, problem, optimiser=None): 

91 self.model = problem.preconstrain(self.model, optimiser) 

92 

93 def pack_context(self): 

94 i = num.zeros(4, dtype=int) 

95 i[:] = ( 

96 fnone(self.iphase), 

97 fnone(self.ichain_base), 

98 fnone(self.ilink_base), 

99 fnone(self.imodel_base)) 

100 

101 return i 

102 

103 

104class SamplerPhase(Object): 

105 niterations = Int.T( 

106 help='Number of iteration for this phase.') 

107 ntries_preconstrain_limit = Int.T( 

108 default=1000, 

109 help='Tries to find a valid preconstrained sample.') 

110 seed = Int.T( 

111 optional=True, 

112 help='Random state seed.') 

113 

114 def __init__(self, *args, **kwargs): 

115 Object.__init__(self, *args, **kwargs) 

116 self._rstate = None 

117 self.optimiser = None 

118 

119 def get_rstate(self, problem): 

120 if self._rstate is None: 

121 self._rstate = problem.get_rstate_manager().get_rstate( 

122 self.__class__.__name__, self.seed) 

123 

124 return self._rstate 

125 

126 def get_raw_sample(self, problem, iiter, chains): 

127 raise NotImplementedError 

128 

129 def get_sample(self, problem, iiter, chains): 

130 assert 0 <= iiter < self.niterations 

131 

132 ntries_preconstrain = 0 

133 for ntries_preconstrain in range(self.ntries_preconstrain_limit): 

134 try: 

135 sample = self.get_raw_sample(problem, iiter, chains) 

136 sample.preconstrain(problem, self.optimiser) 

137 return sample 

138 

139 except Forbidden: 

140 pass 

141 

142 raise GrondError( 

143 'could not find any suitable candidate sample within %i tries' % ( 

144 self.ntries_preconstrain_limit)) 

145 

146 def set_optimiser(self, optimiser): 

147 self.optimiser = optimiser 

148 

149 

150class InjectionError(Exception): 

151 pass 

152 

153 

154class InjectionSamplerPhase(SamplerPhase): 

155 '''Inject predefined/precomputed models into the optimisation 

156 

157 Injected models are given either as an array or are generated from 

158 sources/events given in a file. Depending on the problem different sources 

159 or events can be used: 

160 

161 * :py:class:`grond.problems.cmt.problem.CMTProblem` uses as input: 

162 * :py:class:`pyrocko.model.event.Event` with 

163 :py:class:`pyrocko.moment_tensor.MomentTensor`, 

164 * :py:class:`pyrocko.gf.seismosizer.DCSource`, 

165 * :py:class:`pyrocko.gf.seismosizer.MTSource`. 

166 * :py:class:`grond.problems.double_dc.problem.DoubleDCProblem` uses as 

167 input: 

168 * :py:class:`pyrocko.gf.seismosizer.DoubleDCSource`. 

169 * :py:class:`grond.problems.vlvd.problem.VLVDProblem` uses as input: 

170 * :py:class:`pyrocko.gf.seismosizer.VLVDSource`. 

171 * :py:class:`grond.problems.rectangular.problem.RectangularProblem` uses as 

172 input: 

173 * :py:class:`pyrocko.gf.seismosizer.RectangularSource`. 

174 * :py:class:`grond.problems.dynamic_rupture.problem.DynamicRuptureProblem` 

175 uses as input: 

176 * :py:class:`pyrocko.gf.seismosizer.PseudoDynamicRupture`. 

177 

178 Only a single source or event file can be handled in one 

179 :py:class:`grond.optimisers.highscore.optimiser.InjectionSamplerPhase`. The 

180 number of iterations is adjusted according to the number of sources or 

181 events found. 

182 ''' 

183 xs_inject = Array.T( 

184 optional=True, 

185 dtype=float, shape=(None, None), 

186 help='Array with the injected models.') 

187 sources_path = Path.T( 

188 optional=True, 

189 help='File with sources to be injected as models') 

190 events_path = Path.T( 

191 optional=True, 

192 help='File with events to be injected as models') 

193 

194 def _xs_inject_from_sources(self, problem): 

195 sources = load_all(filename=self.sources_path) 

196 

197 if len(sources) == 0: 

198 raise InjectionError('No sources found in the given sources_path.') 

199 

200 if num.unique([s.T.classname for s in sources]).shape[0] > 1: 

201 raise InjectionError( 

202 'Sources are not of same type in the given file.') 

203 

204 return num.array([problem.source_to_x(s) for s in sources]) 

205 

206 def _xs_inject_from_events(self, problem): 

207 from pyrocko import model 

208 

209 if not hasattr(problem, 'event_to_x'): 

210 raise InjectionError( 

211 'Events can only be injected using the %s. ' 

212 'Try injecting as sources defining the "sources_path" ' 

213 'attribute instead.' % ( 

214 problems.CMTProblem.T.classname)) 

215 

216 events = model.load_events(filename=self.events_path) 

217 

218 if len(events) == 0: 

219 raise InjectionError('No events found in the given events_path.') 

220 

221 events = [ev for ev in events if ev.moment_tensor is not None] 

222 n_ev = len(events) 

223 

224 if n_ev == 0: 

225 raise InjectionError( 

226 'The loaded events have no moment tensor information.') 

227 

228 return num.array([problem.event_to_x(e) for e in events]) 

229 

230 def xs_inject_from_file(self, problem): 

231 if self.sources_path and self.events_path: 

232 raise AttributeError( 

233 'Sources_path and events_path are both given but mutually ' 

234 'exclusive.') 

235 elif self.sources_path: 

236 xs_inject = self._xs_inject_from_sources(problem) 

237 elif self.events_path: 

238 xs_inject = self._xs_inject_from_events(problem) 

239 else: 

240 raise IndexError( 

241 'No event/source file found to generate xs_inject array from.') 

242 

243 if xs_inject.shape[0] != self.niterations: 

244 logger.warning( 

245 'Number of injected models (%i) not equal to the number of ' 

246 'iterations (%i) and is adjusted accordingly.' % ( 

247 xs_inject.shape[0], self.niterations)) 

248 self.niterations = xs_inject.shape[0] 

249 

250 self.xs_inject = xs_inject 

251 

252 def get_raw_sample(self, problem, iiter, chains): 

253 if self.xs_inject is None: 

254 self.xs_inject_from_file(problem) 

255 

256 return Sample(model=self.xs_inject[iiter, :]) 

257 

258 

259class UniformSamplerPhase(SamplerPhase): 

260 

261 def get_raw_sample(self, problem, iiter, chains): 

262 xbounds = problem.get_parameter_bounds() 

263 return Sample( 

264 model=problem.random_uniform(xbounds, self.get_rstate(problem))) 

265 

266 

267class DirectedSamplerPhase(SamplerPhase): 

268 scatter_scale = Float.T( 

269 optional=True, 

270 help='Scales search radius around the current `highscore` models') 

271 scatter_scale_begin = Float.T( 

272 optional=True, 

273 help='Scaling factor at beginning of the phase.') 

274 scatter_scale_end = Float.T( 

275 optional=True, 

276 help='Scaling factor at the end of the directed phase.') 

277 starting_point = SamplerStartingPointChoice.T( 

278 default='excentricity_compensated', 

279 help='Tunes to the center value of the sampler distribution.' 

280 'May increase the likelihood to draw a highscore member model' 

281 ' off-center to the mean value') 

282 

283 sampler_distribution = SamplerDistributionChoice.T( 

284 default='normal', 

285 help='Distribution new models are drawn from.') 

286 

287 standard_deviation_estimator = StandardDeviationEstimatorChoice.T( 

288 default='median_density_single_chain') 

289 

290 ntries_sample_limit = Int.T(default=1000) 

291 

292 def get_scatter_scale_factor(self, iiter): 

293 s = self.scatter_scale 

294 sa = self.scatter_scale_begin 

295 sb = self.scatter_scale_end 

296 

297 assert s is None or (sa is None and sb is None) 

298 

299 if sa != sb: 

300 tb = float(self.niterations-1) 

301 tau = tb/(math.log(sa) - math.log(sb)) 

302 t0 = math.log(sa) * tau 

303 t = float(iiter) 

304 return num.exp(-(t-t0) / tau) 

305 

306 else: 

307 return s or 1.0 

308 

309 def get_raw_sample(self, problem, iiter, chains): 

310 rstate = self.get_rstate(problem) 

311 factor = self.get_scatter_scale_factor(iiter) 

312 npar = problem.nparameters 

313 pnames = problem.parameter_names 

314 xbounds = problem.get_parameter_bounds() 

315 

316 ilink_choice = None 

317 ichain_choice = num.argmin(chains.accept_sum) 

318 

319 if self.starting_point == 'excentricity_compensated': 

320 models = chains.models(ichain_choice) 

321 ilink_choice = excentricity_compensated_choice( 

322 models, 

323 chains.standard_deviation_models( 

324 ichain_choice, self.standard_deviation_estimator), 

325 2., rstate) 

326 

327 xchoice = chains.model(ichain_choice, ilink_choice) 

328 

329 elif self.starting_point == 'random': 

330 ilink_choice = rstate.randint(0, chains.nlinks) 

331 xchoice = chains.model(ichain_choice, ilink_choice) 

332 

333 elif self.starting_point == 'mean': 

334 xchoice = chains.mean_model(ichain_choice) 

335 

336 else: 

337 assert False, 'invalid starting_point choice: %s' % ( 

338 self.starting_point) 

339 

340 ntries_sample = 0 

341 if self.sampler_distribution == 'normal': 

342 x = num.zeros(npar, dtype=float) 

343 sx = chains.standard_deviation_models( 

344 ichain_choice, self.standard_deviation_estimator) 

345 

346 for ipar in range(npar): 

347 ntries = 0 

348 while True: 

349 if sx[ipar] > 0.: 

350 v = rstate.normal( 

351 xchoice[ipar], 

352 factor*sx[ipar]) 

353 else: 

354 v = xchoice[ipar] 

355 

356 if xbounds[ipar, 0] <= v and \ 

357 v <= xbounds[ipar, 1]: 

358 

359 break 

360 

361 if ntries > self.ntries_sample_limit: 

362 logger.warning( 

363 'failed to produce a suitable ' 

364 'candidate sample from normal ' 

365 'distribution for parameter \'%s\'' 

366 '- drawing from uniform instead.' % 

367 pnames[ipar]) 

368 v = rstate.uniform(xbounds[ipar, 0], 

369 xbounds[ipar, 1]) 

370 break 

371 

372 ntries += 1 

373 

374 x[ipar] = v 

375 

376 elif self.sampler_distribution == 'multivariate_normal': 

377 ok_mask_sum = num.zeros(npar, dtype=int) 

378 while True: 

379 ntries_sample += 1 

380 xcandi = rstate.multivariate_normal( 

381 xchoice, factor**2 * chains.cov(ichain_choice)) 

382 

383 ok_mask = num.logical_and( 

384 xbounds[:, 0] <= xcandi, xcandi <= xbounds[:, 1]) 

385 

386 if num.all(ok_mask): 

387 break 

388 

389 ok_mask_sum += ok_mask 

390 

391 if ntries_sample > self.ntries_sample_limit: 

392 logger.warning( 

393 'failed to produce a suitable candidate ' 

394 'sample from multivariate normal ' 

395 'distribution, (%s) - drawing from uniform instead' % 

396 ', '.join('%s:%i' % xx for xx in 

397 zip(pnames, ok_mask_sum))) 

398 xbounds = problem.get_parameter_bounds() 

399 xcandi = problem.random_uniform(xbounds, rstate) 

400 break 

401 

402 x = xcandi 

403 

404 imodel_base = None 

405 if ilink_choice is not None: 

406 imodel_base = chains.imodel(ichain_choice, ilink_choice) 

407 

408 return Sample( 

409 model=x, 

410 ichain_base=ichain_choice, 

411 ilink_base=ilink_choice, 

412 imodel_base=imodel_base) 

413 

414 

415def make_bayesian_weights(nbootstrap, nmisfits, 

416 type='bayesian', rstate=None): 

417 ws = num.zeros((nbootstrap, nmisfits)) 

418 if rstate is None: 

419 rstate = num.random.RandomState() 

420 

421 for ibootstrap in range(nbootstrap): 

422 if type == 'classic': 

423 ii = rstate.randint(0, nmisfits, size=nmisfits) 

424 ws[ibootstrap, :] = num.histogram( 

425 ii, nmisfits, (-0.5, nmisfits - 0.5))[0] 

426 elif type == 'bayesian': 

427 f = rstate.uniform(0., 1., size=nmisfits+1) 

428 f[0] = 0. 

429 f[-1] = 1. 

430 f = num.sort(f) 

431 g = f[1:] - f[:-1] 

432 ws[ibootstrap, :] = g * nmisfits 

433 else: 

434 assert False 

435 return ws 

436 

437 

438class Chains(object): 

439 def __init__( 

440 self, problem, history, nchains, nlinks_cap): 

441 

442 self.problem = problem 

443 self.history = history 

444 self.nchains = nchains 

445 self.nlinks_cap = nlinks_cap 

446 self.chains_m = num.zeros( 

447 (self.nchains, nlinks_cap), dtype=float) 

448 self.chains_i = num.zeros( 

449 (self.nchains, nlinks_cap), dtype=int) 

450 self.nlinks = 0 

451 self.nread = 0 

452 

453 self.accept_sum = num.zeros(self.nchains, dtype=int) 

454 self._acceptance_history = num.zeros( 

455 (self.nchains, 1024), dtype=bool) 

456 

457 history.add_listener(self) 

458 

459 def goto(self, n=None): 

460 if n is None: 

461 n = self.history.nmodels 

462 

463 n = min(self.history.nmodels, n) 

464 

465 assert self.nread <= n 

466 

467 while self.nread < n: 

468 nread = self.nread 

469 gbms = self.history.bootstrap_misfits[nread, :] 

470 

471 self.chains_m[:, self.nlinks] = gbms 

472 self.chains_i[:, self.nlinks] = nread 

473 nbootstrap = self.chains_m.shape[0] 

474 

475 self.nlinks += 1 

476 chains_m = self.chains_m 

477 chains_i = self.chains_i 

478 

479 for ichain in range(nbootstrap): 

480 isort = num.argsort(chains_m[ichain, :self.nlinks]) 

481 chains_m[ichain, :self.nlinks] = chains_m[ichain, isort] 

482 chains_i[ichain, :self.nlinks] = chains_i[ichain, isort] 

483 

484 if self.nlinks == self.nlinks_cap: 

485 accept = (chains_i[:, self.nlinks_cap-1] != nread) \ 

486 .astype(bool) 

487 self.nlinks -= 1 

488 else: 

489 accept = num.ones(self.nchains, dtype=bool) 

490 

491 self._append_acceptance(accept) 

492 self.accept_sum += accept 

493 self.nread += 1 

494 

495 def load(self): 

496 return self.goto() 

497 

498 def extend(self, ioffset, n, models, misfits, sampler_contexts): 

499 self.goto(ioffset + n) 

500 

501 def indices(self, ichain): 

502 if ichain is not None: 

503 return self.chains_i[ichain, :self.nlinks] 

504 else: 

505 return self.chains_i[:, :self.nlinks].ravel() 

506 

507 def models(self, ichain=None): 

508 return self.history.models[self.indices(ichain), :] 

509 

510 def model(self, ichain, ilink): 

511 return self.history.models[self.chains_i[ichain, ilink], :] 

512 

513 def imodel(self, ichain, ilink): 

514 return self.chains_i[ichain, ilink] 

515 

516 def misfits(self, ichain=0): 

517 return self.chains_m[ichain, :self.nlinks] 

518 

519 def misfit(self, ichain, ilink): 

520 assert ilink < self.nlinks 

521 return self.chains_m[ichain, ilink] 

522 

523 def mean_model(self, ichain=None): 

524 xs = self.models(ichain) 

525 return num.mean(xs, axis=0) 

526 

527 def best_model(self, ichain=0): 

528 xs = self.models(ichain) 

529 return xs[0] 

530 

531 def best_model_misfit(self, ichain=0): 

532 return self.chains_m[ichain, 0] 

533 

534 def standard_deviation_models(self, ichain, estimator): 

535 if estimator == 'median_density_single_chain': 

536 xs = self.models(ichain) 

537 return local_std(xs) 

538 elif estimator == 'standard_deviation_all_chains': 

539 bxs = self.models() 

540 return num.std(bxs, axis=0) 

541 elif estimator == 'standard_deviation_single_chain': 

542 xs = self.models(ichain) 

543 return num.std(xs, axis=0) 

544 else: 

545 assert False, 'invalid standard_deviation_estimator choice' 

546 

547 def covariance_models(self, ichain): 

548 xs = self.models(ichain) 

549 return num.cov(xs.T) 

550 

551 @property 

552 def acceptance_history(self): 

553 return self._acceptance_history[:, :self.nread] 

554 

555 def _append_acceptance(self, acceptance): 

556 if self.nread >= self._acceptance_history.shape[1]: 

557 new_buf = num.zeros( 

558 (self.nchains, nextpow2(self.nread+1)), dtype=bool) 

559 new_buf[:, :self._acceptance_history.shape[1]] = \ 

560 self._acceptance_history 

561 self._acceptance_history = new_buf 

562 self._acceptance_history[:, self.nread] = acceptance 

563 

564 

565@has_get_plot_classes 

566class HighScoreOptimiser(Optimiser): 

567 '''Monte-Carlo-based directed search optimisation with bootstrap.''' 

568 

569 sampler_phases = List.T(SamplerPhase.T()) 

570 chain_length_factor = Float.T(default=8.) 

571 nbootstrap = Int.T(default=100) 

572 bootstrap_type = BootstrapTypeChoice.T(default='bayesian') 

573 bootstrap_seed = Int.T(default=23) 

574 

575 SPARKS = u'\u2581\u2582\u2583\u2584\u2585\u2586\u2587\u2588' 

576 ACCEPTANCE_AVG_LEN = 100 

577 

578 def __init__(self, **kwargs): 

579 Optimiser.__init__(self, **kwargs) 

580 self._bootstrap_weights = None 

581 self._bootstrap_residuals = None 

582 self._correlated_weights = None 

583 self._status_chains = None 

584 self._rstate_bootstrap = None 

585 for phase in self.sampler_phases: 

586 phase.set_optimiser(self) 

587 

588 def get_rstate_bootstrap(self, problem): 

589 if self._rstate_bootstrap is None: 

590 self._rstate_bootstrap = problem.get_rstate_manager().get_rstate( 

591 'bootstraps', self.bootstrap_seed) 

592 

593 return self._rstate_bootstrap 

594 

595 def init_bootstraps(self, problem): 

596 self.init_bootstrap_weights(problem) 

597 self.init_bootstrap_residuals(problem) 

598 

599 def init_bootstrap_weights(self, problem): 

600 logger.info('Initializing Bayesian bootstrap weights.') 

601 

602 nmisfits_w = sum( 

603 t.nmisfits for t in problem.targets if t.can_bootstrap_weights) 

604 

605 ws = make_bayesian_weights( 

606 self.nbootstrap, 

607 nmisfits=nmisfits_w, 

608 rstate=self.get_rstate_bootstrap(problem)) 

609 

610 imf = 0 

611 for t in problem.targets: 

612 if t.can_bootstrap_weights: 

613 t.set_bootstrap_weights(ws[:, imf:imf+t.nmisfits]) 

614 imf += t.nmisfits 

615 else: 

616 t.set_bootstrap_weights( 

617 num.ones((self.nbootstrap, t.nmisfits))) 

618 

619 def init_bootstrap_residuals(self, problem): 

620 logger.info('Initializing Bayesian bootstrap residuals.') 

621 

622 for t in problem.targets: 

623 if t.can_bootstrap_residuals: 

624 t.init_bootstrap_residuals( 

625 self.nbootstrap, rstate=self.get_rstate_bootstrap(problem)) 

626 else: 

627 t.set_bootstrap_residuals( 

628 num.zeros((self.nbootstrap, t.nmisfits))) 

629 

630 def get_bootstrap_weights(self, problem): 

631 if self._bootstrap_weights is None: 

632 try: 

633 problem.targets[0].get_bootstrap_weights() 

634 except Exception: 

635 self.init_bootstraps(problem) 

636 

637 bootstrap_weights = num.hstack( 

638 [t.get_bootstrap_weights() 

639 for t in problem.targets]) 

640 

641 self._bootstrap_weights = num.vstack(( 

642 num.ones((1, problem.nmisfits)), 

643 bootstrap_weights)) 

644 

645 return self._bootstrap_weights 

646 

647 def get_bootstrap_residuals(self, problem): 

648 if self._bootstrap_residuals is None: 

649 try: 

650 problem.targets[0].get_bootstrap_residuals() 

651 except Exception: 

652 self.init_bootstraps(problem) 

653 

654 bootstrap_residuals = num.hstack( 

655 [t.get_bootstrap_residuals() 

656 for t in problem.targets]) 

657 

658 self._bootstrap_residuals = num.vstack(( 

659 num.zeros((1, problem.nmisfits)), 

660 bootstrap_residuals)) 

661 

662 return self._bootstrap_residuals 

663 

664 def get_correlated_weights(self, problem): 

665 if self._correlated_weights is None: 

666 corr = dict() 

667 misfit_idx = num.cumsum( 

668 [0.] + [t.nmisfits for t in problem.targets], dtype=int) 

669 

670 for it, target in enumerate(problem.targets): 

671 weights = target.get_correlated_weights() 

672 if weights is None: 

673 continue 

674 corr[misfit_idx[it]] = weights 

675 

676 self._correlated_weights = corr 

677 

678 return self._correlated_weights 

679 

680 @property 

681 def nchains(self): 

682 return self.nbootstrap + 1 

683 

684 def chains(self, problem, history): 

685 nlinks_cap = int(round( 

686 self.chain_length_factor * problem.nparameters + 1)) 

687 

688 return Chains( 

689 problem, history, 

690 nchains=self.nchains, nlinks_cap=nlinks_cap) 

691 

692 def get_sampler_phase(self, iiter): 

693 niter = 0 

694 for iphase, phase in enumerate(self.sampler_phases): 

695 if iiter < niter + phase.niterations: 

696 return iphase, phase, iiter - niter 

697 

698 niter += phase.niterations 

699 

700 assert False, 'sample out of bounds' 

701 

702 def log_progress(self, problem, iiter, niter, phase, iiter_phase): 

703 t = time.time() 

704 if self._tlog_last < t - 10. \ 

705 or iiter_phase == 0 \ 

706 or iiter_phase == phase.niterations - 1: 

707 

708 logger.info( 

709 '%s at %i/%i (%s, %i/%i)' % ( 

710 problem.name, 

711 iiter+1, niter, 

712 phase.__class__.__name__, iiter_phase, phase.niterations)) 

713 

714 self._tlog_last = t 

715 

716 def optimise(self, problem, rundir=None, history=None): 

717 if rundir is not None: 

718 self.dump(filename=op.join(rundir, 'optimiser.yaml')) 

719 

720 if not history: 

721 history = ModelHistory( 

722 problem, 

723 nchains=self.nchains, 

724 path=rundir, mode='w') 

725 

726 chains = self.chains(problem, history) 

727 

728 if history.mode == 'r': 

729 if history.nmodels == self.niterations: 

730 return 

731 logger.info('Continuing run at %d iterations...', history.nmodels) 

732 history.mode = 'w' 

733 

734 chains.goto() 

735 

736 niter = self.niterations 

737 isbad_mask = None 

738 self._tlog_last = 0 

739 for iiter in range(history.nmodels, niter): 

740 self.iiter = iiter 

741 iphase, phase, iiter_phase = self.get_sampler_phase(iiter) 

742 self.log_progress(problem, iiter, niter, phase, iiter_phase) 

743 

744 sample = phase.get_sample(problem, iiter_phase, chains) 

745 sample.iphase = iphase 

746 

747 if isbad_mask is not None and num.any(isbad_mask): 

748 isok_mask = num.logical_not(isbad_mask) 

749 else: 

750 isok_mask = None 

751 

752 misfits = problem.misfits(sample.model, mask=isok_mask) 

753 

754 bootstrap_misfits = problem.combine_misfits( 

755 misfits, 

756 extra_weights=self.get_bootstrap_weights(problem), 

757 extra_residuals=self.get_bootstrap_residuals(problem), 

758 extra_correlated_weights=self.get_correlated_weights(problem)) 

759 isbad_mask_new = num.isnan(misfits[:, 0]) 

760 if isbad_mask is not None and num.any( 

761 isbad_mask != isbad_mask_new): 

762 

763 errmess = [ 

764 'problem %s: inconsistency in data availability' 

765 ' at iteration %i' % 

766 (problem.name, iiter)] 

767 

768 for target, isbad_new, isbad in zip( 

769 problem.targets, isbad_mask_new, isbad_mask): 

770 

771 if isbad_new != isbad: 

772 errmess.append(' %s, %s -> %s' % ( 

773 target.string_id(), isbad, isbad_new)) 

774 

775 raise BadProblem('\n'.join(errmess)) 

776 

777 isbad_mask = isbad_mask_new 

778 

779 if num.all(isbad_mask): 

780 raise BadProblem( 

781 'Problem %s: all target misfit values are NaN.' 

782 % problem.name) 

783 

784 history.append( 

785 sample.model, misfits, 

786 bootstrap_misfits, 

787 sample.pack_context()) 

788 

789 @property 

790 def niterations(self): 

791 return sum([ph.niterations for ph in self.sampler_phases]) 

792 

793 def get_status(self, history): 

794 if self._status_chains is None: 

795 self._status_chains = self.chains(history.problem, history) 

796 

797 self._status_chains.goto(history.nmodels) 

798 

799 chains = self._status_chains 

800 problem = history.problem 

801 

802 row_names = [p.name_nogroups for p in problem.parameters] 

803 row_names.append('Misfit') 

804 

805 def colum_array(data): 

806 arr = num.full(len(row_names), fill_value=num.nan) 

807 arr[:data.size] = data 

808 return arr 

809 

810 phase = self.get_sampler_phase(history.nmodels-1)[1] 

811 

812 bs_mean = colum_array(chains.mean_model(ichain=None)) 

813 bs_std = colum_array(chains.standard_deviation_models( 

814 ichain=None, estimator='standard_deviation_all_chains')) 

815 

816 glob_mean = colum_array(chains.mean_model(ichain=0)) 

817 glob_mean[-1] = num.mean(chains.misfits(ichain=0)) 

818 

819 glob_std = colum_array(chains.standard_deviation_models( 

820 ichain=0, estimator='standard_deviation_single_chain')) 

821 glob_std[-1] = num.std(chains.misfits(ichain=0)) 

822 

823 glob_best = colum_array(chains.best_model(ichain=0)) 

824 glob_best[-1] = chains.best_model_misfit() 

825 

826 glob_misfits = chains.misfits(ichain=0) 

827 

828 acceptance_latest = chains.acceptance_history[ 

829 :, -min(chains.acceptance_history.shape[1], self.ACCEPTANCE_AVG_LEN):] # noqa 

830 acceptance_avg = acceptance_latest.mean(axis=1) 

831 

832 def spark_plot(data, bins): 

833 hist, _ = num.histogram(data, bins) 

834 hist_max = num.max(hist) 

835 if hist_max == 0.0: 

836 hist_max = 1.0 

837 hist = hist / hist_max 

838 vec = num.digitize(hist, num.linspace(0., 1., len(self.SPARKS))) 

839 return ''.join([self.SPARKS[b-1] for b in vec]) 

840 

841 return OptimiserStatus( 

842 row_names=row_names, 

843 column_data=OrderedDict( 

844 zip(['BS mean', 'BS std', 

845 'Glob mean', 'Glob std', 'Glob best'], 

846 [bs_mean, bs_std, glob_mean, glob_std, glob_best])), 

847 extra_header= # noqa 

848 u'Optimiser phase: {phase}, exploring {nchains} BS chains\n' # noqa 

849 u'Global chain misfit distribution: \u2080{mf_dist}\xb9\n' 

850 u'Acceptance rate distribution: \u2080{acceptance}' 

851 u'\u2081\u2080\u2080\ufe6a (Median {acceptance_med:.1f}%)' 

852 .format( 

853 phase=phase.__class__.__name__, 

854 nchains=chains.nchains, 

855 mf_dist=spark_plot( 

856 glob_misfits, num.linspace(0., 1., 25)), 

857 acceptance=spark_plot( 

858 acceptance_avg, 

859 num.linspace(0., 1., 25)), 

860 acceptance_med=num.median(acceptance_avg) * 100. 

861 )) 

862 

863 def get_movie_maker( 

864 self, problem, history, xpar_name, ypar_name, movie_filename): 

865 

866 from . import plot 

867 return plot.HighScoreOptimiserPlot( 

868 self, problem, history, xpar_name, ypar_name, movie_filename) 

869 

870 @classmethod 

871 def get_plot_classes(cls): 

872 from .plot import HighScoreAcceptancePlot 

873 plots = Optimiser.get_plot_classes() 

874 plots.append(HighScoreAcceptancePlot) 

875 return plots 

876 

877 

878class HighScoreOptimiserConfig(OptimiserConfig): 

879 

880 sampler_phases = List.T( 

881 SamplerPhase.T(), 

882 default=[UniformSamplerPhase(niterations=1000), 

883 DirectedSamplerPhase(niterations=5000)], 

884 help='Stages of the sampler: Start with uniform sampling of the model' 

885 ' model space and narrow down through directed sampling.') 

886 chain_length_factor = Float.T( 

887 default=8., 

888 help='Controls the length of each chain: ' 

889 'chain_length_factor * nparameters + 1') 

890 nbootstrap = Int.T( 

891 default=100, 

892 help='Number of bootstrap realisations to be tracked simultaneously in' 

893 ' the optimisation.') 

894 

895 def get_optimiser(self): 

896 return HighScoreOptimiser( 

897 sampler_phases=list(self.sampler_phases), 

898 chain_length_factor=self.chain_length_factor, 

899 nbootstrap=self.nbootstrap) 

900 

901 

902def load_optimiser_history(dirname, problem): 

903 fn = op.join(dirname, 'accepted') 

904 with open(fn, 'r') as f: 

905 nmodels = os.fstat(f.fileno()).st_size // (problem.nbootstrap+1) 

906 data1 = num.fromfile( 

907 f, 

908 dtype='<i1', 

909 count=nmodels*(problem.nbootstrap+1)).astype(bool) 

910 

911 accepted = data1.reshape((nmodels, problem.nbootstrap+1)) 

912 

913 fn = op.join(dirname, 'choices') 

914 with open(fn, 'r') as f: 

915 data2 = num.fromfile( 

916 f, 

917 dtype='<i8', 

918 count=nmodels*2).astype(num.int64) 

919 

920 ibootstrap_choices, imodel_choices = data2.reshape((nmodels, 2)).T 

921 return ibootstrap_choices, imodel_choices, accepted 

922 

923 

924__all__ = ''' 

925 SamplerDistributionChoice 

926 StandardDeviationEstimatorChoice 

927 SamplerPhase 

928 InjectionSamplerPhase 

929 UniformSamplerPhase 

930 DirectedSamplerPhase 

931 Chains 

932 HighScoreOptimiserConfig 

933 HighScoreOptimiser 

934'''.split()