1''' 

2Base classes for Grond's problem definition and the model history container. 

3 

4Common behaviour of all source models offered by Grond is implemented here. 

5Source model specific details are implemented in the respective submodules. 

6''' 

7 

8import numpy as num 

9import math 

10import copy 

11import logging 

12import os.path as op 

13import os 

14import time 

15 

16from pyrocko import gf, util, guts 

17from pyrocko.guts import Object, String, List, Dict, Int 

18 

19from grond.meta import ADict, Parameter, GrondError, xjoin, Forbidden, \ 

20 StringID, has_get_plot_classes 

21from ..targets import MisfitResult, MisfitTarget, TargetGroup, \ 

22 WaveformMisfitTarget, SatelliteMisfitTarget, GNSSCampaignMisfitTarget 

23 

24from grond import stats 

25 

26from grond.version import __version__ 

27 

28guts_prefix = 'grond' 

29logger = logging.getLogger('grond.problems.base') 

30km = 1e3 

31as_km = dict(scale_factor=km, scale_unit='km') 

32 

33g_rstate = num.random.RandomState() 

34 

35 

36def nextpow2(i): 

37 return 2**int(math.ceil(math.log(i)/math.log(2.))) 

38 

39 

40def correlated_weights(values, weight_matrix): 

41 ''' 

42 Applies correlated weights to values 

43 

44 The resulting weighed values have to be squared! Check out 

45 :meth:`Problem.combine_misfits` for more information. 

46 

47 :param values: Misfits or norms as :class:`numpy.Array` 

48 :param weight: Weight matrix, commonly the inverse of covariance matrix 

49 

50 :returns: :class:`numpy.Array` weighted values 

51 ''' 

52 return num.matmul(values, weight_matrix) 

53 

54 

55class ProblemConfig(Object): 

56 ''' 

57 Base class for config section defining the objective function setup. 

58 

59 Factory for :py:class:`Problem` objects. 

60 ''' 

61 name_template = String.T() 

62 norm_exponent = Int.T(default=2) 

63 nthreads = Int.T(default=1) 

64 

65 def get_problem(self, event, target_groups, targets): 

66 ''' 

67 Instantiate the problem with a given event and targets. 

68 

69 :returns: :py:class:`Problem` object 

70 ''' 

71 raise NotImplementedError 

72 

73 

74@has_get_plot_classes 

75class Problem(Object): 

76 ''' 

77 Base class for objective function setup. 

78 

79 Defines the *problem* to be solved by the optimiser. 

80 ''' 

81 name = String.T() 

82 ranges = Dict.T(String.T(), gf.Range.T()) 

83 dependants = List.T(Parameter.T()) 

84 norm_exponent = Int.T(default=2) 

85 base_source = gf.Source.T(optional=True) 

86 targets = List.T(MisfitTarget.T()) 

87 target_groups = List.T(TargetGroup.T()) 

88 grond_version = String.T(optional=True) 

89 nthreads = Int.T(default=1) 

90 

91 def __init__(self, **kwargs): 

92 Object.__init__(self, **kwargs) 

93 

94 if self.grond_version is None: 

95 self.grond_version = __version__ 

96 

97 self._target_weights = None 

98 self._engine = None 

99 self._family_mask = None 

100 

101 if hasattr(self, 'problem_waveform_parameters') and self.has_waveforms: 

102 self.problem_parameters =\ 

103 self.problem_parameters + self.problem_waveform_parameters 

104 

105 unused_parameters = [] 

106 for p in self.problem_parameters: 

107 if p.optional and p._name not in self.ranges.keys(): 

108 unused_parameters.append(p) 

109 

110 for p in unused_parameters: 

111 self.problem_parameters.remove(p) 

112 

113 self.check() 

114 

115 @classmethod 

116 def get_plot_classes(cls): 

117 from . import plot 

118 return plot.get_plot_classes() 

119 

120 def check(self): 

121 paths = set() 

122 for grp in self.target_groups: 

123 if grp.path == 'all': 

124 continue 

125 if grp.path in paths: 

126 raise ValueError('Path %s defined more than once! In %s' 

127 % (grp.path, grp.__class__.__name__)) 

128 paths.add(grp.path) 

129 logger.debug('TargetGroup check OK.') 

130 

131 def copy(self): 

132 o = copy.copy(self) 

133 o._target_weights = None 

134 return o 

135 

136 def set_target_parameter_values(self, x): 

137 nprob = len(self.problem_parameters) 

138 for target in self.targets: 

139 target.set_parameter_values(x[nprob:nprob+target.nparameters]) 

140 nprob += target.nparameters 

141 

142 def get_parameter_dict(self, model, group=None): 

143 params = [] 

144 for ip, p in enumerate(self.parameters): 

145 if group in p.groups or group is None: 

146 params.append((p.name, model[ip])) 

147 return ADict(params) 

148 

149 def get_parameter_array(self, d): 

150 arr = num.zeros(self.nparameters, dtype=float) 

151 for ip, p in enumerate(self.parameters): 

152 if p.name in d.keys(): 

153 arr[ip] = d[p.name] 

154 return arr 

155 

156 def get_parameter_index(self, param_name): 

157 return {k.name: ik for ik, k in enumerate(self.parameters)}[param_name] 

158 

159 def dump_problem_info(self, dirname): 

160 fn = op.join(dirname, 'problem.yaml') 

161 util.ensuredirs(fn) 

162 guts.dump(self, filename=fn) 

163 

164 def dump_problem_data( 

165 self, dirname, x, misfits, chains=None, 

166 sampler_context=None): 

167 

168 fn = op.join(dirname, 'models') 

169 if not isinstance(x, num.ndarray): 

170 x = num.array(x) 

171 with open(fn, 'ab') as f: 

172 x.astype('<f8').tofile(f) 

173 

174 fn = op.join(dirname, 'misfits') 

175 with open(fn, 'ab') as f: 

176 misfits.astype('<f8').tofile(f) 

177 

178 if chains is not None: 

179 fn = op.join(dirname, 'chains') 

180 with open(fn, 'ab') as f: 

181 chains.astype('<f8').tofile(f) 

182 

183 if sampler_context is not None: 

184 fn = op.join(dirname, 'choices') 

185 with open(fn, 'ab') as f: 

186 num.array(sampler_context, dtype='<i8').tofile(f) 

187 

188 def name_to_index(self, name): 

189 pnames = [p.name for p in self.combined] 

190 return pnames.index(name) 

191 

192 @property 

193 def parameters(self): 

194 target_parameters = [] 

195 for target in self.targets: 

196 target_parameters.extend(target.target_parameters) 

197 return self.problem_parameters + target_parameters 

198 

199 @property 

200 def parameter_names(self): 

201 return [p.name for p in self.combined] 

202 

203 @property 

204 def dependant_names(self): 

205 return [p.name for p in self.dependants] 

206 

207 @property 

208 def nparameters(self): 

209 return len(self.parameters) 

210 

211 @property 

212 def ntargets(self): 

213 return len(self.targets) 

214 

215 @property 

216 def nwaveform_targets(self): 

217 return len(self.waveform_targets) 

218 

219 @property 

220 def nsatellite_targets(self): 

221 return len(self.satellite_targets) 

222 

223 @property 

224 def ngnss_targets(self): 

225 return len(self.gnss_targets) 

226 

227 @property 

228 def nmisfits(self): 

229 nmisfits = 0 

230 for target in self.targets: 

231 nmisfits += target.nmisfits 

232 return nmisfits 

233 

234 @property 

235 def ndependants(self): 

236 return len(self.dependants) 

237 

238 @property 

239 def ncombined(self): 

240 return len(self.parameters) + len(self.dependants) 

241 

242 @property 

243 def combined(self): 

244 return self.parameters + self.dependants 

245 

246 @property 

247 def satellite_targets(self): 

248 return [t for t in self.targets 

249 if isinstance(t, SatelliteMisfitTarget)] 

250 

251 @property 

252 def gnss_targets(self): 

253 return [t for t in self.targets 

254 if isinstance(t, GNSSCampaignMisfitTarget)] 

255 

256 @property 

257 def waveform_targets(self): 

258 return [t for t in self.targets 

259 if isinstance(t, WaveformMisfitTarget)] 

260 

261 @property 

262 def has_satellite(self): 

263 if self.satellite_targets: 

264 return True 

265 return False 

266 

267 @property 

268 def has_waveforms(self): 

269 if self.waveform_targets: 

270 return True 

271 return False 

272 

273 def set_engine(self, engine): 

274 self._engine = engine 

275 

276 def get_engine(self): 

277 return self._engine 

278 

279 def get_gf_store(self, target): 

280 if self.get_engine() is None: 

281 raise GrondError('Cannot get GF Store, modelling is not set up.') 

282 return self.get_engine().get_store(target.store_id) 

283 

284 def random_uniform(self, xbounds, rstate, fixed_magnitude=None): 

285 if fixed_magnitude is not None: 

286 raise GrondError( 

287 'Setting fixed magnitude in random model generation not ' 

288 'supported for this type of problem.') 

289 

290 x = rstate.uniform(0., 1., self.nparameters) 

291 x *= (xbounds[:, 1] - xbounds[:, 0]) 

292 x += xbounds[:, 0] 

293 return x 

294 

295 def preconstrain(self, x): 

296 return x 

297 

298 def extract(self, xs, i): 

299 if xs.ndim == 1: 

300 return self.extract(xs[num.newaxis, :], i)[0] 

301 

302 if i < self.nparameters: 

303 return xs[:, i] 

304 else: 

305 return self.make_dependant( 

306 xs, self.dependants[i-self.nparameters].name) 

307 

308 def get_target_weights(self): 

309 if self._target_weights is None: 

310 self._target_weights = num.concatenate( 

311 [target.get_combined_weight() for target in self.targets]) 

312 

313 return self._target_weights 

314 

315 def get_target_residuals(self): 

316 pass 

317 

318 def inter_family_weights(self, ns): 

319 exp, root = self.get_norm_functions() 

320 

321 family, nfamilies = self.get_family_mask() 

322 

323 ws = num.zeros(self.nmisfits) 

324 for ifamily in range(nfamilies): 

325 mask = family == ifamily 

326 ws[mask] = 1.0 / root(num.nansum(exp(ns[mask]))) 

327 

328 return ws 

329 

330 def inter_family_weights2(self, ns): 

331 ''' 

332 :param ns: 2D array with normalization factors ``ns[imodel, itarget]`` 

333 :returns: 2D array ``weights[imodel, itarget]`` 

334 ''' 

335 

336 exp, root = self.get_norm_functions() 

337 family, nfamilies = self.get_family_mask() 

338 

339 ws = num.zeros(ns.shape) 

340 for ifamily in range(nfamilies): 

341 mask = family == ifamily 

342 ws[:, mask] = (1.0 / root( 

343 num.nansum(exp(ns[:, mask]), axis=1)))[:, num.newaxis] 

344 

345 return ws 

346 

347 def get_reference_model(self): 

348 model = num.zeros(self.nparameters) 

349 model_source_params = self.pack(self.base_source) 

350 model[:model_source_params.size] = model_source_params 

351 return model 

352 

353 def get_parameter_bounds(self): 

354 out = [] 

355 for p in self.problem_parameters: 

356 r = self.ranges[p.name] 

357 out.append((r.start, r.stop)) 

358 

359 for target in self.targets: 

360 for p in target.target_parameters: 

361 r = target.target_ranges[p.name_nogroups] 

362 out.append((r.start, r.stop)) 

363 

364 return num.array(out, dtype=float) 

365 

366 def get_dependant_bounds(self): 

367 return num.zeros((0, 2)) 

368 

369 def get_combined_bounds(self): 

370 return num.vstack(( 

371 self.get_parameter_bounds(), 

372 self.get_dependant_bounds())) 

373 

374 def raise_invalid_norm_exponent(self): 

375 raise GrondError('Invalid norm exponent: %f' % self.norm_exponent) 

376 

377 def get_norm_functions(self): 

378 if self.norm_exponent == 2: 

379 def sqr(x): 

380 return x**2 

381 

382 return sqr, num.sqrt 

383 

384 elif self.norm_exponent == 1: 

385 def noop(x): 

386 return x 

387 

388 return noop, num.abs 

389 

390 else: 

391 self.raise_invalid_norm_exponent() 

392 

393 def combine_misfits( 

394 self, misfits, 

395 extra_weights=None, 

396 extra_residuals=None, 

397 extra_correlated_weights=dict(), 

398 get_contributions=False): 

399 

400 ''' 

401 Combine misfit contributions (residuals) to global or bootstrap misfits 

402 

403 :param misfits: 3D array ``misfits[imodel, iresidual, 0]`` are the 

404 misfit contributions (residuals) ``misfits[imodel, iresidual, 1]`` 

405 are the normalisation contributions. It is also possible to give 

406 the misfit and normalisation contributions for a single model as 

407 ``misfits[iresidual, 0]`` and misfits[iresidual, 1]`` in which 

408 case, the first dimension (imodel) of the result will be stipped 

409 off. 

410 

411 :param extra_weights: if given, 2D array of extra weights to be applied 

412 to the contributions, indexed as 

413 ``extra_weights[ibootstrap, iresidual]``. 

414 

415 :param extra_residuals: if given, 2D array of perturbations to be added 

416 to the residuals, indexed as 

417 ``extra_residuals[ibootstrap, iresidual]``. 

418 

419 :param extra_correlated_weights: if a dictionary of 

420 ``imisfit: correlated weight matrix`` is passed a correlated 

421 weight matrix is applied to the misfit and normalisation values. 

422 `imisfit` is the starting index in the misfits vector the 

423 correlated weight matrix applies to. 

424 

425 :param get_contributions: get the weighted and perturbed contributions 

426 (don't do the sum). 

427 

428 :returns: if no *extra_weights* or *extra_residuals* are given, a 1D 

429 array indexed as ``misfits[imodel]`` containing the global misfit 

430 for each model is returned, otherwise a 2D array 

431 ``misfits[imodel, ibootstrap]`` with the misfit for every model and 

432 weighting/residual set is returned. 

433 ''' 

434 if misfits.ndim == 2: 

435 misfits = misfits[num.newaxis, :, :] 

436 return self.combine_misfits( 

437 misfits, extra_weights, extra_residuals, 

438 extra_correlated_weights, get_contributions)[0, ...] 

439 

440 if extra_weights is None and extra_residuals is None: 

441 return self.combine_misfits( 

442 misfits, False, False, 

443 extra_correlated_weights, get_contributions)[:, 0] 

444 

445 assert misfits.ndim == 3 

446 assert not num.any(extra_weights) or extra_weights.ndim == 2 

447 assert not num.any(extra_residuals) or extra_residuals.ndim == 2 

448 

449 if self.norm_exponent != 2 and extra_correlated_weights: 

450 raise GrondError('Correlated weights can only be used ' 

451 ' with norm_exponent=2') 

452 

453 exp, root = self.get_norm_functions() 

454 

455 nmodels = misfits.shape[0] 

456 nmisfits = misfits.shape[1] # noqa 

457 

458 mf = misfits[:, num.newaxis, :, :].copy() 

459 

460 if num.any(extra_residuals): 

461 mf = mf + extra_residuals[num.newaxis, :, :, num.newaxis] 

462 

463 res = mf[..., 0] 

464 norms = mf[..., 1] 

465 

466 for imisfit, corr_weight_mat in extra_correlated_weights.items(): 

467 

468 jmisfit = imisfit + corr_weight_mat.shape[0] 

469 

470 for imodel in range(nmodels): 

471 corr_res = res[imodel, :, imisfit:jmisfit] 

472 corr_norms = norms[imodel, :, imisfit:jmisfit] 

473 

474 res[imodel, :, imisfit:jmisfit] = \ 

475 correlated_weights(corr_res, corr_weight_mat) 

476 

477 norms[imodel, :, imisfit:jmisfit] = \ 

478 correlated_weights(corr_norms, corr_weight_mat) 

479 

480 # Apply normalization family weights (these weights depend on 

481 # on just calculated correlated norms!) 

482 weights_fam = \ 

483 self.inter_family_weights2(norms[:, 0, :])[:, num.newaxis, :] 

484 

485 weights_fam = exp(weights_fam) 

486 

487 res = exp(res) 

488 norms = exp(norms) 

489 

490 res *= weights_fam 

491 norms *= weights_fam 

492 

493 weights_tar = self.get_target_weights()[num.newaxis, num.newaxis, :] 

494 if num.any(extra_weights): 

495 weights_tar = weights_tar * extra_weights[num.newaxis, :, :] 

496 

497 weights_tar = exp(weights_tar) 

498 

499 res = res * weights_tar 

500 norms = norms * weights_tar 

501 

502 if get_contributions: 

503 return res / num.nansum(norms, axis=2)[:, :, num.newaxis] 

504 

505 result = root( 

506 num.nansum(res, axis=2) / 

507 num.nansum(norms, axis=2)) 

508 

509 assert result[result < 0].size == 0 

510 return result 

511 

512 def make_family_mask(self): 

513 family_names = set() 

514 families = num.zeros(self.nmisfits, dtype=int) 

515 

516 idx = 0 

517 for itarget, target in enumerate(self.targets): 

518 family_names.add(target.normalisation_family) 

519 families[idx:idx + target.nmisfits] = len(family_names) - 1 

520 idx += target.nmisfits 

521 

522 return families, len(family_names) 

523 

524 def get_family_mask(self): 

525 if self._family_mask is None: 

526 self._family_mask = self.make_family_mask() 

527 

528 return self._family_mask 

529 

530 def evaluate(self, x, mask=None, result_mode='full', targets=None, 

531 nthreads=1): 

532 source = self.get_source(x) 

533 engine = self.get_engine() 

534 

535 self.set_target_parameter_values(x) 

536 

537 if mask is not None and targets is not None: 

538 raise ValueError('Mask cannot be defined with targets set.') 

539 targets = targets if targets is not None else self.targets 

540 

541 for target in targets: 

542 target.set_result_mode(result_mode) 

543 

544 modelling_targets = [] 

545 t2m_map = {} 

546 for itarget, target in enumerate(targets): 

547 t2m_map[target] = target.prepare_modelling(engine, source, targets) 

548 if mask is None or mask[itarget]: 

549 modelling_targets.extend(t2m_map[target]) 

550 

551 u2m_map = {} 

552 for imtarget, mtarget in enumerate(modelling_targets): 

553 if mtarget not in u2m_map: 

554 u2m_map[mtarget] = [] 

555 

556 u2m_map[mtarget].append(imtarget) 

557 

558 modelling_targets_unique = list(u2m_map.keys()) 

559 

560 resp = engine.process(source, modelling_targets_unique, 

561 nthreads=nthreads) 

562 modelling_results_unique = list(resp.results_list[0]) 

563 

564 modelling_results = [None] * len(modelling_targets) 

565 

566 for mtarget, mresult in zip( 

567 modelling_targets_unique, modelling_results_unique): 

568 

569 for itarget in u2m_map[mtarget]: 

570 modelling_results[itarget] = mresult 

571 

572 imt = 0 

573 results = [] 

574 for itarget, target in enumerate(targets): 

575 nmt_this = len(t2m_map[target]) 

576 if mask is None or mask[itarget]: 

577 result = target.finalize_modelling( 

578 engine, source, 

579 t2m_map[target], 

580 modelling_results[imt:imt+nmt_this]) 

581 

582 imt += nmt_this 

583 else: 

584 result = gf.SeismosizerError( 

585 'target was excluded from modelling') 

586 

587 results.append(result) 

588 

589 return results 

590 

591 def misfits(self, x, mask=None, nthreads=1): 

592 results = self.evaluate( 

593 x, mask=mask, result_mode='sparse', nthreads=nthreads) 

594 misfits = num.full((self.nmisfits, 2), num.nan) 

595 

596 imisfit = 0 

597 for target, result in zip(self.targets, results): 

598 if isinstance(result, MisfitResult): 

599 misfits[imisfit:imisfit+target.nmisfits, :] = result.misfits 

600 

601 imisfit += target.nmisfits 

602 

603 return misfits 

604 

605 def forward(self, x): 

606 source = self.get_source(x) 

607 engine = self.get_engine() 

608 

609 plain_targets = [] 

610 for target in self.targets: 

611 plain_targets.extend(target.get_plain_targets(engine, source)) 

612 

613 resp = engine.process(source, plain_targets) 

614 

615 results = [] 

616 for target, result in zip(plain_targets, resp.results_list[0]): 

617 if isinstance(result, gf.SeismosizerError): 

618 logger.debug( 

619 '%s.%s.%s.%s: %s' % (target.codes + (str(result),))) 

620 else: 

621 results.append(result) 

622 

623 return results 

624 

625 def get_random_model(self, ntries_limit=100): 

626 xbounds = self.get_parameter_bounds() 

627 

628 for _ in range(ntries_limit): 

629 x = self.random_uniform(xbounds, rstate=g_rstate) 

630 try: 

631 return self.preconstrain(x) 

632 

633 except Forbidden: 

634 pass 

635 

636 raise GrondError( 

637 'Could not find any suitable candidate sample within %i tries' % ( 

638 ntries_limit)) 

639 

640 

641class ProblemInfoNotAvailable(GrondError): 

642 pass 

643 

644 

645class ProblemDataNotAvailable(GrondError): 

646 pass 

647 

648 

649class NoSuchAttribute(GrondError): 

650 pass 

651 

652 

653class InvalidAttributeName(GrondError): 

654 pass 

655 

656 

657class ModelHistory(object): 

658 ''' 

659 Write, read and follow sequences of models produced in an optimisation run. 

660 

661 :param problem: :class:`grond.Problem` instance 

662 :param path: path to rundir, defaults to None 

663 :type path: str, optional 

664 :param mode: open mode, 'r': read, 'w': write 

665 :type mode: str, optional 

666 ''' 

667 

668 nmodels_capacity_min = 1024 

669 

670 def __init__(self, problem, nchains=None, path=None, mode='r'): 

671 self.mode = mode 

672 

673 self.problem = problem 

674 self.path = path 

675 self.nchains = nchains 

676 

677 self._models_buffer = None 

678 self._misfits_buffer = None 

679 self._bootstraps_buffer = None 

680 self._sample_contexts_buffer = None 

681 

682 self._sorted_misfit_idx = {} 

683 

684 self.models = None 

685 self.misfits = None 

686 self.bootstrap_misfits = None 

687 

688 self.sampler_contexts = None 

689 

690 self.nmodels_capacity = self.nmodels_capacity_min 

691 self.listeners = [] 

692 

693 self._attributes = {} 

694 

695 if mode == 'r': 

696 self.load() 

697 

698 @staticmethod 

699 def verify_rundir(rundir): 

700 _rundir_files = ('misfits', 'models') 

701 

702 if not op.exists(rundir): 

703 raise ProblemDataNotAvailable( 

704 'Directory does not exist: %s' % rundir) 

705 for f in _rundir_files: 

706 if not op.exists(op.join(rundir, f)): 

707 raise ProblemDataNotAvailable('File not found: %s' % f) 

708 

709 @classmethod 

710 def follow(cls, path, nchains=None, wait=20.): 

711 ''' 

712 Start following a rundir (constructor). 

713 

714 :param path: the path to follow, a grond rundir 

715 :type path: str, optional 

716 :param wait: wait time until the folder become alive 

717 :type wait: number in seconds, optional 

718 :returns: A :py:class:`ModelHistory` instance 

719 ''' 

720 start_watch = time.time() 

721 while (time.time() - start_watch) < wait: 

722 try: 

723 cls.verify_rundir(path) 

724 problem = load_problem_info(path) 

725 return cls(problem, nchains=nchains, path=path, mode='r') 

726 except (ProblemDataNotAvailable, OSError): 

727 time.sleep(.25) 

728 

729 @property 

730 def nmodels(self): 

731 if self.models is None: 

732 return 0 

733 else: 

734 return self.models.shape[0] 

735 

736 @nmodels.setter 

737 def nmodels(self, nmodels_new): 

738 assert 0 <= nmodels_new <= self.nmodels 

739 self.models = self._models_buffer[:nmodels_new, :] 

740 self.misfits = self._misfits_buffer[:nmodels_new, :, :] 

741 if self.nchains is not None: 

742 self.bootstrap_misfits = self._bootstraps_buffer[:nmodels_new, :, :] # noqa 

743 if self._sample_contexts_buffer is not None: 

744 self.sampler_contexts = self._sample_contexts_buffer[:nmodels_new, :] # noqa 

745 

746 @property 

747 def nmodels_capacity(self): 

748 if self._models_buffer is None: 

749 return 0 

750 else: 

751 return self._models_buffer.shape[0] 

752 

753 @nmodels_capacity.setter 

754 def nmodels_capacity(self, nmodels_capacity_new): 

755 if self.nmodels_capacity != nmodels_capacity_new: 

756 

757 models_buffer = num.zeros( 

758 (nmodels_capacity_new, self.problem.nparameters), 

759 dtype=float) 

760 misfits_buffer = num.zeros( 

761 (nmodels_capacity_new, self.problem.nmisfits, 2), 

762 dtype=float) 

763 sample_contexts_buffer = num.zeros( 

764 (nmodels_capacity_new, 4), 

765 dtype=int) 

766 sample_contexts_buffer.fill(-1) 

767 

768 if self.nchains is not None: 

769 bootstraps_buffer = num.zeros( 

770 (nmodels_capacity_new, self.nchains), 

771 dtype=float) 

772 

773 ncopy = min(self.nmodels, nmodels_capacity_new) 

774 

775 if self._models_buffer is not None: 

776 models_buffer[:ncopy, :] = \ 

777 self._models_buffer[:ncopy, :] 

778 misfits_buffer[:ncopy, :, :] = \ 

779 self._misfits_buffer[:ncopy, :, :] 

780 sample_contexts_buffer[:ncopy, :] = \ 

781 self._sample_contexts_buffer[:ncopy, :] 

782 

783 self._models_buffer = models_buffer 

784 self._misfits_buffer = misfits_buffer 

785 self._sample_contexts_buffer = sample_contexts_buffer 

786 

787 if self.nchains is not None: 

788 if self._bootstraps_buffer is not None: 

789 bootstraps_buffer[:ncopy, :] = \ 

790 self._bootstraps_buffer[:ncopy, :] 

791 self._bootstraps_buffer = bootstraps_buffer 

792 

793 def clear(self): 

794 assert self.mode != 'r', 'History is read-only, cannot clear.' 

795 self.nmodels = 0 

796 self.nmodels_capacity = self.nmodels_capacity_min 

797 

798 def extend( 

799 self, models, misfits, 

800 bootstrap_misfits=None, 

801 sampler_contexts=None): 

802 

803 nmodels = self.nmodels 

804 n = models.shape[0] 

805 

806 nmodels_capacity_want = max( 

807 self.nmodels_capacity_min, nextpow2(nmodels + n)) 

808 

809 if nmodels_capacity_want != self.nmodels_capacity: 

810 self.nmodels_capacity = nmodels_capacity_want 

811 

812 self._models_buffer[nmodels:nmodels+n, :] = models 

813 self._misfits_buffer[nmodels:nmodels+n, :, :] = misfits 

814 

815 self.models = self._models_buffer[:nmodels+n, :] 

816 self.misfits = self._misfits_buffer[:nmodels+n, :, :] 

817 

818 if bootstrap_misfits is not None: 

819 self._bootstraps_buffer[nmodels:nmodels+n, :] = bootstrap_misfits 

820 self.bootstrap_misfits = self._bootstraps_buffer[:nmodels+n, :] 

821 

822 if sampler_contexts is not None: 

823 self._sample_contexts_buffer[nmodels:nmodels+n, :] \ 

824 = sampler_contexts 

825 self.sampler_contexts = self._sample_contexts_buffer[:nmodels+n, :] 

826 

827 if self.path and self.mode == 'w': 

828 for i in range(n): 

829 self.problem.dump_problem_data( 

830 self.path, models[i, :], misfits[i, :, :], 

831 bootstrap_misfits[i, :] 

832 if bootstrap_misfits is not None else None, 

833 sampler_contexts[i, :] 

834 if sampler_contexts is not None else None) 

835 

836 self._sorted_misfit_idx.clear() 

837 

838 self.emit('extend', nmodels, n, models, misfits, sampler_contexts) 

839 

840 def append( 

841 self, model, misfits, 

842 bootstrap_misfits=None, 

843 sampler_context=None): 

844 

845 if bootstrap_misfits is not None: 

846 bootstrap_misfits = bootstrap_misfits[num.newaxis, :] 

847 

848 if sampler_context is not None: 

849 sampler_context = sampler_context[num.newaxis, :] 

850 

851 return self.extend( 

852 model[num.newaxis, :], misfits[num.newaxis, :, :], 

853 bootstrap_misfits, sampler_context) 

854 

855 def load(self): 

856 self.mode = 'r' 

857 self.verify_rundir(self.path) 

858 models, misfits, bootstraps, sampler_contexts = load_problem_data( 

859 self.path, self.problem, nchains=self.nchains) 

860 self.extend(models, misfits, bootstraps, sampler_contexts) 

861 

862 def update(self): 

863 ''' Update history from path ''' 

864 nmodels_available = get_nmodels(self.path, self.problem) 

865 if self.nmodels == nmodels_available: 

866 return 

867 

868 try: 

869 new_models, new_misfits, new_bootstraps, new_sampler_contexts = \ 

870 load_problem_data( 

871 self.path, 

872 self.problem, 

873 nmodels_skip=self.nmodels, 

874 nchains=self.nchains) 

875 

876 except ValueError: 

877 return 

878 

879 self.extend( 

880 new_models, 

881 new_misfits, 

882 new_bootstraps, 

883 new_sampler_contexts) 

884 

885 def add_listener(self, listener): 

886 ''' Add a listener to the history 

887 

888 The listening class can implement the following methods: 

889 * ``extend`` 

890 ''' 

891 self.listeners.append(listener) 

892 

893 def emit(self, event_name, *args, **kwargs): 

894 for listener in self.listeners: 

895 slot = getattr(listener, event_name, None) 

896 if callable(slot): 

897 slot(*args, **kwargs) 

898 

899 @property 

900 def attribute_names(self): 

901 apath = op.join(self.path, 'attributes') 

902 if not os.path.exists(apath): 

903 return [] 

904 

905 return [fn for fn in os.listdir(apath) 

906 if StringID.regex.match(fn)] 

907 

908 def get_attribute(self, name): 

909 if name not in self._attributes: 

910 if name not in self.attribute_names: 

911 raise NoSuchAttribute(name) 

912 

913 path = op.join(self.path, 'attributes', name) 

914 

915 with open(path, 'rb') as f: 

916 self._attributes[name] = num.fromfile( 

917 f, dtype='<i4', 

918 count=self.nmodels).astype(int) 

919 

920 assert self._attributes[name].shape == (self.nmodels,) 

921 

922 return self._attributes[name] 

923 

924 def set_attribute(self, name, attribute): 

925 if not StringID.regex.match(name): 

926 raise InvalidAttributeName(name) 

927 

928 attribute = attribute.astype(int) 

929 assert attribute.shape == (self.nmodels,) 

930 

931 apath = op.join(self.path, 'attributes') 

932 

933 if not os.path.exists(apath): 

934 os.mkdir(apath) 

935 

936 path = op.join(apath, name) 

937 

938 with open(path, 'wb') as f: 

939 attribute.astype('<i4').tofile(f) 

940 

941 self._attributes[name] = attribute 

942 

943 def ensure_bootstrap_misfits(self, optimiser): 

944 if self.bootstrap_misfits is None: 

945 problem = self.problem 

946 self.bootstrap_misfits = problem.combine_misfits( 

947 self.misfits, 

948 extra_weights=optimiser.get_bootstrap_weights(problem), 

949 extra_residuals=optimiser.get_bootstrap_residuals(problem)) 

950 

951 def imodels_by_cluster(self, cluster_attribute): 

952 if cluster_attribute is None: 

953 return [(-1, 100.0, num.arange(self.nmodels))] 

954 

955 by_cluster = [] 

956 try: 

957 iclusters = self.get_attribute(cluster_attribute) 

958 iclusters_avail = num.unique(iclusters) 

959 

960 for icluster in iclusters_avail: 

961 imodels = num.where(iclusters == icluster)[0] 

962 by_cluster.append( 

963 (icluster, 

964 (100.0 * imodels.size) / self.nmodels, 

965 imodels)) 

966 

967 if by_cluster and by_cluster[0][0] == -1: 

968 by_cluster.append(by_cluster.pop(0)) 

969 

970 except NoSuchAttribute: 

971 logger.warning( 

972 'Attribute %s not set in run %s.\n' 

973 ' Skipping model retrieval by clusters.' % ( 

974 cluster_attribute, self.problem.name)) 

975 

976 return by_cluster 

977 

978 def models_by_cluster(self, cluster_attribute): 

979 if cluster_attribute is None: 

980 return [(-1, 100.0, self.models)] 

981 

982 return [ 

983 (icluster, percentage, self.models[imodels]) 

984 for (icluster, percentage, imodels) 

985 in self.imodels_by_cluster(cluster_attribute)] 

986 

987 def mean_sources_by_cluster(self, cluster_attribute): 

988 return [ 

989 (icluster, percentage, stats.get_mean_source(self.problem, models)) 

990 for (icluster, percentage, models) 

991 in self.models_by_cluster(cluster_attribute)] 

992 

993 def get_sorted_misfits_idx(self, chain=0): 

994 if chain not in self._sorted_misfit_idx.keys(): 

995 self._sorted_misfit_idx[chain] = num.argsort( 

996 self.bootstrap_misfits[:, chain]) 

997 

998 return self._sorted_misfit_idx[chain] 

999 

1000 def get_sorted_misfits(self, chain=0): 

1001 isort = self.get_sorted_misfits_idx(chain) 

1002 return self.bootstrap_misfits[:, chain][isort] 

1003 

1004 def get_sorted_models(self, chain=0): 

1005 isort = self.get_sorted_misfits_idx(chain=0) 

1006 return self.models[isort, :] 

1007 

1008 def get_sorted_primary_misfits(self): 

1009 return self.get_sorted_misfits(chain=0) 

1010 

1011 def get_sorted_primary_models(self): 

1012 return self.get_sorted_models(chain=0) 

1013 

1014 def get_best_model(self, chain=0): 

1015 return self.get_sorted_models(chain)[0, ...] 

1016 

1017 def get_best_misfit(self, chain=0): 

1018 return self.get_sorted_misfits(chain)[0] 

1019 

1020 def get_mean_model(self): 

1021 return num.mean(self.models, axis=0) 

1022 

1023 def get_mean_misfit(self, chain=0): 

1024 return num.mean(self.bootstrap_misfits[:, chain]) 

1025 

1026 def get_best_source(self, chain=0): 

1027 return self.problem.get_source(self.get_best_model(chain)) 

1028 

1029 def get_mean_source(self, chain=0): 

1030 return self.problem.get_source(self.get_mean_model()) 

1031 

1032 def get_chain_misfits(self, chain=0): 

1033 return self.bootstrap_misfits[:, chain] 

1034 

1035 def get_primary_chain_misfits(self): 

1036 return self.get_chain_misfits(chain=0) 

1037 

1038 

1039def get_nmodels(dirname, problem): 

1040 fn = op.join(dirname, 'models') 

1041 with open(fn, 'r') as f: 

1042 nmodels1 = os.fstat(f.fileno()).st_size // (problem.nparameters * 8) 

1043 

1044 fn = op.join(dirname, 'misfits') 

1045 with open(fn, 'r') as f: 

1046 nmodels2 = os.fstat(f.fileno()).st_size // (problem.nmisfits * 2 * 8) 

1047 

1048 return min(nmodels1, nmodels2) 

1049 

1050 

1051def load_problem_info_and_data(dirname, subset=None, nchains=None): 

1052 problem = load_problem_info(dirname) 

1053 models, misfits, bootstraps, sampler_contexts = load_problem_data( 

1054 xjoin(dirname, subset), problem, nchains=nchains) 

1055 return problem, models, misfits, bootstraps, sampler_contexts 

1056 

1057 

1058def load_optimiser_info(dirname): 

1059 fn = op.join(dirname, 'optimiser.yaml') 

1060 return guts.load(filename=fn) 

1061 

1062 

1063def load_problem_info(dirname): 

1064 try: 

1065 fn = op.join(dirname, 'problem.yaml') 

1066 return guts.load(filename=fn) 

1067 except OSError as e: 

1068 logger.debug(e) 

1069 raise ProblemInfoNotAvailable( 

1070 'No problem info available (%s).' % dirname) 

1071 

1072 

1073def load_problem_data(dirname, problem, nmodels_skip=0, nchains=None): 

1074 

1075 def get_chains_fn(): 

1076 for fn in (op.join(dirname, 'bootstraps'), 

1077 op.join(dirname, 'chains')): 

1078 if op.exists(fn): 

1079 return fn 

1080 return False 

1081 

1082 try: 

1083 nmodels = get_nmodels(dirname, problem) - nmodels_skip 

1084 

1085 fn = op.join(dirname, 'models') 

1086 with open(fn, 'r') as f: 

1087 f.seek(nmodels_skip * problem.nparameters * 8) 

1088 models = num.fromfile( 

1089 f, dtype='<f8', 

1090 count=nmodels * problem.nparameters)\ 

1091 .astype(float) 

1092 

1093 models = models.reshape((nmodels, problem.nparameters)) 

1094 

1095 fn = op.join(dirname, 'misfits') 

1096 with open(fn, 'r') as f: 

1097 f.seek(nmodels_skip * problem.nmisfits * 2 * 8) 

1098 misfits = num.fromfile( 

1099 f, dtype='<f8', 

1100 count=nmodels*problem.nmisfits*2)\ 

1101 .astype(float) 

1102 misfits = misfits.reshape((nmodels, problem.nmisfits, 2)) 

1103 

1104 chains = None 

1105 fn = get_chains_fn() 

1106 if fn and nchains is not None: 

1107 with open(fn, 'r') as f: 

1108 f.seek(nmodels_skip * nchains * 8) 

1109 chains = num.fromfile( 

1110 f, dtype='<f8', 

1111 count=nmodels*nchains)\ 

1112 .astype(float) 

1113 

1114 chains = chains.reshape((nmodels, nchains)) 

1115 

1116 sampler_contexts = None 

1117 fn = op.join(dirname, 'choices') 

1118 if op.exists(fn): 

1119 with open(fn, 'r') as f: 

1120 f.seek(nmodels_skip * 4 * 8) 

1121 sampler_contexts = num.fromfile( 

1122 f, dtype='<i8', 

1123 count=nmodels*4).astype(int) 

1124 

1125 sampler_contexts = sampler_contexts.reshape((nmodels, 4)) 

1126 

1127 except OSError as e: 

1128 logger.debug(str(e)) 

1129 raise ProblemDataNotAvailable( 

1130 'No problem data available (%s).' % dirname) 

1131 

1132 return models, misfits, chains, sampler_contexts 

1133 

1134 

1135__all__ = ''' 

1136 ProblemConfig 

1137 Problem 

1138 ModelHistory 

1139 ProblemInfoNotAvailable 

1140 ProblemDataNotAvailable 

1141 load_problem_info 

1142 load_problem_info_and_data 

1143 InvalidAttributeName 

1144 NoSuchAttribute 

1145'''.split()