1''' 

2Base classes for Grond's problem definition and the model history container. 

3 

4Common behaviour of all source models offered by Grond is implemented here. 

5Source model specific details are implemented in the respective submodules. 

6''' 

7 

8import numpy as num 

9import math 

10import copy 

11import logging 

12import os.path as op 

13import os 

14import time 

15 

16from pyrocko import gf, util, guts 

17from pyrocko.guts import Object, String, List, Dict, Int 

18 

19from grond.meta import ADict, Parameter, GrondError, xjoin, Forbidden, \ 

20 StringID, has_get_plot_classes 

21from ..targets import MisfitResult, MisfitTarget, TargetGroup, \ 

22 WaveformMisfitTarget, SatelliteMisfitTarget, GNSSCampaignMisfitTarget 

23 

24from grond import stats 

25 

26from grond.version import __version__ 

27 

28guts_prefix = 'grond' 

29logger = logging.getLogger('grond.problems.base') 

30km = 1e3 

31as_km = dict(scale_factor=km, scale_unit='km') 

32 

33g_rstate = num.random.RandomState() 

34 

35 

36def nextpow2(i): 

37 return 2**int(math.ceil(math.log(i)/math.log(2.))) 

38 

39 

40def correlated_weights(values, weight_matrix): 

41 ''' 

42 Applies correlated weights to values 

43 

44 The resulting weighed values have to be squared! Check out 

45 :meth:`Problem.combine_misfits` for more information. 

46 

47 :param values: Misfits or norms as :class:`numpy.Array` 

48 :param weight: Weight matrix, commonly the inverse of covariance matrix 

49 

50 :returns: :class:`numpy.Array` weighted values 

51 ''' 

52 return num.matmul(values, weight_matrix) 

53 

54 

55class ProblemConfig(Object): 

56 ''' 

57 Base class for config section defining the objective function setup. 

58 

59 Factory for :py:class:`Problem` objects. 

60 ''' 

61 name_template = String.T() 

62 norm_exponent = Int.T(default=2) 

63 nthreads = Int.T(default=1) 

64 

65 def get_problem(self, event, target_groups, targets): 

66 ''' 

67 Instantiate the problem with a given event and targets. 

68 

69 :returns: :py:class:`Problem` object 

70 ''' 

71 raise NotImplementedError 

72 

73 

74@has_get_plot_classes 

75class Problem(Object): 

76 ''' 

77 Base class for objective function setup. 

78 

79 Defines the *problem* to be solved by the optimiser. 

80 ''' 

81 name = String.T() 

82 ranges = Dict.T(String.T(), gf.Range.T()) 

83 dependants = List.T(Parameter.T()) 

84 norm_exponent = Int.T(default=2) 

85 base_source = gf.Source.T(optional=True) 

86 targets = List.T(MisfitTarget.T()) 

87 target_groups = List.T(TargetGroup.T()) 

88 grond_version = String.T(optional=True) 

89 nthreads = Int.T(default=1) 

90 

91 def __init__(self, **kwargs): 

92 Object.__init__(self, **kwargs) 

93 

94 if self.grond_version is None: 

95 self.grond_version = __version__ 

96 

97 self._target_weights = None 

98 self._engine = None 

99 self._family_mask = None 

100 

101 if hasattr(self, 'problem_waveform_parameters') and self.has_waveforms: 

102 self.problem_parameters =\ 

103 self.problem_parameters + self.problem_waveform_parameters 

104 

105 unused_parameters = [] 

106 for p in self.problem_parameters: 

107 if p.optional and p._name not in self.ranges.keys(): 

108 unused_parameters.append(p) 

109 

110 for p in unused_parameters: 

111 self.problem_parameters.remove(p) 

112 

113 self.check() 

114 

115 @classmethod 

116 def get_plot_classes(cls): 

117 from . import plot 

118 return plot.get_plot_classes() 

119 

120 def check(self): 

121 paths = set() 

122 for grp in self.target_groups: 

123 if grp.path == 'all': 

124 continue 

125 if grp.path in paths: 

126 raise ValueError('Path %s defined more than once! In %s' 

127 % (grp.path, grp.__class__.__name__)) 

128 paths.add(grp.path) 

129 logger.debug('TargetGroup check OK.') 

130 

131 def copy(self): 

132 o = copy.copy(self) 

133 o._target_weights = None 

134 return o 

135 

136 def set_target_parameter_values(self, x): 

137 nprob = len(self.problem_parameters) 

138 for target in self.targets: 

139 target.set_parameter_values(x[nprob:nprob+target.nparameters]) 

140 nprob += target.nparameters 

141 

142 def get_parameter_dict(self, model, group=None): 

143 params = [] 

144 for ip, p in enumerate(self.parameters): 

145 if group in p.groups or group is None: 

146 params.append((p.name, model[ip])) 

147 return ADict(params) 

148 

149 def get_parameter_array(self, d): 

150 arr = num.zeros(self.nparameters, dtype=float) 

151 for ip, p in enumerate(self.parameters): 

152 if p.name in d.keys(): 

153 arr[ip] = d[p.name] 

154 return arr 

155 

156 def dump_problem_info(self, dirname): 

157 fn = op.join(dirname, 'problem.yaml') 

158 util.ensuredirs(fn) 

159 guts.dump(self, filename=fn) 

160 

161 def dump_problem_data( 

162 self, dirname, x, misfits, chains=None, 

163 sampler_context=None): 

164 

165 fn = op.join(dirname, 'models') 

166 if not isinstance(x, num.ndarray): 

167 x = num.array(x) 

168 with open(fn, 'ab') as f: 

169 x.astype('<f8').tofile(f) 

170 

171 fn = op.join(dirname, 'misfits') 

172 with open(fn, 'ab') as f: 

173 misfits.astype('<f8').tofile(f) 

174 

175 if chains is not None: 

176 fn = op.join(dirname, 'chains') 

177 with open(fn, 'ab') as f: 

178 chains.astype('<f8').tofile(f) 

179 

180 if sampler_context is not None: 

181 fn = op.join(dirname, 'choices') 

182 with open(fn, 'ab') as f: 

183 num.array(sampler_context, dtype='<i8').tofile(f) 

184 

185 def name_to_index(self, name): 

186 pnames = [p.name for p in self.combined] 

187 return pnames.index(name) 

188 

189 @property 

190 def parameters(self): 

191 target_parameters = [] 

192 for target in self.targets: 

193 target_parameters.extend(target.target_parameters) 

194 return self.problem_parameters + target_parameters 

195 

196 @property 

197 def parameter_names(self): 

198 return [p.name for p in self.combined] 

199 

200 @property 

201 def dependant_names(self): 

202 return [p.name for p in self.dependants] 

203 

204 @property 

205 def nparameters(self): 

206 return len(self.parameters) 

207 

208 @property 

209 def ntargets(self): 

210 return len(self.targets) 

211 

212 @property 

213 def nwaveform_targets(self): 

214 return len(self.waveform_targets) 

215 

216 @property 

217 def nsatellite_targets(self): 

218 return len(self.satellite_targets) 

219 

220 @property 

221 def ngnss_targets(self): 

222 return len(self.gnss_targets) 

223 

224 @property 

225 def nmisfits(self): 

226 nmisfits = 0 

227 for target in self.targets: 

228 nmisfits += target.nmisfits 

229 return nmisfits 

230 

231 @property 

232 def ndependants(self): 

233 return len(self.dependants) 

234 

235 @property 

236 def ncombined(self): 

237 return len(self.parameters) + len(self.dependants) 

238 

239 @property 

240 def combined(self): 

241 return self.parameters + self.dependants 

242 

243 @property 

244 def satellite_targets(self): 

245 return [t for t in self.targets 

246 if isinstance(t, SatelliteMisfitTarget)] 

247 

248 @property 

249 def gnss_targets(self): 

250 return [t for t in self.targets 

251 if isinstance(t, GNSSCampaignMisfitTarget)] 

252 

253 @property 

254 def waveform_targets(self): 

255 return [t for t in self.targets 

256 if isinstance(t, WaveformMisfitTarget)] 

257 

258 @property 

259 def has_satellite(self): 

260 if self.satellite_targets: 

261 return True 

262 return False 

263 

264 @property 

265 def has_waveforms(self): 

266 if self.waveform_targets: 

267 return True 

268 return False 

269 

270 def set_engine(self, engine): 

271 self._engine = engine 

272 

273 def get_engine(self): 

274 return self._engine 

275 

276 def get_gf_store(self, target): 

277 if self.get_engine() is None: 

278 raise GrondError('Cannot get GF Store, modelling is not set up.') 

279 return self.get_engine().get_store(target.store_id) 

280 

281 def random_uniform(self, xbounds, rstate, fixed_magnitude=None): 

282 if fixed_magnitude is not None: 

283 raise GrondError( 

284 'Setting fixed magnitude in random model generation not ' 

285 'supported for this type of problem.') 

286 

287 x = rstate.uniform(0., 1., self.nparameters) 

288 x *= (xbounds[:, 1] - xbounds[:, 0]) 

289 x += xbounds[:, 0] 

290 return x 

291 

292 def preconstrain(self, x): 

293 return x 

294 

295 def extract(self, xs, i): 

296 if xs.ndim == 1: 

297 return self.extract(xs[num.newaxis, :], i)[0] 

298 

299 if i < self.nparameters: 

300 return xs[:, i] 

301 else: 

302 return self.make_dependant( 

303 xs, self.dependants[i-self.nparameters].name) 

304 

305 def get_target_weights(self): 

306 if self._target_weights is None: 

307 self._target_weights = num.concatenate( 

308 [target.get_combined_weight() for target in self.targets]) 

309 

310 return self._target_weights 

311 

312 def get_target_residuals(self): 

313 pass 

314 

315 def inter_family_weights(self, ns): 

316 exp, root = self.get_norm_functions() 

317 

318 family, nfamilies = self.get_family_mask() 

319 

320 ws = num.zeros(self.nmisfits) 

321 for ifamily in range(nfamilies): 

322 mask = family == ifamily 

323 ws[mask] = 1.0 / root(num.nansum(exp(ns[mask]))) 

324 

325 return ws 

326 

327 def inter_family_weights2(self, ns): 

328 ''' 

329 :param ns: 2D array with normalization factors ``ns[imodel, itarget]`` 

330 :returns: 2D array ``weights[imodel, itarget]`` 

331 ''' 

332 

333 exp, root = self.get_norm_functions() 

334 family, nfamilies = self.get_family_mask() 

335 

336 ws = num.zeros(ns.shape) 

337 for ifamily in range(nfamilies): 

338 mask = family == ifamily 

339 ws[:, mask] = (1.0 / root( 

340 num.nansum(exp(ns[:, mask]), axis=1)))[:, num.newaxis] 

341 

342 return ws 

343 

344 def get_reference_model(self): 

345 model = num.zeros(self.nparameters) 

346 model_source_params = self.pack(self.base_source) 

347 model[:model_source_params.size] = model_source_params 

348 return model 

349 

350 def get_parameter_bounds(self): 

351 out = [] 

352 for p in self.problem_parameters: 

353 r = self.ranges[p.name] 

354 out.append((r.start, r.stop)) 

355 

356 for target in self.targets: 

357 for p in target.target_parameters: 

358 r = target.target_ranges[p.name_nogroups] 

359 out.append((r.start, r.stop)) 

360 

361 return num.array(out, dtype=float) 

362 

363 def get_dependant_bounds(self): 

364 return num.zeros((0, 2)) 

365 

366 def get_combined_bounds(self): 

367 return num.vstack(( 

368 self.get_parameter_bounds(), 

369 self.get_dependant_bounds())) 

370 

371 def raise_invalid_norm_exponent(self): 

372 raise GrondError('Invalid norm exponent: %f' % self.norm_exponent) 

373 

374 def get_norm_functions(self): 

375 if self.norm_exponent == 2: 

376 def sqr(x): 

377 return x**2 

378 

379 return sqr, num.sqrt 

380 

381 elif self.norm_exponent == 1: 

382 def noop(x): 

383 return x 

384 

385 return noop, num.abs 

386 

387 else: 

388 self.raise_invalid_norm_exponent() 

389 

390 def combine_misfits( 

391 self, misfits, 

392 extra_weights=None, 

393 extra_residuals=None, 

394 extra_correlated_weights=dict(), 

395 get_contributions=False): 

396 

397 ''' 

398 Combine misfit contributions (residuals) to global or bootstrap misfits 

399 

400 :param misfits: 3D array ``misfits[imodel, iresidual, 0]`` are the 

401 misfit contributions (residuals) ``misfits[imodel, iresidual, 1]`` 

402 are the normalisation contributions. It is also possible to give 

403 the misfit and normalisation contributions for a single model as 

404 ``misfits[iresidual, 0]`` and misfits[iresidual, 1]`` in which 

405 case, the first dimension (imodel) of the result will be stipped 

406 off. 

407 

408 :param extra_weights: if given, 2D array of extra weights to be applied 

409 to the contributions, indexed as 

410 ``extra_weights[ibootstrap, iresidual]``. 

411 

412 :param extra_residuals: if given, 2D array of perturbations to be added 

413 to the residuals, indexed as 

414 ``extra_residuals[ibootstrap, iresidual]``. 

415 

416 :param extra_correlated_weights: if a dictionary of 

417 ``imisfit: correlated weight matrix`` is passed a correlated 

418 weight matrix is applied to the misfit and normalisation values. 

419 `imisfit` is the starting index in the misfits vector the 

420 correlated weight matrix applies to. 

421 

422 :param get_contributions: get the weighted and perturbed contributions 

423 (don't do the sum). 

424 

425 :returns: if no *extra_weights* or *extra_residuals* are given, a 1D 

426 array indexed as ``misfits[imodel]`` containing the global misfit 

427 for each model is returned, otherwise a 2D array 

428 ``misfits[imodel, ibootstrap]`` with the misfit for every model and 

429 weighting/residual set is returned. 

430 ''' 

431 if misfits.ndim == 2: 

432 misfits = misfits[num.newaxis, :, :] 

433 return self.combine_misfits( 

434 misfits, extra_weights, extra_residuals, 

435 extra_correlated_weights, get_contributions)[0, ...] 

436 

437 if extra_weights is None and extra_residuals is None: 

438 return self.combine_misfits( 

439 misfits, False, False, 

440 extra_correlated_weights, get_contributions)[:, 0] 

441 

442 assert misfits.ndim == 3 

443 assert not num.any(extra_weights) or extra_weights.ndim == 2 

444 assert not num.any(extra_residuals) or extra_residuals.ndim == 2 

445 

446 if self.norm_exponent != 2 and extra_correlated_weights: 

447 raise GrondError('Correlated weights can only be used ' 

448 ' with norm_exponent=2') 

449 

450 exp, root = self.get_norm_functions() 

451 

452 nmodels = misfits.shape[0] 

453 nmisfits = misfits.shape[1] # noqa 

454 

455 mf = misfits[:, num.newaxis, :, :].copy() 

456 

457 if num.any(extra_residuals): 

458 mf = mf + extra_residuals[num.newaxis, :, :, num.newaxis] 

459 

460 res = mf[..., 0] 

461 norms = mf[..., 1] 

462 

463 for imisfit, corr_weight_mat in extra_correlated_weights.items(): 

464 

465 jmisfit = imisfit + corr_weight_mat.shape[0] 

466 

467 for imodel in range(nmodels): 

468 corr_res = res[imodel, :, imisfit:jmisfit] 

469 corr_norms = norms[imodel, :, imisfit:jmisfit] 

470 

471 res[imodel, :, imisfit:jmisfit] = \ 

472 correlated_weights(corr_res, corr_weight_mat) 

473 

474 norms[imodel, :, imisfit:jmisfit] = \ 

475 correlated_weights(corr_norms, corr_weight_mat) 

476 

477 # Apply normalization family weights (these weights depend on 

478 # on just calculated correlated norms!) 

479 weights_fam = \ 

480 self.inter_family_weights2(norms[:, 0, :])[:, num.newaxis, :] 

481 

482 weights_fam = exp(weights_fam) 

483 

484 res = exp(res) 

485 norms = exp(norms) 

486 

487 res *= weights_fam 

488 norms *= weights_fam 

489 

490 weights_tar = self.get_target_weights()[num.newaxis, num.newaxis, :] 

491 if num.any(extra_weights): 

492 weights_tar = weights_tar * extra_weights[num.newaxis, :, :] 

493 

494 weights_tar = exp(weights_tar) 

495 

496 res = res * weights_tar 

497 norms = norms * weights_tar 

498 

499 if get_contributions: 

500 return res / num.nansum(norms, axis=2)[:, :, num.newaxis] 

501 

502 result = root( 

503 num.nansum(res, axis=2) / 

504 num.nansum(norms, axis=2)) 

505 

506 assert result[result < 0].size == 0 

507 return result 

508 

509 def make_family_mask(self): 

510 family_names = set() 

511 families = num.zeros(self.nmisfits, dtype=int) 

512 

513 idx = 0 

514 for itarget, target in enumerate(self.targets): 

515 family_names.add(target.normalisation_family) 

516 families[idx:idx + target.nmisfits] = len(family_names) - 1 

517 idx += target.nmisfits 

518 

519 return families, len(family_names) 

520 

521 def get_family_mask(self): 

522 if self._family_mask is None: 

523 self._family_mask = self.make_family_mask() 

524 

525 return self._family_mask 

526 

527 def evaluate(self, x, mask=None, result_mode='full', targets=None, 

528 nthreads=1): 

529 source = self.get_source(x) 

530 engine = self.get_engine() 

531 

532 self.set_target_parameter_values(x) 

533 

534 if mask is not None and targets is not None: 

535 raise ValueError('Mask cannot be defined with targets set.') 

536 targets = targets if targets is not None else self.targets 

537 

538 for target in targets: 

539 target.set_result_mode(result_mode) 

540 

541 modelling_targets = [] 

542 t2m_map = {} 

543 for itarget, target in enumerate(targets): 

544 t2m_map[target] = target.prepare_modelling(engine, source, targets) 

545 if mask is None or mask[itarget]: 

546 modelling_targets.extend(t2m_map[target]) 

547 

548 u2m_map = {} 

549 for imtarget, mtarget in enumerate(modelling_targets): 

550 if mtarget not in u2m_map: 

551 u2m_map[mtarget] = [] 

552 

553 u2m_map[mtarget].append(imtarget) 

554 

555 modelling_targets_unique = list(u2m_map.keys()) 

556 

557 resp = engine.process(source, modelling_targets_unique, 

558 nthreads=nthreads) 

559 modelling_results_unique = list(resp.results_list[0]) 

560 

561 modelling_results = [None] * len(modelling_targets) 

562 

563 for mtarget, mresult in zip( 

564 modelling_targets_unique, modelling_results_unique): 

565 

566 for itarget in u2m_map[mtarget]: 

567 modelling_results[itarget] = mresult 

568 

569 imt = 0 

570 results = [] 

571 for itarget, target in enumerate(targets): 

572 nmt_this = len(t2m_map[target]) 

573 if mask is None or mask[itarget]: 

574 result = target.finalize_modelling( 

575 engine, source, 

576 t2m_map[target], 

577 modelling_results[imt:imt+nmt_this]) 

578 

579 imt += nmt_this 

580 else: 

581 result = gf.SeismosizerError( 

582 'target was excluded from modelling') 

583 

584 results.append(result) 

585 

586 return results 

587 

588 def misfits(self, x, mask=None, nthreads=1): 

589 results = self.evaluate( 

590 x, mask=mask, result_mode='sparse', nthreads=nthreads) 

591 misfits = num.full((self.nmisfits, 2), num.nan) 

592 

593 imisfit = 0 

594 for target, result in zip(self.targets, results): 

595 if isinstance(result, MisfitResult): 

596 misfits[imisfit:imisfit+target.nmisfits, :] = result.misfits 

597 

598 imisfit += target.nmisfits 

599 

600 return misfits 

601 

602 def forward(self, x): 

603 source = self.get_source(x) 

604 engine = self.get_engine() 

605 

606 plain_targets = [] 

607 for target in self.targets: 

608 plain_targets.extend(target.get_plain_targets(engine, source)) 

609 

610 resp = engine.process(source, plain_targets) 

611 

612 results = [] 

613 for target, result in zip(plain_targets, resp.results_list[0]): 

614 if isinstance(result, gf.SeismosizerError): 

615 logger.debug( 

616 '%s.%s.%s.%s: %s' % (target.codes + (str(result),))) 

617 else: 

618 results.append(result) 

619 

620 return results 

621 

622 def get_random_model(self, ntries_limit=100): 

623 xbounds = self.get_parameter_bounds() 

624 

625 for _ in range(ntries_limit): 

626 x = self.random_uniform(xbounds, rstate=g_rstate) 

627 try: 

628 return self.preconstrain(x) 

629 

630 except Forbidden: 

631 pass 

632 

633 raise GrondError( 

634 'Could not find any suitable candidate sample within %i tries' % ( 

635 ntries_limit)) 

636 

637 

638class ProblemInfoNotAvailable(GrondError): 

639 pass 

640 

641 

642class ProblemDataNotAvailable(GrondError): 

643 pass 

644 

645 

646class NoSuchAttribute(GrondError): 

647 pass 

648 

649 

650class InvalidAttributeName(GrondError): 

651 pass 

652 

653 

654class ModelHistory(object): 

655 ''' 

656 Write, read and follow sequences of models produced in an optimisation run. 

657 

658 :param problem: :class:`grond.Problem` instance 

659 :param path: path to rundir, defaults to None 

660 :type path: str, optional 

661 :param mode: open mode, 'r': read, 'w': write 

662 :type mode: str, optional 

663 ''' 

664 

665 nmodels_capacity_min = 1024 

666 

667 def __init__(self, problem, nchains=None, path=None, mode='r'): 

668 self.mode = mode 

669 

670 self.problem = problem 

671 self.path = path 

672 self.nchains = nchains 

673 

674 self._models_buffer = None 

675 self._misfits_buffer = None 

676 self._bootstraps_buffer = None 

677 self._sample_contexts_buffer = None 

678 

679 self._sorted_misfit_idx = {} 

680 

681 self.models = None 

682 self.misfits = None 

683 self.bootstrap_misfits = None 

684 

685 self.sampler_contexts = None 

686 

687 self.nmodels_capacity = self.nmodels_capacity_min 

688 self.listeners = [] 

689 

690 self._attributes = {} 

691 

692 if mode == 'r': 

693 self.load() 

694 

695 @staticmethod 

696 def verify_rundir(rundir): 

697 _rundir_files = ('misfits', 'models') 

698 

699 if not op.exists(rundir): 

700 raise ProblemDataNotAvailable( 

701 'Directory does not exist: %s' % rundir) 

702 for f in _rundir_files: 

703 if not op.exists(op.join(rundir, f)): 

704 raise ProblemDataNotAvailable('File not found: %s' % f) 

705 

706 @classmethod 

707 def follow(cls, path, nchains=None, wait=20.): 

708 ''' 

709 Start following a rundir (constructor). 

710 

711 :param path: the path to follow, a grond rundir 

712 :type path: str, optional 

713 :param wait: wait time until the folder become alive 

714 :type wait: number in seconds, optional 

715 :returns: A :py:class:`ModelHistory` instance 

716 ''' 

717 start_watch = time.time() 

718 while (time.time() - start_watch) < wait: 

719 try: 

720 cls.verify_rundir(path) 

721 problem = load_problem_info(path) 

722 return cls(problem, nchains=nchains, path=path, mode='r') 

723 except (ProblemDataNotAvailable, OSError): 

724 time.sleep(.25) 

725 

726 @property 

727 def nmodels(self): 

728 if self.models is None: 

729 return 0 

730 else: 

731 return self.models.shape[0] 

732 

733 @nmodels.setter 

734 def nmodels(self, nmodels_new): 

735 assert 0 <= nmodels_new <= self.nmodels 

736 self.models = self._models_buffer[:nmodels_new, :] 

737 self.misfits = self._misfits_buffer[:nmodels_new, :, :] 

738 if self.nchains is not None: 

739 self.bootstrap_misfits = self._bootstraps_buffer[:nmodels_new, :, :] # noqa 

740 if self._sample_contexts_buffer is not None: 

741 self.sampler_contexts = self._sample_contexts_buffer[:nmodels_new, :] # noqa 

742 

743 @property 

744 def nmodels_capacity(self): 

745 if self._models_buffer is None: 

746 return 0 

747 else: 

748 return self._models_buffer.shape[0] 

749 

750 @nmodels_capacity.setter 

751 def nmodels_capacity(self, nmodels_capacity_new): 

752 if self.nmodels_capacity != nmodels_capacity_new: 

753 

754 models_buffer = num.zeros( 

755 (nmodels_capacity_new, self.problem.nparameters), 

756 dtype=float) 

757 misfits_buffer = num.zeros( 

758 (nmodels_capacity_new, self.problem.nmisfits, 2), 

759 dtype=float) 

760 sample_contexts_buffer = num.zeros( 

761 (nmodels_capacity_new, 4), 

762 dtype=int) 

763 sample_contexts_buffer.fill(-1) 

764 

765 if self.nchains is not None: 

766 bootstraps_buffer = num.zeros( 

767 (nmodels_capacity_new, self.nchains), 

768 dtype=float) 

769 

770 ncopy = min(self.nmodels, nmodels_capacity_new) 

771 

772 if self._models_buffer is not None: 

773 models_buffer[:ncopy, :] = \ 

774 self._models_buffer[:ncopy, :] 

775 misfits_buffer[:ncopy, :, :] = \ 

776 self._misfits_buffer[:ncopy, :, :] 

777 sample_contexts_buffer[:ncopy, :] = \ 

778 self._sample_contexts_buffer[:ncopy, :] 

779 

780 self._models_buffer = models_buffer 

781 self._misfits_buffer = misfits_buffer 

782 self._sample_contexts_buffer = sample_contexts_buffer 

783 

784 if self.nchains is not None: 

785 if self._bootstraps_buffer is not None: 

786 bootstraps_buffer[:ncopy, :] = \ 

787 self._bootstraps_buffer[:ncopy, :] 

788 self._bootstraps_buffer = bootstraps_buffer 

789 

790 def clear(self): 

791 assert self.mode != 'r', 'History is read-only, cannot clear.' 

792 self.nmodels = 0 

793 self.nmodels_capacity = self.nmodels_capacity_min 

794 

795 def extend( 

796 self, models, misfits, 

797 bootstrap_misfits=None, 

798 sampler_contexts=None): 

799 

800 nmodels = self.nmodels 

801 n = models.shape[0] 

802 

803 nmodels_capacity_want = max( 

804 self.nmodels_capacity_min, nextpow2(nmodels + n)) 

805 

806 if nmodels_capacity_want != self.nmodels_capacity: 

807 self.nmodels_capacity = nmodels_capacity_want 

808 

809 self._models_buffer[nmodels:nmodels+n, :] = models 

810 self._misfits_buffer[nmodels:nmodels+n, :, :] = misfits 

811 

812 self.models = self._models_buffer[:nmodels+n, :] 

813 self.misfits = self._misfits_buffer[:nmodels+n, :, :] 

814 

815 if bootstrap_misfits is not None: 

816 self._bootstraps_buffer[nmodels:nmodels+n, :] = bootstrap_misfits 

817 self.bootstrap_misfits = self._bootstraps_buffer[:nmodels+n, :] 

818 

819 if sampler_contexts is not None: 

820 self._sample_contexts_buffer[nmodels:nmodels+n, :] \ 

821 = sampler_contexts 

822 self.sampler_contexts = self._sample_contexts_buffer[:nmodels+n, :] 

823 

824 if self.path and self.mode == 'w': 

825 for i in range(n): 

826 self.problem.dump_problem_data( 

827 self.path, models[i, :], misfits[i, :, :], 

828 bootstrap_misfits[i, :] 

829 if bootstrap_misfits is not None else None, 

830 sampler_contexts[i, :] 

831 if sampler_contexts is not None else None) 

832 

833 self._sorted_misfit_idx.clear() 

834 

835 self.emit('extend', nmodels, n, models, misfits, sampler_contexts) 

836 

837 def append( 

838 self, model, misfits, 

839 bootstrap_misfits=None, 

840 sampler_context=None): 

841 

842 if bootstrap_misfits is not None: 

843 bootstrap_misfits = bootstrap_misfits[num.newaxis, :] 

844 

845 if sampler_context is not None: 

846 sampler_context = sampler_context[num.newaxis, :] 

847 

848 return self.extend( 

849 model[num.newaxis, :], misfits[num.newaxis, :, :], 

850 bootstrap_misfits, sampler_context) 

851 

852 def load(self): 

853 self.mode = 'r' 

854 self.verify_rundir(self.path) 

855 models, misfits, bootstraps, sampler_contexts = load_problem_data( 

856 self.path, self.problem, nchains=self.nchains) 

857 self.extend(models, misfits, bootstraps, sampler_contexts) 

858 

859 def update(self): 

860 ''' Update history from path ''' 

861 nmodels_available = get_nmodels(self.path, self.problem) 

862 if self.nmodels == nmodels_available: 

863 return 

864 

865 try: 

866 new_models, new_misfits, new_bootstraps, new_sampler_contexts = \ 

867 load_problem_data( 

868 self.path, 

869 self.problem, 

870 nmodels_skip=self.nmodels, 

871 nchains=self.nchains) 

872 

873 except ValueError: 

874 return 

875 

876 self.extend( 

877 new_models, 

878 new_misfits, 

879 new_bootstraps, 

880 new_sampler_contexts) 

881 

882 def add_listener(self, listener): 

883 ''' Add a listener to the history 

884 

885 The listening class can implement the following methods: 

886 * ``extend`` 

887 ''' 

888 self.listeners.append(listener) 

889 

890 def emit(self, event_name, *args, **kwargs): 

891 for listener in self.listeners: 

892 slot = getattr(listener, event_name, None) 

893 if callable(slot): 

894 slot(*args, **kwargs) 

895 

896 @property 

897 def attribute_names(self): 

898 apath = op.join(self.path, 'attributes') 

899 if not os.path.exists(apath): 

900 return [] 

901 

902 return [fn for fn in os.listdir(apath) 

903 if StringID.regex.match(fn)] 

904 

905 def get_attribute(self, name): 

906 if name not in self._attributes: 

907 if name not in self.attribute_names: 

908 raise NoSuchAttribute(name) 

909 

910 path = op.join(self.path, 'attributes', name) 

911 

912 with open(path, 'rb') as f: 

913 self._attributes[name] = num.fromfile( 

914 f, dtype='<i4', 

915 count=self.nmodels).astype(int) 

916 

917 assert self._attributes[name].shape == (self.nmodels,) 

918 

919 return self._attributes[name] 

920 

921 def set_attribute(self, name, attribute): 

922 if not StringID.regex.match(name): 

923 raise InvalidAttributeName(name) 

924 

925 attribute = attribute.astype(int) 

926 assert attribute.shape == (self.nmodels,) 

927 

928 apath = op.join(self.path, 'attributes') 

929 

930 if not os.path.exists(apath): 

931 os.mkdir(apath) 

932 

933 path = op.join(apath, name) 

934 

935 with open(path, 'wb') as f: 

936 attribute.astype('<i4').tofile(f) 

937 

938 self._attributes[name] = attribute 

939 

940 def ensure_bootstrap_misfits(self, optimiser): 

941 if self.bootstrap_misfits is None: 

942 problem = self.problem 

943 self.bootstrap_misfits = problem.combine_misfits( 

944 self.misfits, 

945 extra_weights=optimiser.get_bootstrap_weights(problem), 

946 extra_residuals=optimiser.get_bootstrap_residuals(problem)) 

947 

948 def imodels_by_cluster(self, cluster_attribute): 

949 if cluster_attribute is None: 

950 return [(-1, 100.0, num.arange(self.nmodels))] 

951 

952 by_cluster = [] 

953 try: 

954 iclusters = self.get_attribute(cluster_attribute) 

955 iclusters_avail = num.unique(iclusters) 

956 

957 for icluster in iclusters_avail: 

958 imodels = num.where(iclusters == icluster)[0] 

959 by_cluster.append( 

960 (icluster, 

961 (100.0 * imodels.size) / self.nmodels, 

962 imodels)) 

963 

964 if by_cluster and by_cluster[0][0] == -1: 

965 by_cluster.append(by_cluster.pop(0)) 

966 

967 except NoSuchAttribute: 

968 logger.warning( 

969 'Attribute %s not set in run %s.\n' 

970 ' Skipping model retrieval by clusters.' % ( 

971 cluster_attribute, self.problem.name)) 

972 

973 return by_cluster 

974 

975 def models_by_cluster(self, cluster_attribute): 

976 if cluster_attribute is None: 

977 return [(-1, 100.0, self.models)] 

978 

979 return [ 

980 (icluster, percentage, self.models[imodels]) 

981 for (icluster, percentage, imodels) 

982 in self.imodels_by_cluster(cluster_attribute)] 

983 

984 def mean_sources_by_cluster(self, cluster_attribute): 

985 return [ 

986 (icluster, percentage, stats.get_mean_source(self.problem, models)) 

987 for (icluster, percentage, models) 

988 in self.models_by_cluster(cluster_attribute)] 

989 

990 def get_sorted_misfits_idx(self, chain=0): 

991 if chain not in self._sorted_misfit_idx.keys(): 

992 self._sorted_misfit_idx[chain] = num.argsort( 

993 self.bootstrap_misfits[:, chain]) 

994 

995 return self._sorted_misfit_idx[chain] 

996 

997 def get_sorted_misfits(self, chain=0): 

998 isort = self.get_sorted_misfits_idx(chain) 

999 return self.bootstrap_misfits[:, chain][isort] 

1000 

1001 def get_sorted_models(self, chain=0): 

1002 isort = self.get_sorted_misfits_idx(chain=0) 

1003 return self.models[isort, :] 

1004 

1005 def get_sorted_primary_misfits(self): 

1006 return self.get_sorted_misfits(chain=0) 

1007 

1008 def get_sorted_primary_models(self): 

1009 return self.get_sorted_models(chain=0) 

1010 

1011 def get_best_model(self, chain=0): 

1012 return self.get_sorted_models(chain)[0, ...] 

1013 

1014 def get_best_misfit(self, chain=0): 

1015 return self.get_sorted_misfits(chain)[0] 

1016 

1017 def get_mean_model(self): 

1018 return num.mean(self.models, axis=0) 

1019 

1020 def get_mean_misfit(self, chain=0): 

1021 return num.mean(self.bootstrap_misfits[:, chain]) 

1022 

1023 def get_best_source(self, chain=0): 

1024 return self.problem.get_source(self.get_best_model(chain)) 

1025 

1026 def get_mean_source(self, chain=0): 

1027 return self.problem.get_source(self.get_mean_model()) 

1028 

1029 def get_chain_misfits(self, chain=0): 

1030 return self.bootstrap_misfits[:, chain] 

1031 

1032 def get_primary_chain_misfits(self): 

1033 return self.get_chain_misfits(chain=0) 

1034 

1035 

1036def get_nmodels(dirname, problem): 

1037 fn = op.join(dirname, 'models') 

1038 with open(fn, 'r') as f: 

1039 nmodels1 = os.fstat(f.fileno()).st_size // (problem.nparameters * 8) 

1040 

1041 fn = op.join(dirname, 'misfits') 

1042 with open(fn, 'r') as f: 

1043 nmodels2 = os.fstat(f.fileno()).st_size // (problem.nmisfits * 2 * 8) 

1044 

1045 return min(nmodels1, nmodels2) 

1046 

1047 

1048def load_problem_info_and_data(dirname, subset=None, nchains=None): 

1049 problem = load_problem_info(dirname) 

1050 models, misfits, bootstraps, sampler_contexts = load_problem_data( 

1051 xjoin(dirname, subset), problem, nchains=nchains) 

1052 return problem, models, misfits, bootstraps, sampler_contexts 

1053 

1054 

1055def load_optimiser_info(dirname): 

1056 fn = op.join(dirname, 'optimiser.yaml') 

1057 return guts.load(filename=fn) 

1058 

1059 

1060def load_problem_info(dirname): 

1061 try: 

1062 fn = op.join(dirname, 'problem.yaml') 

1063 return guts.load(filename=fn) 

1064 except OSError as e: 

1065 logger.debug(e) 

1066 raise ProblemInfoNotAvailable( 

1067 'No problem info available (%s).' % dirname) 

1068 

1069 

1070def load_problem_data(dirname, problem, nmodels_skip=0, nchains=None): 

1071 

1072 def get_chains_fn(): 

1073 for fn in (op.join(dirname, 'bootstraps'), 

1074 op.join(dirname, 'chains')): 

1075 if op.exists(fn): 

1076 return fn 

1077 return False 

1078 

1079 try: 

1080 nmodels = get_nmodels(dirname, problem) - nmodels_skip 

1081 

1082 fn = op.join(dirname, 'models') 

1083 with open(fn, 'r') as f: 

1084 f.seek(nmodels_skip * problem.nparameters * 8) 

1085 models = num.fromfile( 

1086 f, dtype='<f8', 

1087 count=nmodels * problem.nparameters)\ 

1088 .astype(float) 

1089 

1090 models = models.reshape((nmodels, problem.nparameters)) 

1091 

1092 fn = op.join(dirname, 'misfits') 

1093 with open(fn, 'r') as f: 

1094 f.seek(nmodels_skip * problem.nmisfits * 2 * 8) 

1095 misfits = num.fromfile( 

1096 f, dtype='<f8', 

1097 count=nmodels*problem.nmisfits*2)\ 

1098 .astype(float) 

1099 misfits = misfits.reshape((nmodels, problem.nmisfits, 2)) 

1100 

1101 chains = None 

1102 fn = get_chains_fn() 

1103 if fn and nchains is not None: 

1104 with open(fn, 'r') as f: 

1105 f.seek(nmodels_skip * nchains * 8) 

1106 chains = num.fromfile( 

1107 f, dtype='<f8', 

1108 count=nmodels*nchains)\ 

1109 .astype(float) 

1110 

1111 chains = chains.reshape((nmodels, nchains)) 

1112 

1113 sampler_contexts = None 

1114 fn = op.join(dirname, 'choices') 

1115 if op.exists(fn): 

1116 with open(fn, 'r') as f: 

1117 f.seek(nmodels_skip * 4 * 8) 

1118 sampler_contexts = num.fromfile( 

1119 f, dtype='<i8', 

1120 count=nmodels*4).astype(int) 

1121 

1122 sampler_contexts = sampler_contexts.reshape((nmodels, 4)) 

1123 

1124 except OSError as e: 

1125 logger.debug(str(e)) 

1126 raise ProblemDataNotAvailable( 

1127 'No problem data available (%s).' % dirname) 

1128 

1129 return models, misfits, chains, sampler_contexts 

1130 

1131 

1132__all__ = ''' 

1133 ProblemConfig 

1134 Problem 

1135 ModelHistory 

1136 ProblemInfoNotAvailable 

1137 ProblemDataNotAvailable 

1138 load_problem_info 

1139 load_problem_info_and_data 

1140 InvalidAttributeName 

1141 NoSuchAttribute 

1142'''.split()