1'''
2Base classes for Grond's problem definition and the model history container.
4Common behaviour of all source models offered by Grond is implemented here.
5Source model specific details are implemented in the respective submodules.
6'''
8import numpy as num
9import math
10import copy
11import logging
12import os.path as op
13import os
14import time
16from pyrocko import gf, util, guts
17from pyrocko.guts import Object, String, List, Dict, Int
19from grond.meta import ADict, Parameter, GrondError, xjoin, Forbidden, \
20 StringID, has_get_plot_classes
21from ..targets import MisfitResult, MisfitTarget, TargetGroup, \
22 WaveformMisfitTarget, SatelliteMisfitTarget, GNSSCampaignMisfitTarget
24from grond import stats
26from grond.version import __version__
28guts_prefix = 'grond'
29logger = logging.getLogger('grond.problems.base')
30km = 1e3
31as_km = dict(scale_factor=km, scale_unit='km')
33g_rstate = num.random.RandomState()
36def nextpow2(i):
37 return 2**int(math.ceil(math.log(i)/math.log(2.)))
40def correlated_weights(values, weight_matrix):
41 '''
42 Applies correlated weights to values
44 The resulting weighed values have to be squared! Check out
45 :meth:`Problem.combine_misfits` for more information.
47 :param values: Misfits or norms as :class:`numpy.Array`
48 :param weight: Weight matrix, commonly the inverse of covariance matrix
50 :returns: :class:`numpy.Array` weighted values
51 '''
52 return num.matmul(values, weight_matrix)
55class ProblemConfig(Object):
56 '''
57 Base class for config section defining the objective function setup.
59 Factory for :py:class:`Problem` objects.
60 '''
61 name_template = String.T()
62 norm_exponent = Int.T(default=2)
63 nthreads = Int.T(default=1)
65 def get_problem(self, event, target_groups, targets):
66 '''
67 Instantiate the problem with a given event and targets.
69 :returns: :py:class:`Problem` object
70 '''
71 raise NotImplementedError
74@has_get_plot_classes
75class Problem(Object):
76 '''
77 Base class for objective function setup.
79 Defines the *problem* to be solved by the optimiser.
80 '''
81 name = String.T()
82 ranges = Dict.T(String.T(), gf.Range.T())
83 dependants = List.T(Parameter.T())
84 norm_exponent = Int.T(default=2)
85 base_source = gf.Source.T(optional=True)
86 targets = List.T(MisfitTarget.T())
87 target_groups = List.T(TargetGroup.T())
88 grond_version = String.T(optional=True)
89 nthreads = Int.T(default=1)
91 def __init__(self, **kwargs):
92 Object.__init__(self, **kwargs)
94 if self.grond_version is None:
95 self.grond_version = __version__
97 self._target_weights = None
98 self._engine = None
99 self._family_mask = None
101 if hasattr(self, 'problem_waveform_parameters') and self.has_waveforms:
102 self.problem_parameters =\
103 self.problem_parameters + self.problem_waveform_parameters
105 unused_parameters = []
106 for p in self.problem_parameters:
107 if p.optional and p._name not in self.ranges.keys():
108 unused_parameters.append(p)
110 for p in unused_parameters:
111 self.problem_parameters.remove(p)
113 self.check()
115 @classmethod
116 def get_plot_classes(cls):
117 from . import plot
118 return plot.get_plot_classes()
120 def check(self):
121 paths = set()
122 for grp in self.target_groups:
123 if grp.path == 'all':
124 continue
125 if grp.path in paths:
126 raise ValueError('Path %s defined more than once! In %s'
127 % (grp.path, grp.__class__.__name__))
128 paths.add(grp.path)
129 logger.debug('TargetGroup check OK.')
131 def copy(self):
132 o = copy.copy(self)
133 o._target_weights = None
134 return o
136 def set_target_parameter_values(self, x):
137 nprob = len(self.problem_parameters)
138 for target in self.targets:
139 target.set_parameter_values(x[nprob:nprob+target.nparameters])
140 nprob += target.nparameters
142 def get_parameter_dict(self, model, group=None):
143 params = []
144 for ip, p in enumerate(self.parameters):
145 if group in p.groups or group is None:
146 params.append((p.name, model[ip]))
147 return ADict(params)
149 def get_parameter_array(self, d):
150 arr = num.zeros(self.nparameters, dtype=float)
151 for ip, p in enumerate(self.parameters):
152 if p.name in d.keys():
153 arr[ip] = d[p.name]
154 return arr
156 def get_parameter_index(self, param_name):
157 return {k.name: ik for ik, k in enumerate(self.parameters)}[param_name]
159 def dump_problem_info(self, dirname):
160 fn = op.join(dirname, 'problem.yaml')
161 util.ensuredirs(fn)
162 guts.dump(self, filename=fn)
164 def dump_problem_data(
165 self, dirname, x, misfits, chains=None,
166 sampler_context=None):
168 fn = op.join(dirname, 'models')
169 if not isinstance(x, num.ndarray):
170 x = num.array(x)
171 with open(fn, 'ab') as f:
172 x.astype('<f8').tofile(f)
174 fn = op.join(dirname, 'misfits')
175 with open(fn, 'ab') as f:
176 misfits.astype('<f8').tofile(f)
178 if chains is not None:
179 fn = op.join(dirname, 'chains')
180 with open(fn, 'ab') as f:
181 chains.astype('<f8').tofile(f)
183 if sampler_context is not None:
184 fn = op.join(dirname, 'choices')
185 with open(fn, 'ab') as f:
186 num.array(sampler_context, dtype='<i8').tofile(f)
188 def name_to_index(self, name):
189 pnames = [p.name for p in self.combined]
190 return pnames.index(name)
192 @property
193 def parameters(self):
194 target_parameters = []
195 for target in self.targets:
196 target_parameters.extend(target.target_parameters)
197 return self.problem_parameters + target_parameters
199 @property
200 def parameter_names(self):
201 return [p.name for p in self.combined]
203 @property
204 def dependant_names(self):
205 return [p.name for p in self.dependants]
207 @property
208 def nparameters(self):
209 return len(self.parameters)
211 @property
212 def ntargets(self):
213 return len(self.targets)
215 @property
216 def nwaveform_targets(self):
217 return len(self.waveform_targets)
219 @property
220 def nsatellite_targets(self):
221 return len(self.satellite_targets)
223 @property
224 def ngnss_targets(self):
225 return len(self.gnss_targets)
227 @property
228 def nmisfits(self):
229 nmisfits = 0
230 for target in self.targets:
231 nmisfits += target.nmisfits
232 return nmisfits
234 @property
235 def ndependants(self):
236 return len(self.dependants)
238 @property
239 def ncombined(self):
240 return len(self.parameters) + len(self.dependants)
242 @property
243 def combined(self):
244 return self.parameters + self.dependants
246 @property
247 def satellite_targets(self):
248 return [t for t in self.targets
249 if isinstance(t, SatelliteMisfitTarget)]
251 @property
252 def gnss_targets(self):
253 return [t for t in self.targets
254 if isinstance(t, GNSSCampaignMisfitTarget)]
256 @property
257 def waveform_targets(self):
258 return [t for t in self.targets
259 if isinstance(t, WaveformMisfitTarget)]
261 @property
262 def has_satellite(self):
263 if self.satellite_targets:
264 return True
265 return False
267 @property
268 def has_waveforms(self):
269 if self.waveform_targets:
270 return True
271 return False
273 def set_engine(self, engine):
274 self._engine = engine
276 def get_engine(self):
277 return self._engine
279 def get_gf_store(self, target):
280 if self.get_engine() is None:
281 raise GrondError('Cannot get GF Store, modelling is not set up.')
282 return self.get_engine().get_store(target.store_id)
284 def random_uniform(self, xbounds, rstate, fixed_magnitude=None):
285 if fixed_magnitude is not None:
286 raise GrondError(
287 'Setting fixed magnitude in random model generation not '
288 'supported for this type of problem.')
290 x = rstate.uniform(0., 1., self.nparameters)
291 x *= (xbounds[:, 1] - xbounds[:, 0])
292 x += xbounds[:, 0]
293 return x
295 def preconstrain(self, x):
296 return x
298 def extract(self, xs, i):
299 if xs.ndim == 1:
300 return self.extract(xs[num.newaxis, :], i)[0]
302 if i < self.nparameters:
303 return xs[:, i]
304 else:
305 return self.make_dependant(
306 xs, self.dependants[i-self.nparameters].name)
308 def get_target_weights(self):
309 if self._target_weights is None:
310 self._target_weights = num.concatenate(
311 [target.get_combined_weight() for target in self.targets])
313 return self._target_weights
315 def get_target_residuals(self):
316 pass
318 def inter_family_weights(self, ns):
319 exp, root = self.get_norm_functions()
321 family, nfamilies = self.get_family_mask()
323 ws = num.zeros(self.nmisfits)
324 for ifamily in range(nfamilies):
325 mask = family == ifamily
326 ws[mask] = 1.0 / root(num.nansum(exp(ns[mask])))
328 return ws
330 def inter_family_weights2(self, ns):
331 '''
332 :param ns: 2D array with normalization factors ``ns[imodel, itarget]``
333 :returns: 2D array ``weights[imodel, itarget]``
334 '''
336 exp, root = self.get_norm_functions()
337 family, nfamilies = self.get_family_mask()
339 ws = num.zeros(ns.shape)
340 for ifamily in range(nfamilies):
341 mask = family == ifamily
342 ws[:, mask] = (1.0 / root(
343 num.nansum(exp(ns[:, mask]), axis=1)))[:, num.newaxis]
345 return ws
347 def get_reference_model(self):
348 model = num.zeros(self.nparameters)
349 model_source_params = self.pack(self.base_source)
350 model[:model_source_params.size] = model_source_params
351 return model
353 def get_parameter_bounds(self):
354 out = []
355 for p in self.problem_parameters:
356 r = self.ranges[p.name]
357 out.append((r.start, r.stop))
359 for target in self.targets:
360 for p in target.target_parameters:
361 r = target.target_ranges[p.name_nogroups]
362 out.append((r.start, r.stop))
364 return num.array(out, dtype=float)
366 def get_dependant_bounds(self):
367 return num.zeros((0, 2))
369 def get_combined_bounds(self):
370 return num.vstack((
371 self.get_parameter_bounds(),
372 self.get_dependant_bounds()))
374 def raise_invalid_norm_exponent(self):
375 raise GrondError('Invalid norm exponent: %f' % self.norm_exponent)
377 def get_norm_functions(self):
378 if self.norm_exponent == 2:
379 def sqr(x):
380 return x**2
382 return sqr, num.sqrt
384 elif self.norm_exponent == 1:
385 def noop(x):
386 return x
388 return noop, num.abs
390 else:
391 self.raise_invalid_norm_exponent()
393 def combine_misfits(
394 self, misfits,
395 extra_weights=None,
396 extra_residuals=None,
397 extra_correlated_weights=dict(),
398 get_contributions=False):
400 '''
401 Combine misfit contributions (residuals) to global or bootstrap misfits
403 :param misfits: 3D array ``misfits[imodel, iresidual, 0]`` are the
404 misfit contributions (residuals) ``misfits[imodel, iresidual, 1]``
405 are the normalisation contributions. It is also possible to give
406 the misfit and normalisation contributions for a single model as
407 ``misfits[iresidual, 0]`` and misfits[iresidual, 1]`` in which
408 case, the first dimension (imodel) of the result will be stipped
409 off.
411 :param extra_weights: if given, 2D array of extra weights to be applied
412 to the contributions, indexed as
413 ``extra_weights[ibootstrap, iresidual]``.
415 :param extra_residuals: if given, 2D array of perturbations to be added
416 to the residuals, indexed as
417 ``extra_residuals[ibootstrap, iresidual]``.
419 :param extra_correlated_weights: if a dictionary of
420 ``imisfit: correlated weight matrix`` is passed a correlated
421 weight matrix is applied to the misfit and normalisation values.
422 `imisfit` is the starting index in the misfits vector the
423 correlated weight matrix applies to.
425 :param get_contributions: get the weighted and perturbed contributions
426 (don't do the sum).
428 :returns: if no *extra_weights* or *extra_residuals* are given, a 1D
429 array indexed as ``misfits[imodel]`` containing the global misfit
430 for each model is returned, otherwise a 2D array
431 ``misfits[imodel, ibootstrap]`` with the misfit for every model and
432 weighting/residual set is returned.
433 '''
434 if misfits.ndim == 2:
435 misfits = misfits[num.newaxis, :, :]
436 return self.combine_misfits(
437 misfits, extra_weights, extra_residuals,
438 extra_correlated_weights, get_contributions)[0, ...]
440 if extra_weights is None and extra_residuals is None:
441 return self.combine_misfits(
442 misfits, False, False,
443 extra_correlated_weights, get_contributions)[:, 0]
445 assert misfits.ndim == 3
446 assert not num.any(extra_weights) or extra_weights.ndim == 2
447 assert not num.any(extra_residuals) or extra_residuals.ndim == 2
449 if self.norm_exponent != 2 and extra_correlated_weights:
450 raise GrondError('Correlated weights can only be used '
451 ' with norm_exponent=2')
453 exp, root = self.get_norm_functions()
455 nmodels = misfits.shape[0]
456 nmisfits = misfits.shape[1] # noqa
458 mf = misfits[:, num.newaxis, :, :].copy()
460 if num.any(extra_residuals):
461 mf = mf + extra_residuals[num.newaxis, :, :, num.newaxis]
463 res = mf[..., 0]
464 norms = mf[..., 1]
466 for imisfit, corr_weight_mat in extra_correlated_weights.items():
468 jmisfit = imisfit + corr_weight_mat.shape[0]
470 for imodel in range(nmodels):
471 corr_res = res[imodel, :, imisfit:jmisfit]
472 corr_norms = norms[imodel, :, imisfit:jmisfit]
474 res[imodel, :, imisfit:jmisfit] = \
475 correlated_weights(corr_res, corr_weight_mat)
477 norms[imodel, :, imisfit:jmisfit] = \
478 correlated_weights(corr_norms, corr_weight_mat)
480 # Apply normalization family weights (these weights depend on
481 # on just calculated correlated norms!)
482 weights_fam = \
483 self.inter_family_weights2(norms[:, 0, :])[:, num.newaxis, :]
485 weights_fam = exp(weights_fam)
487 res = exp(res)
488 norms = exp(norms)
490 res *= weights_fam
491 norms *= weights_fam
493 weights_tar = self.get_target_weights()[num.newaxis, num.newaxis, :]
494 if num.any(extra_weights):
495 weights_tar = weights_tar * extra_weights[num.newaxis, :, :]
497 weights_tar = exp(weights_tar)
499 res = res * weights_tar
500 norms = norms * weights_tar
502 if get_contributions:
503 return res / num.nansum(norms, axis=2)[:, :, num.newaxis]
505 result = root(
506 num.nansum(res, axis=2) /
507 num.nansum(norms, axis=2))
509 assert result[result < 0].size == 0
510 return result
512 def make_family_mask(self):
513 family_names = set()
514 families = num.zeros(self.nmisfits, dtype=int)
516 idx = 0
517 for itarget, target in enumerate(self.targets):
518 family_names.add(target.normalisation_family)
519 families[idx:idx + target.nmisfits] = len(family_names) - 1
520 idx += target.nmisfits
522 return families, len(family_names)
524 def get_family_mask(self):
525 if self._family_mask is None:
526 self._family_mask = self.make_family_mask()
528 return self._family_mask
530 def evaluate(self, x, mask=None, result_mode='full', targets=None,
531 nthreads=1):
532 source = self.get_source(x)
533 engine = self.get_engine()
535 self.set_target_parameter_values(x)
537 if mask is not None and targets is not None:
538 raise ValueError('Mask cannot be defined with targets set.')
539 targets = targets if targets is not None else self.targets
541 for target in targets:
542 target.set_result_mode(result_mode)
544 modelling_targets = []
545 t2m_map = {}
546 for itarget, target in enumerate(targets):
547 t2m_map[target] = target.prepare_modelling(engine, source, targets)
548 if mask is None or mask[itarget]:
549 modelling_targets.extend(t2m_map[target])
551 u2m_map = {}
552 for imtarget, mtarget in enumerate(modelling_targets):
553 if mtarget not in u2m_map:
554 u2m_map[mtarget] = []
556 u2m_map[mtarget].append(imtarget)
558 modelling_targets_unique = list(u2m_map.keys())
560 resp = engine.process(source, modelling_targets_unique,
561 nthreads=nthreads)
562 modelling_results_unique = list(resp.results_list[0])
564 modelling_results = [None] * len(modelling_targets)
566 for mtarget, mresult in zip(
567 modelling_targets_unique, modelling_results_unique):
569 for itarget in u2m_map[mtarget]:
570 modelling_results[itarget] = mresult
572 imt = 0
573 results = []
574 for itarget, target in enumerate(targets):
575 nmt_this = len(t2m_map[target])
576 if mask is None or mask[itarget]:
577 result = target.finalize_modelling(
578 engine, source,
579 t2m_map[target],
580 modelling_results[imt:imt+nmt_this])
582 imt += nmt_this
583 else:
584 result = gf.SeismosizerError(
585 'target was excluded from modelling')
587 results.append(result)
589 return results
591 def misfits(self, x, mask=None, nthreads=1):
592 results = self.evaluate(
593 x, mask=mask, result_mode='sparse', nthreads=nthreads)
594 misfits = num.full((self.nmisfits, 2), num.nan)
596 imisfit = 0
597 for target, result in zip(self.targets, results):
598 if isinstance(result, MisfitResult):
599 misfits[imisfit:imisfit+target.nmisfits, :] = result.misfits
601 imisfit += target.nmisfits
603 return misfits
605 def forward(self, x):
606 source = self.get_source(x)
607 engine = self.get_engine()
609 plain_targets = []
610 for target in self.targets:
611 plain_targets.extend(target.get_plain_targets(engine, source))
613 resp = engine.process(source, plain_targets)
615 results = []
616 for target, result in zip(plain_targets, resp.results_list[0]):
617 if isinstance(result, gf.SeismosizerError):
618 logger.debug(
619 '%s.%s.%s.%s: %s' % (target.codes + (str(result),)))
620 else:
621 results.append(result)
623 return results
625 def get_random_model(self, ntries_limit=100):
626 xbounds = self.get_parameter_bounds()
628 for _ in range(ntries_limit):
629 x = self.random_uniform(xbounds, rstate=g_rstate)
630 try:
631 return self.preconstrain(x)
633 except Forbidden:
634 pass
636 raise GrondError(
637 'Could not find any suitable candidate sample within %i tries' % (
638 ntries_limit))
641class ProblemInfoNotAvailable(GrondError):
642 pass
645class ProblemDataNotAvailable(GrondError):
646 pass
649class NoSuchAttribute(GrondError):
650 pass
653class InvalidAttributeName(GrondError):
654 pass
657class ModelHistory(object):
658 '''
659 Write, read and follow sequences of models produced in an optimisation run.
661 :param problem: :class:`grond.Problem` instance
662 :param path: path to rundir, defaults to None
663 :type path: str, optional
664 :param mode: open mode, 'r': read, 'w': write
665 :type mode: str, optional
666 '''
668 nmodels_capacity_min = 1024
670 def __init__(self, problem, nchains=None, path=None, mode='r'):
671 self.mode = mode
673 self.problem = problem
674 self.path = path
675 self.nchains = nchains
677 self._models_buffer = None
678 self._misfits_buffer = None
679 self._bootstraps_buffer = None
680 self._sample_contexts_buffer = None
682 self._sorted_misfit_idx = {}
684 self.models = None
685 self.misfits = None
686 self.bootstrap_misfits = None
688 self.sampler_contexts = None
690 self.nmodels_capacity = self.nmodels_capacity_min
691 self.listeners = []
693 self._attributes = {}
695 if mode == 'r':
696 self.load()
698 @staticmethod
699 def verify_rundir(rundir):
700 _rundir_files = ('misfits', 'models')
702 if not op.exists(rundir):
703 raise ProblemDataNotAvailable(
704 'Directory does not exist: %s' % rundir)
705 for f in _rundir_files:
706 if not op.exists(op.join(rundir, f)):
707 raise ProblemDataNotAvailable('File not found: %s' % f)
709 @classmethod
710 def follow(cls, path, nchains=None, wait=20.):
711 '''
712 Start following a rundir (constructor).
714 :param path: the path to follow, a grond rundir
715 :type path: str, optional
716 :param wait: wait time until the folder become alive
717 :type wait: number in seconds, optional
718 :returns: A :py:class:`ModelHistory` instance
719 '''
720 start_watch = time.time()
721 while (time.time() - start_watch) < wait:
722 try:
723 cls.verify_rundir(path)
724 problem = load_problem_info(path)
725 return cls(problem, nchains=nchains, path=path, mode='r')
726 except (ProblemDataNotAvailable, OSError):
727 time.sleep(.25)
729 @property
730 def nmodels(self):
731 if self.models is None:
732 return 0
733 else:
734 return self.models.shape[0]
736 @nmodels.setter
737 def nmodels(self, nmodels_new):
738 assert 0 <= nmodels_new <= self.nmodels
739 self.models = self._models_buffer[:nmodels_new, :]
740 self.misfits = self._misfits_buffer[:nmodels_new, :, :]
741 if self.nchains is not None:
742 self.bootstrap_misfits = self._bootstraps_buffer[:nmodels_new, :, :] # noqa
743 if self._sample_contexts_buffer is not None:
744 self.sampler_contexts = self._sample_contexts_buffer[:nmodels_new, :] # noqa
746 @property
747 def nmodels_capacity(self):
748 if self._models_buffer is None:
749 return 0
750 else:
751 return self._models_buffer.shape[0]
753 @nmodels_capacity.setter
754 def nmodels_capacity(self, nmodels_capacity_new):
755 if self.nmodels_capacity != nmodels_capacity_new:
757 models_buffer = num.zeros(
758 (nmodels_capacity_new, self.problem.nparameters),
759 dtype=float)
760 misfits_buffer = num.zeros(
761 (nmodels_capacity_new, self.problem.nmisfits, 2),
762 dtype=float)
763 sample_contexts_buffer = num.zeros(
764 (nmodels_capacity_new, 4),
765 dtype=int)
766 sample_contexts_buffer.fill(-1)
768 if self.nchains is not None:
769 bootstraps_buffer = num.zeros(
770 (nmodels_capacity_new, self.nchains),
771 dtype=float)
773 ncopy = min(self.nmodels, nmodels_capacity_new)
775 if self._models_buffer is not None:
776 models_buffer[:ncopy, :] = \
777 self._models_buffer[:ncopy, :]
778 misfits_buffer[:ncopy, :, :] = \
779 self._misfits_buffer[:ncopy, :, :]
780 sample_contexts_buffer[:ncopy, :] = \
781 self._sample_contexts_buffer[:ncopy, :]
783 self._models_buffer = models_buffer
784 self._misfits_buffer = misfits_buffer
785 self._sample_contexts_buffer = sample_contexts_buffer
787 if self.nchains is not None:
788 if self._bootstraps_buffer is not None:
789 bootstraps_buffer[:ncopy, :] = \
790 self._bootstraps_buffer[:ncopy, :]
791 self._bootstraps_buffer = bootstraps_buffer
793 def clear(self):
794 assert self.mode != 'r', 'History is read-only, cannot clear.'
795 self.nmodels = 0
796 self.nmodels_capacity = self.nmodels_capacity_min
798 def extend(
799 self, models, misfits,
800 bootstrap_misfits=None,
801 sampler_contexts=None):
803 nmodels = self.nmodels
804 n = models.shape[0]
806 nmodels_capacity_want = max(
807 self.nmodels_capacity_min, nextpow2(nmodels + n))
809 if nmodels_capacity_want != self.nmodels_capacity:
810 self.nmodels_capacity = nmodels_capacity_want
812 self._models_buffer[nmodels:nmodels+n, :] = models
813 self._misfits_buffer[nmodels:nmodels+n, :, :] = misfits
815 self.models = self._models_buffer[:nmodels+n, :]
816 self.misfits = self._misfits_buffer[:nmodels+n, :, :]
818 if bootstrap_misfits is not None:
819 self._bootstraps_buffer[nmodels:nmodels+n, :] = bootstrap_misfits
820 self.bootstrap_misfits = self._bootstraps_buffer[:nmodels+n, :]
822 if sampler_contexts is not None:
823 self._sample_contexts_buffer[nmodels:nmodels+n, :] \
824 = sampler_contexts
825 self.sampler_contexts = self._sample_contexts_buffer[:nmodels+n, :]
827 if self.path and self.mode == 'w':
828 for i in range(n):
829 self.problem.dump_problem_data(
830 self.path, models[i, :], misfits[i, :, :],
831 bootstrap_misfits[i, :]
832 if bootstrap_misfits is not None else None,
833 sampler_contexts[i, :]
834 if sampler_contexts is not None else None)
836 self._sorted_misfit_idx.clear()
838 self.emit('extend', nmodels, n, models, misfits, sampler_contexts)
840 def append(
841 self, model, misfits,
842 bootstrap_misfits=None,
843 sampler_context=None):
845 if bootstrap_misfits is not None:
846 bootstrap_misfits = bootstrap_misfits[num.newaxis, :]
848 if sampler_context is not None:
849 sampler_context = sampler_context[num.newaxis, :]
851 return self.extend(
852 model[num.newaxis, :], misfits[num.newaxis, :, :],
853 bootstrap_misfits, sampler_context)
855 def load(self):
856 self.mode = 'r'
857 self.verify_rundir(self.path)
858 models, misfits, bootstraps, sampler_contexts = load_problem_data(
859 self.path, self.problem, nchains=self.nchains)
860 self.extend(models, misfits, bootstraps, sampler_contexts)
862 def update(self):
863 ''' Update history from path '''
864 nmodels_available = get_nmodels(self.path, self.problem)
865 if self.nmodels == nmodels_available:
866 return
868 try:
869 new_models, new_misfits, new_bootstraps, new_sampler_contexts = \
870 load_problem_data(
871 self.path,
872 self.problem,
873 nmodels_skip=self.nmodels,
874 nchains=self.nchains)
876 except ValueError:
877 return
879 self.extend(
880 new_models,
881 new_misfits,
882 new_bootstraps,
883 new_sampler_contexts)
885 def add_listener(self, listener):
886 ''' Add a listener to the history
888 The listening class can implement the following methods:
889 * ``extend``
890 '''
891 self.listeners.append(listener)
893 def emit(self, event_name, *args, **kwargs):
894 for listener in self.listeners:
895 slot = getattr(listener, event_name, None)
896 if callable(slot):
897 slot(*args, **kwargs)
899 @property
900 def attribute_names(self):
901 apath = op.join(self.path, 'attributes')
902 if not os.path.exists(apath):
903 return []
905 return [fn for fn in os.listdir(apath)
906 if StringID.regex.match(fn)]
908 def get_attribute(self, name):
909 if name not in self._attributes:
910 if name not in self.attribute_names:
911 raise NoSuchAttribute(name)
913 path = op.join(self.path, 'attributes', name)
915 with open(path, 'rb') as f:
916 self._attributes[name] = num.fromfile(
917 f, dtype='<i4',
918 count=self.nmodels).astype(int)
920 assert self._attributes[name].shape == (self.nmodels,)
922 return self._attributes[name]
924 def set_attribute(self, name, attribute):
925 if not StringID.regex.match(name):
926 raise InvalidAttributeName(name)
928 attribute = attribute.astype(int)
929 assert attribute.shape == (self.nmodels,)
931 apath = op.join(self.path, 'attributes')
933 if not os.path.exists(apath):
934 os.mkdir(apath)
936 path = op.join(apath, name)
938 with open(path, 'wb') as f:
939 attribute.astype('<i4').tofile(f)
941 self._attributes[name] = attribute
943 def ensure_bootstrap_misfits(self, optimiser):
944 if self.bootstrap_misfits is None:
945 problem = self.problem
946 self.bootstrap_misfits = problem.combine_misfits(
947 self.misfits,
948 extra_weights=optimiser.get_bootstrap_weights(problem),
949 extra_residuals=optimiser.get_bootstrap_residuals(problem))
951 def imodels_by_cluster(self, cluster_attribute):
952 if cluster_attribute is None:
953 return [(-1, 100.0, num.arange(self.nmodels))]
955 by_cluster = []
956 try:
957 iclusters = self.get_attribute(cluster_attribute)
958 iclusters_avail = num.unique(iclusters)
960 for icluster in iclusters_avail:
961 imodels = num.where(iclusters == icluster)[0]
962 by_cluster.append(
963 (icluster,
964 (100.0 * imodels.size) / self.nmodels,
965 imodels))
967 if by_cluster and by_cluster[0][0] == -1:
968 by_cluster.append(by_cluster.pop(0))
970 except NoSuchAttribute:
971 logger.warning(
972 'Attribute %s not set in run %s.\n'
973 ' Skipping model retrieval by clusters.' % (
974 cluster_attribute, self.problem.name))
976 return by_cluster
978 def models_by_cluster(self, cluster_attribute):
979 if cluster_attribute is None:
980 return [(-1, 100.0, self.models)]
982 return [
983 (icluster, percentage, self.models[imodels])
984 for (icluster, percentage, imodels)
985 in self.imodels_by_cluster(cluster_attribute)]
987 def mean_sources_by_cluster(self, cluster_attribute):
988 return [
989 (icluster, percentage, stats.get_mean_source(self.problem, models))
990 for (icluster, percentage, models)
991 in self.models_by_cluster(cluster_attribute)]
993 def get_sorted_misfits_idx(self, chain=0):
994 if chain not in self._sorted_misfit_idx.keys():
995 self._sorted_misfit_idx[chain] = num.argsort(
996 self.bootstrap_misfits[:, chain])
998 return self._sorted_misfit_idx[chain]
1000 def get_sorted_misfits(self, chain=0):
1001 isort = self.get_sorted_misfits_idx(chain)
1002 return self.bootstrap_misfits[:, chain][isort]
1004 def get_sorted_models(self, chain=0):
1005 isort = self.get_sorted_misfits_idx(chain=0)
1006 return self.models[isort, :]
1008 def get_sorted_primary_misfits(self):
1009 return self.get_sorted_misfits(chain=0)
1011 def get_sorted_primary_models(self):
1012 return self.get_sorted_models(chain=0)
1014 def get_best_model(self, chain=0):
1015 return self.get_sorted_models(chain)[0, ...]
1017 def get_best_misfit(self, chain=0):
1018 return self.get_sorted_misfits(chain)[0]
1020 def get_mean_model(self):
1021 return num.mean(self.models, axis=0)
1023 def get_mean_misfit(self, chain=0):
1024 return num.mean(self.bootstrap_misfits[:, chain])
1026 def get_best_source(self, chain=0):
1027 return self.problem.get_source(self.get_best_model(chain))
1029 def get_mean_source(self, chain=0):
1030 return self.problem.get_source(self.get_mean_model())
1032 def get_chain_misfits(self, chain=0):
1033 return self.bootstrap_misfits[:, chain]
1035 def get_primary_chain_misfits(self):
1036 return self.get_chain_misfits(chain=0)
1039def get_nmodels(dirname, problem):
1040 fn = op.join(dirname, 'models')
1041 with open(fn, 'r') as f:
1042 nmodels1 = os.fstat(f.fileno()).st_size // (problem.nparameters * 8)
1044 fn = op.join(dirname, 'misfits')
1045 with open(fn, 'r') as f:
1046 nmodels2 = os.fstat(f.fileno()).st_size // (problem.nmisfits * 2 * 8)
1048 return min(nmodels1, nmodels2)
1051def load_problem_info_and_data(dirname, subset=None, nchains=None):
1052 problem = load_problem_info(dirname)
1053 models, misfits, bootstraps, sampler_contexts = load_problem_data(
1054 xjoin(dirname, subset), problem, nchains=nchains)
1055 return problem, models, misfits, bootstraps, sampler_contexts
1058def load_optimiser_info(dirname):
1059 fn = op.join(dirname, 'optimiser.yaml')
1060 return guts.load(filename=fn)
1063def load_problem_info(dirname):
1064 try:
1065 fn = op.join(dirname, 'problem.yaml')
1066 return guts.load(filename=fn)
1067 except OSError as e:
1068 logger.debug(e)
1069 raise ProblemInfoNotAvailable(
1070 'No problem info available (%s).' % dirname)
1073def load_problem_data(dirname, problem, nmodels_skip=0, nchains=None):
1075 def get_chains_fn():
1076 for fn in (op.join(dirname, 'bootstraps'),
1077 op.join(dirname, 'chains')):
1078 if op.exists(fn):
1079 return fn
1080 return False
1082 try:
1083 nmodels = get_nmodels(dirname, problem) - nmodels_skip
1085 fn = op.join(dirname, 'models')
1086 with open(fn, 'r') as f:
1087 f.seek(nmodels_skip * problem.nparameters * 8)
1088 models = num.fromfile(
1089 f, dtype='<f8',
1090 count=nmodels * problem.nparameters)\
1091 .astype(float)
1093 models = models.reshape((nmodels, problem.nparameters))
1095 fn = op.join(dirname, 'misfits')
1096 with open(fn, 'r') as f:
1097 f.seek(nmodels_skip * problem.nmisfits * 2 * 8)
1098 misfits = num.fromfile(
1099 f, dtype='<f8',
1100 count=nmodels*problem.nmisfits*2)\
1101 .astype(float)
1102 misfits = misfits.reshape((nmodels, problem.nmisfits, 2))
1104 chains = None
1105 fn = get_chains_fn()
1106 if fn and nchains is not None:
1107 with open(fn, 'r') as f:
1108 f.seek(nmodels_skip * nchains * 8)
1109 chains = num.fromfile(
1110 f, dtype='<f8',
1111 count=nmodels*nchains)\
1112 .astype(float)
1114 chains = chains.reshape((nmodels, nchains))
1116 sampler_contexts = None
1117 fn = op.join(dirname, 'choices')
1118 if op.exists(fn):
1119 with open(fn, 'r') as f:
1120 f.seek(nmodels_skip * 4 * 8)
1121 sampler_contexts = num.fromfile(
1122 f, dtype='<i8',
1123 count=nmodels*4).astype(int)
1125 sampler_contexts = sampler_contexts.reshape((nmodels, 4))
1127 except OSError as e:
1128 logger.debug(str(e))
1129 raise ProblemDataNotAvailable(
1130 'No problem data available (%s).' % dirname)
1132 return models, misfits, chains, sampler_contexts
1135__all__ = '''
1136 ProblemConfig
1137 Problem
1138 ModelHistory
1139 ProblemInfoNotAvailable
1140 ProblemDataNotAvailable
1141 load_problem_info
1142 load_problem_info_and_data
1143 InvalidAttributeName
1144 NoSuchAttribute
1145'''.split()