1'''
2Base classes for Grond's problem definition and the model history container.
4Common behaviour of all source models offered by Grond is implemented here.
5Source model specific details are implemented in the respective submodules.
6'''
8import numpy as num
9import math
10import copy
11import logging
12import os.path as op
13import os
14import time
16from pyrocko import gf, util, guts
17from pyrocko.guts import Object, String, List, Dict, Int
19from grond.meta import ADict, Parameter, GrondError, xjoin, Forbidden, \
20 StringID, has_get_plot_classes
21from ..targets import MisfitResult, MisfitTarget, TargetGroup, \
22 WaveformMisfitTarget, SatelliteMisfitTarget, GNSSCampaignMisfitTarget
24from grond import stats
26from grond.version import __version__
28guts_prefix = 'grond'
29logger = logging.getLogger('grond.problems.base')
30km = 1e3
31as_km = dict(scale_factor=km, scale_unit='km')
33g_rstate = num.random.RandomState()
36def nextpow2(i):
37 return 2**int(math.ceil(math.log(i)/math.log(2.)))
40def correlated_weights(values, weight_matrix):
41 '''
42 Applies correlated weights to values
44 The resulting weighed values have to be squared! Check out
45 :meth:`Problem.combine_misfits` for more information.
47 :param values: Misfits or norms as :class:`numpy.Array`
48 :param weight: Weight matrix, commonly the inverse of covariance matrix
50 :returns: :class:`numpy.Array` weighted values
51 '''
52 return num.matmul(values, weight_matrix)
55class ProblemConfig(Object):
56 '''
57 Base class for config section defining the objective function setup.
59 Factory for :py:class:`Problem` objects.
60 '''
61 name_template = String.T()
62 norm_exponent = Int.T(default=2)
63 nthreads = Int.T(default=1)
65 def get_problem(self, event, target_groups, targets):
66 '''
67 Instantiate the problem with a given event and targets.
69 :returns: :py:class:`Problem` object
70 '''
71 raise NotImplementedError
74@has_get_plot_classes
75class Problem(Object):
76 '''
77 Base class for objective function setup.
79 Defines the *problem* to be solved by the optimiser.
80 '''
81 name = String.T()
82 ranges = Dict.T(String.T(), gf.Range.T())
83 dependants = List.T(Parameter.T())
84 norm_exponent = Int.T(default=2)
85 base_source = gf.Source.T(optional=True)
86 targets = List.T(MisfitTarget.T())
87 target_groups = List.T(TargetGroup.T())
88 grond_version = String.T(optional=True)
89 nthreads = Int.T(default=1)
91 def __init__(self, **kwargs):
92 Object.__init__(self, **kwargs)
94 if self.grond_version is None:
95 self.grond_version = __version__
97 self._target_weights = None
98 self._engine = None
99 self._family_mask = None
101 if hasattr(self, 'problem_waveform_parameters') and self.has_waveforms:
102 self.problem_parameters =\
103 self.problem_parameters + self.problem_waveform_parameters
105 unused_parameters = []
106 for p in self.problem_parameters:
107 if p.optional and p._name not in self.ranges.keys():
108 unused_parameters.append(p)
110 for p in unused_parameters:
111 self.problem_parameters.remove(p)
113 self.check()
115 @classmethod
116 def get_plot_classes(cls):
117 from . import plot
118 return plot.get_plot_classes()
120 def check(self):
121 paths = set()
122 for grp in self.target_groups:
123 if grp.path == 'all':
124 continue
125 if grp.path in paths:
126 raise ValueError('Path %s defined more than once! In %s'
127 % (grp.path, grp.__class__.__name__))
128 paths.add(grp.path)
129 logger.debug('TargetGroup check OK.')
131 def copy(self):
132 o = copy.copy(self)
133 o._target_weights = None
134 return o
136 def set_target_parameter_values(self, x):
137 nprob = len(self.problem_parameters)
138 for target in self.targets:
139 target.set_parameter_values(x[nprob:nprob+target.nparameters])
140 nprob += target.nparameters
142 def get_parameter_dict(self, model, group=None):
143 params = []
144 for ip, p in enumerate(self.parameters):
145 if group in p.groups or group is None:
146 params.append((p.name, model[ip]))
147 return ADict(params)
149 def get_parameter_array(self, d):
150 arr = num.zeros(self.nparameters, dtype=float)
151 for ip, p in enumerate(self.parameters):
152 if p.name in d.keys():
153 arr[ip] = d[p.name]
154 return arr
156 def dump_problem_info(self, dirname):
157 fn = op.join(dirname, 'problem.yaml')
158 util.ensuredirs(fn)
159 guts.dump(self, filename=fn)
161 def dump_problem_data(
162 self, dirname, x, misfits, chains=None,
163 sampler_context=None):
165 fn = op.join(dirname, 'models')
166 if not isinstance(x, num.ndarray):
167 x = num.array(x)
168 with open(fn, 'ab') as f:
169 x.astype('<f8').tofile(f)
171 fn = op.join(dirname, 'misfits')
172 with open(fn, 'ab') as f:
173 misfits.astype('<f8').tofile(f)
175 if chains is not None:
176 fn = op.join(dirname, 'chains')
177 with open(fn, 'ab') as f:
178 chains.astype('<f8').tofile(f)
180 if sampler_context is not None:
181 fn = op.join(dirname, 'choices')
182 with open(fn, 'ab') as f:
183 num.array(sampler_context, dtype='<i8').tofile(f)
185 def name_to_index(self, name):
186 pnames = [p.name for p in self.combined]
187 return pnames.index(name)
189 @property
190 def parameters(self):
191 target_parameters = []
192 for target in self.targets:
193 target_parameters.extend(target.target_parameters)
194 return self.problem_parameters + target_parameters
196 @property
197 def parameter_names(self):
198 return [p.name for p in self.combined]
200 @property
201 def dependant_names(self):
202 return [p.name for p in self.dependants]
204 @property
205 def nparameters(self):
206 return len(self.parameters)
208 @property
209 def ntargets(self):
210 return len(self.targets)
212 @property
213 def nwaveform_targets(self):
214 return len(self.waveform_targets)
216 @property
217 def nsatellite_targets(self):
218 return len(self.satellite_targets)
220 @property
221 def ngnss_targets(self):
222 return len(self.gnss_targets)
224 @property
225 def nmisfits(self):
226 nmisfits = 0
227 for target in self.targets:
228 nmisfits += target.nmisfits
229 return nmisfits
231 @property
232 def ndependants(self):
233 return len(self.dependants)
235 @property
236 def ncombined(self):
237 return len(self.parameters) + len(self.dependants)
239 @property
240 def combined(self):
241 return self.parameters + self.dependants
243 @property
244 def satellite_targets(self):
245 return [t for t in self.targets
246 if isinstance(t, SatelliteMisfitTarget)]
248 @property
249 def gnss_targets(self):
250 return [t for t in self.targets
251 if isinstance(t, GNSSCampaignMisfitTarget)]
253 @property
254 def waveform_targets(self):
255 return [t for t in self.targets
256 if isinstance(t, WaveformMisfitTarget)]
258 @property
259 def has_satellite(self):
260 if self.satellite_targets:
261 return True
262 return False
264 @property
265 def has_waveforms(self):
266 if self.waveform_targets:
267 return True
268 return False
270 def set_engine(self, engine):
271 self._engine = engine
273 def get_engine(self):
274 return self._engine
276 def get_gf_store(self, target):
277 if self.get_engine() is None:
278 raise GrondError('Cannot get GF Store, modelling is not set up.')
279 return self.get_engine().get_store(target.store_id)
281 def random_uniform(self, xbounds, rstate, fixed_magnitude=None):
282 if fixed_magnitude is not None:
283 raise GrondError(
284 'Setting fixed magnitude in random model generation not '
285 'supported for this type of problem.')
287 x = rstate.uniform(0., 1., self.nparameters)
288 x *= (xbounds[:, 1] - xbounds[:, 0])
289 x += xbounds[:, 0]
290 return x
292 def preconstrain(self, x):
293 return x
295 def extract(self, xs, i):
296 if xs.ndim == 1:
297 return self.extract(xs[num.newaxis, :], i)[0]
299 if i < self.nparameters:
300 return xs[:, i]
301 else:
302 return self.make_dependant(
303 xs, self.dependants[i-self.nparameters].name)
305 def get_target_weights(self):
306 if self._target_weights is None:
307 self._target_weights = num.concatenate(
308 [target.get_combined_weight() for target in self.targets])
310 return self._target_weights
312 def get_target_residuals(self):
313 pass
315 def inter_family_weights(self, ns):
316 exp, root = self.get_norm_functions()
318 family, nfamilies = self.get_family_mask()
320 ws = num.zeros(self.nmisfits)
321 for ifamily in range(nfamilies):
322 mask = family == ifamily
323 ws[mask] = 1.0 / root(num.nansum(exp(ns[mask])))
325 return ws
327 def inter_family_weights2(self, ns):
328 '''
329 :param ns: 2D array with normalization factors ``ns[imodel, itarget]``
330 :returns: 2D array ``weights[imodel, itarget]``
331 '''
333 exp, root = self.get_norm_functions()
334 family, nfamilies = self.get_family_mask()
336 ws = num.zeros(ns.shape)
337 for ifamily in range(nfamilies):
338 mask = family == ifamily
339 ws[:, mask] = (1.0 / root(
340 num.nansum(exp(ns[:, mask]), axis=1)))[:, num.newaxis]
342 return ws
344 def get_reference_model(self):
345 model = num.zeros(self.nparameters)
346 model_source_params = self.pack(self.base_source)
347 model[:model_source_params.size] = model_source_params
348 return model
350 def get_parameter_bounds(self):
351 out = []
352 for p in self.problem_parameters:
353 r = self.ranges[p.name]
354 out.append((r.start, r.stop))
356 for target in self.targets:
357 for p in target.target_parameters:
358 r = target.target_ranges[p.name_nogroups]
359 out.append((r.start, r.stop))
361 return num.array(out, dtype=float)
363 def get_dependant_bounds(self):
364 return num.zeros((0, 2))
366 def get_combined_bounds(self):
367 return num.vstack((
368 self.get_parameter_bounds(),
369 self.get_dependant_bounds()))
371 def raise_invalid_norm_exponent(self):
372 raise GrondError('Invalid norm exponent: %f' % self.norm_exponent)
374 def get_norm_functions(self):
375 if self.norm_exponent == 2:
376 def sqr(x):
377 return x**2
379 return sqr, num.sqrt
381 elif self.norm_exponent == 1:
382 def noop(x):
383 return x
385 return noop, num.abs
387 else:
388 self.raise_invalid_norm_exponent()
390 def combine_misfits(
391 self, misfits,
392 extra_weights=None,
393 extra_residuals=None,
394 extra_correlated_weights=dict(),
395 get_contributions=False):
397 '''
398 Combine misfit contributions (residuals) to global or bootstrap misfits
400 :param misfits: 3D array ``misfits[imodel, iresidual, 0]`` are the
401 misfit contributions (residuals) ``misfits[imodel, iresidual, 1]``
402 are the normalisation contributions. It is also possible to give
403 the misfit and normalisation contributions for a single model as
404 ``misfits[iresidual, 0]`` and misfits[iresidual, 1]`` in which
405 case, the first dimension (imodel) of the result will be stipped
406 off.
408 :param extra_weights: if given, 2D array of extra weights to be applied
409 to the contributions, indexed as
410 ``extra_weights[ibootstrap, iresidual]``.
412 :param extra_residuals: if given, 2D array of perturbations to be added
413 to the residuals, indexed as
414 ``extra_residuals[ibootstrap, iresidual]``.
416 :param extra_correlated_weights: if a dictionary of
417 ``imisfit: correlated weight matrix`` is passed a correlated
418 weight matrix is applied to the misfit and normalisation values.
419 `imisfit` is the starting index in the misfits vector the
420 correlated weight matrix applies to.
422 :param get_contributions: get the weighted and perturbed contributions
423 (don't do the sum).
425 :returns: if no *extra_weights* or *extra_residuals* are given, a 1D
426 array indexed as ``misfits[imodel]`` containing the global misfit
427 for each model is returned, otherwise a 2D array
428 ``misfits[imodel, ibootstrap]`` with the misfit for every model and
429 weighting/residual set is returned.
430 '''
431 if misfits.ndim == 2:
432 misfits = misfits[num.newaxis, :, :]
433 return self.combine_misfits(
434 misfits, extra_weights, extra_residuals,
435 extra_correlated_weights, get_contributions)[0, ...]
437 if extra_weights is None and extra_residuals is None:
438 return self.combine_misfits(
439 misfits, False, False,
440 extra_correlated_weights, get_contributions)[:, 0]
442 assert misfits.ndim == 3
443 assert not num.any(extra_weights) or extra_weights.ndim == 2
444 assert not num.any(extra_residuals) or extra_residuals.ndim == 2
446 if self.norm_exponent != 2 and extra_correlated_weights:
447 raise GrondError('Correlated weights can only be used '
448 ' with norm_exponent=2')
450 exp, root = self.get_norm_functions()
452 nmodels = misfits.shape[0]
453 nmisfits = misfits.shape[1] # noqa
455 mf = misfits[:, num.newaxis, :, :].copy()
457 if num.any(extra_residuals):
458 mf = mf + extra_residuals[num.newaxis, :, :, num.newaxis]
460 res = mf[..., 0]
461 norms = mf[..., 1]
463 for imisfit, corr_weight_mat in extra_correlated_weights.items():
465 jmisfit = imisfit + corr_weight_mat.shape[0]
467 for imodel in range(nmodels):
468 corr_res = res[imodel, :, imisfit:jmisfit]
469 corr_norms = norms[imodel, :, imisfit:jmisfit]
471 res[imodel, :, imisfit:jmisfit] = \
472 correlated_weights(corr_res, corr_weight_mat)
474 norms[imodel, :, imisfit:jmisfit] = \
475 correlated_weights(corr_norms, corr_weight_mat)
477 # Apply normalization family weights (these weights depend on
478 # on just calculated correlated norms!)
479 weights_fam = \
480 self.inter_family_weights2(norms[:, 0, :])[:, num.newaxis, :]
482 weights_fam = exp(weights_fam)
484 res = exp(res)
485 norms = exp(norms)
487 res *= weights_fam
488 norms *= weights_fam
490 weights_tar = self.get_target_weights()[num.newaxis, num.newaxis, :]
491 if num.any(extra_weights):
492 weights_tar = weights_tar * extra_weights[num.newaxis, :, :]
494 weights_tar = exp(weights_tar)
496 res = res * weights_tar
497 norms = norms * weights_tar
499 if get_contributions:
500 return res / num.nansum(norms, axis=2)[:, :, num.newaxis]
502 result = root(
503 num.nansum(res, axis=2) /
504 num.nansum(norms, axis=2))
506 assert result[result < 0].size == 0
507 return result
509 def make_family_mask(self):
510 family_names = set()
511 families = num.zeros(self.nmisfits, dtype=int)
513 idx = 0
514 for itarget, target in enumerate(self.targets):
515 family_names.add(target.normalisation_family)
516 families[idx:idx + target.nmisfits] = len(family_names) - 1
517 idx += target.nmisfits
519 return families, len(family_names)
521 def get_family_mask(self):
522 if self._family_mask is None:
523 self._family_mask = self.make_family_mask()
525 return self._family_mask
527 def evaluate(self, x, mask=None, result_mode='full', targets=None,
528 nthreads=1):
529 source = self.get_source(x)
530 engine = self.get_engine()
532 self.set_target_parameter_values(x)
534 if mask is not None and targets is not None:
535 raise ValueError('Mask cannot be defined with targets set.')
536 targets = targets if targets is not None else self.targets
538 for target in targets:
539 target.set_result_mode(result_mode)
541 modelling_targets = []
542 t2m_map = {}
543 for itarget, target in enumerate(targets):
544 t2m_map[target] = target.prepare_modelling(engine, source, targets)
545 if mask is None or mask[itarget]:
546 modelling_targets.extend(t2m_map[target])
548 u2m_map = {}
549 for imtarget, mtarget in enumerate(modelling_targets):
550 if mtarget not in u2m_map:
551 u2m_map[mtarget] = []
553 u2m_map[mtarget].append(imtarget)
555 modelling_targets_unique = list(u2m_map.keys())
557 resp = engine.process(source, modelling_targets_unique,
558 nthreads=nthreads)
559 modelling_results_unique = list(resp.results_list[0])
561 modelling_results = [None] * len(modelling_targets)
563 for mtarget, mresult in zip(
564 modelling_targets_unique, modelling_results_unique):
566 for itarget in u2m_map[mtarget]:
567 modelling_results[itarget] = mresult
569 imt = 0
570 results = []
571 for itarget, target in enumerate(targets):
572 nmt_this = len(t2m_map[target])
573 if mask is None or mask[itarget]:
574 result = target.finalize_modelling(
575 engine, source,
576 t2m_map[target],
577 modelling_results[imt:imt+nmt_this])
579 imt += nmt_this
580 else:
581 result = gf.SeismosizerError(
582 'target was excluded from modelling')
584 results.append(result)
586 return results
588 def misfits(self, x, mask=None, nthreads=1):
589 results = self.evaluate(
590 x, mask=mask, result_mode='sparse', nthreads=nthreads)
591 misfits = num.full((self.nmisfits, 2), num.nan)
593 imisfit = 0
594 for target, result in zip(self.targets, results):
595 if isinstance(result, MisfitResult):
596 misfits[imisfit:imisfit+target.nmisfits, :] = result.misfits
598 imisfit += target.nmisfits
600 return misfits
602 def forward(self, x):
603 source = self.get_source(x)
604 engine = self.get_engine()
606 plain_targets = []
607 for target in self.targets:
608 plain_targets.extend(target.get_plain_targets(engine, source))
610 resp = engine.process(source, plain_targets)
612 results = []
613 for target, result in zip(plain_targets, resp.results_list[0]):
614 if isinstance(result, gf.SeismosizerError):
615 logger.debug(
616 '%s.%s.%s.%s: %s' % (target.codes + (str(result),)))
617 else:
618 results.append(result)
620 return results
622 def get_random_model(self, ntries_limit=100):
623 xbounds = self.get_parameter_bounds()
625 for _ in range(ntries_limit):
626 x = self.random_uniform(xbounds, rstate=g_rstate)
627 try:
628 return self.preconstrain(x)
630 except Forbidden:
631 pass
633 raise GrondError(
634 'Could not find any suitable candidate sample within %i tries' % (
635 ntries_limit))
638class ProblemInfoNotAvailable(GrondError):
639 pass
642class ProblemDataNotAvailable(GrondError):
643 pass
646class NoSuchAttribute(GrondError):
647 pass
650class InvalidAttributeName(GrondError):
651 pass
654class ModelHistory(object):
655 '''
656 Write, read and follow sequences of models produced in an optimisation run.
658 :param problem: :class:`grond.Problem` instance
659 :param path: path to rundir, defaults to None
660 :type path: str, optional
661 :param mode: open mode, 'r': read, 'w': write
662 :type mode: str, optional
663 '''
665 nmodels_capacity_min = 1024
667 def __init__(self, problem, nchains=None, path=None, mode='r'):
668 self.mode = mode
670 self.problem = problem
671 self.path = path
672 self.nchains = nchains
674 self._models_buffer = None
675 self._misfits_buffer = None
676 self._bootstraps_buffer = None
677 self._sample_contexts_buffer = None
679 self._sorted_misfit_idx = {}
681 self.models = None
682 self.misfits = None
683 self.bootstrap_misfits = None
685 self.sampler_contexts = None
687 self.nmodels_capacity = self.nmodels_capacity_min
688 self.listeners = []
690 self._attributes = {}
692 if mode == 'r':
693 self.load()
695 @staticmethod
696 def verify_rundir(rundir):
697 _rundir_files = ('misfits', 'models')
699 if not op.exists(rundir):
700 raise ProblemDataNotAvailable(
701 'Directory does not exist: %s' % rundir)
702 for f in _rundir_files:
703 if not op.exists(op.join(rundir, f)):
704 raise ProblemDataNotAvailable('File not found: %s' % f)
706 @classmethod
707 def follow(cls, path, nchains=None, wait=20.):
708 '''
709 Start following a rundir (constructor).
711 :param path: the path to follow, a grond rundir
712 :type path: str, optional
713 :param wait: wait time until the folder become alive
714 :type wait: number in seconds, optional
715 :returns: A :py:class:`ModelHistory` instance
716 '''
717 start_watch = time.time()
718 while (time.time() - start_watch) < wait:
719 try:
720 cls.verify_rundir(path)
721 problem = load_problem_info(path)
722 return cls(problem, nchains=nchains, path=path, mode='r')
723 except (ProblemDataNotAvailable, OSError):
724 time.sleep(.25)
726 @property
727 def nmodels(self):
728 if self.models is None:
729 return 0
730 else:
731 return self.models.shape[0]
733 @nmodels.setter
734 def nmodels(self, nmodels_new):
735 assert 0 <= nmodels_new <= self.nmodels
736 self.models = self._models_buffer[:nmodels_new, :]
737 self.misfits = self._misfits_buffer[:nmodels_new, :, :]
738 if self.nchains is not None:
739 self.bootstrap_misfits = self._bootstraps_buffer[:nmodels_new, :, :] # noqa
740 if self._sample_contexts_buffer is not None:
741 self.sampler_contexts = self._sample_contexts_buffer[:nmodels_new, :] # noqa
743 @property
744 def nmodels_capacity(self):
745 if self._models_buffer is None:
746 return 0
747 else:
748 return self._models_buffer.shape[0]
750 @nmodels_capacity.setter
751 def nmodels_capacity(self, nmodels_capacity_new):
752 if self.nmodels_capacity != nmodels_capacity_new:
754 models_buffer = num.zeros(
755 (nmodels_capacity_new, self.problem.nparameters),
756 dtype=float)
757 misfits_buffer = num.zeros(
758 (nmodels_capacity_new, self.problem.nmisfits, 2),
759 dtype=float)
760 sample_contexts_buffer = num.zeros(
761 (nmodels_capacity_new, 4),
762 dtype=int)
763 sample_contexts_buffer.fill(-1)
765 if self.nchains is not None:
766 bootstraps_buffer = num.zeros(
767 (nmodels_capacity_new, self.nchains),
768 dtype=float)
770 ncopy = min(self.nmodels, nmodels_capacity_new)
772 if self._models_buffer is not None:
773 models_buffer[:ncopy, :] = \
774 self._models_buffer[:ncopy, :]
775 misfits_buffer[:ncopy, :, :] = \
776 self._misfits_buffer[:ncopy, :, :]
777 sample_contexts_buffer[:ncopy, :] = \
778 self._sample_contexts_buffer[:ncopy, :]
780 self._models_buffer = models_buffer
781 self._misfits_buffer = misfits_buffer
782 self._sample_contexts_buffer = sample_contexts_buffer
784 if self.nchains is not None:
785 if self._bootstraps_buffer is not None:
786 bootstraps_buffer[:ncopy, :] = \
787 self._bootstraps_buffer[:ncopy, :]
788 self._bootstraps_buffer = bootstraps_buffer
790 def clear(self):
791 assert self.mode != 'r', 'History is read-only, cannot clear.'
792 self.nmodels = 0
793 self.nmodels_capacity = self.nmodels_capacity_min
795 def extend(
796 self, models, misfits,
797 bootstrap_misfits=None,
798 sampler_contexts=None):
800 nmodels = self.nmodels
801 n = models.shape[0]
803 nmodels_capacity_want = max(
804 self.nmodels_capacity_min, nextpow2(nmodels + n))
806 if nmodels_capacity_want != self.nmodels_capacity:
807 self.nmodels_capacity = nmodels_capacity_want
809 self._models_buffer[nmodels:nmodels+n, :] = models
810 self._misfits_buffer[nmodels:nmodels+n, :, :] = misfits
812 self.models = self._models_buffer[:nmodels+n, :]
813 self.misfits = self._misfits_buffer[:nmodels+n, :, :]
815 if bootstrap_misfits is not None:
816 self._bootstraps_buffer[nmodels:nmodels+n, :] = bootstrap_misfits
817 self.bootstrap_misfits = self._bootstraps_buffer[:nmodels+n, :]
819 if sampler_contexts is not None:
820 self._sample_contexts_buffer[nmodels:nmodels+n, :] \
821 = sampler_contexts
822 self.sampler_contexts = self._sample_contexts_buffer[:nmodels+n, :]
824 if self.path and self.mode == 'w':
825 for i in range(n):
826 self.problem.dump_problem_data(
827 self.path, models[i, :], misfits[i, :, :],
828 bootstrap_misfits[i, :]
829 if bootstrap_misfits is not None else None,
830 sampler_contexts[i, :]
831 if sampler_contexts is not None else None)
833 self._sorted_misfit_idx.clear()
835 self.emit('extend', nmodels, n, models, misfits, sampler_contexts)
837 def append(
838 self, model, misfits,
839 bootstrap_misfits=None,
840 sampler_context=None):
842 if bootstrap_misfits is not None:
843 bootstrap_misfits = bootstrap_misfits[num.newaxis, :]
845 if sampler_context is not None:
846 sampler_context = sampler_context[num.newaxis, :]
848 return self.extend(
849 model[num.newaxis, :], misfits[num.newaxis, :, :],
850 bootstrap_misfits, sampler_context)
852 def load(self):
853 self.mode = 'r'
854 self.verify_rundir(self.path)
855 models, misfits, bootstraps, sampler_contexts = load_problem_data(
856 self.path, self.problem, nchains=self.nchains)
857 self.extend(models, misfits, bootstraps, sampler_contexts)
859 def update(self):
860 ''' Update history from path '''
861 nmodels_available = get_nmodels(self.path, self.problem)
862 if self.nmodels == nmodels_available:
863 return
865 try:
866 new_models, new_misfits, new_bootstraps, new_sampler_contexts = \
867 load_problem_data(
868 self.path,
869 self.problem,
870 nmodels_skip=self.nmodels,
871 nchains=self.nchains)
873 except ValueError:
874 return
876 self.extend(
877 new_models,
878 new_misfits,
879 new_bootstraps,
880 new_sampler_contexts)
882 def add_listener(self, listener):
883 ''' Add a listener to the history
885 The listening class can implement the following methods:
886 * ``extend``
887 '''
888 self.listeners.append(listener)
890 def emit(self, event_name, *args, **kwargs):
891 for listener in self.listeners:
892 slot = getattr(listener, event_name, None)
893 if callable(slot):
894 slot(*args, **kwargs)
896 @property
897 def attribute_names(self):
898 apath = op.join(self.path, 'attributes')
899 if not os.path.exists(apath):
900 return []
902 return [fn for fn in os.listdir(apath)
903 if StringID.regex.match(fn)]
905 def get_attribute(self, name):
906 if name not in self._attributes:
907 if name not in self.attribute_names:
908 raise NoSuchAttribute(name)
910 path = op.join(self.path, 'attributes', name)
912 with open(path, 'rb') as f:
913 self._attributes[name] = num.fromfile(
914 f, dtype='<i4',
915 count=self.nmodels).astype(int)
917 assert self._attributes[name].shape == (self.nmodels,)
919 return self._attributes[name]
921 def set_attribute(self, name, attribute):
922 if not StringID.regex.match(name):
923 raise InvalidAttributeName(name)
925 attribute = attribute.astype(int)
926 assert attribute.shape == (self.nmodels,)
928 apath = op.join(self.path, 'attributes')
930 if not os.path.exists(apath):
931 os.mkdir(apath)
933 path = op.join(apath, name)
935 with open(path, 'wb') as f:
936 attribute.astype('<i4').tofile(f)
938 self._attributes[name] = attribute
940 def ensure_bootstrap_misfits(self, optimiser):
941 if self.bootstrap_misfits is None:
942 problem = self.problem
943 self.bootstrap_misfits = problem.combine_misfits(
944 self.misfits,
945 extra_weights=optimiser.get_bootstrap_weights(problem),
946 extra_residuals=optimiser.get_bootstrap_residuals(problem))
948 def imodels_by_cluster(self, cluster_attribute):
949 if cluster_attribute is None:
950 return [(-1, 100.0, num.arange(self.nmodels))]
952 by_cluster = []
953 try:
954 iclusters = self.get_attribute(cluster_attribute)
955 iclusters_avail = num.unique(iclusters)
957 for icluster in iclusters_avail:
958 imodels = num.where(iclusters == icluster)[0]
959 by_cluster.append(
960 (icluster,
961 (100.0 * imodels.size) / self.nmodels,
962 imodels))
964 if by_cluster and by_cluster[0][0] == -1:
965 by_cluster.append(by_cluster.pop(0))
967 except NoSuchAttribute:
968 logger.warning(
969 'Attribute %s not set in run %s.\n'
970 ' Skipping model retrieval by clusters.' % (
971 cluster_attribute, self.problem.name))
973 return by_cluster
975 def models_by_cluster(self, cluster_attribute):
976 if cluster_attribute is None:
977 return [(-1, 100.0, self.models)]
979 return [
980 (icluster, percentage, self.models[imodels])
981 for (icluster, percentage, imodels)
982 in self.imodels_by_cluster(cluster_attribute)]
984 def mean_sources_by_cluster(self, cluster_attribute):
985 return [
986 (icluster, percentage, stats.get_mean_source(self.problem, models))
987 for (icluster, percentage, models)
988 in self.models_by_cluster(cluster_attribute)]
990 def get_sorted_misfits_idx(self, chain=0):
991 if chain not in self._sorted_misfit_idx.keys():
992 self._sorted_misfit_idx[chain] = num.argsort(
993 self.bootstrap_misfits[:, chain])
995 return self._sorted_misfit_idx[chain]
997 def get_sorted_misfits(self, chain=0):
998 isort = self.get_sorted_misfits_idx(chain)
999 return self.bootstrap_misfits[:, chain][isort]
1001 def get_sorted_models(self, chain=0):
1002 isort = self.get_sorted_misfits_idx(chain=0)
1003 return self.models[isort, :]
1005 def get_sorted_primary_misfits(self):
1006 return self.get_sorted_misfits(chain=0)
1008 def get_sorted_primary_models(self):
1009 return self.get_sorted_models(chain=0)
1011 def get_best_model(self, chain=0):
1012 return self.get_sorted_models(chain)[0, ...]
1014 def get_best_misfit(self, chain=0):
1015 return self.get_sorted_misfits(chain)[0]
1017 def get_mean_model(self):
1018 return num.mean(self.models, axis=0)
1020 def get_mean_misfit(self, chain=0):
1021 return num.mean(self.bootstrap_misfits[:, chain])
1023 def get_best_source(self, chain=0):
1024 return self.problem.get_source(self.get_best_model(chain))
1026 def get_mean_source(self, chain=0):
1027 return self.problem.get_source(self.get_mean_model())
1029 def get_chain_misfits(self, chain=0):
1030 return self.bootstrap_misfits[:, chain]
1032 def get_primary_chain_misfits(self):
1033 return self.get_chain_misfits(chain=0)
1036def get_nmodels(dirname, problem):
1037 fn = op.join(dirname, 'models')
1038 with open(fn, 'r') as f:
1039 nmodels1 = os.fstat(f.fileno()).st_size // (problem.nparameters * 8)
1041 fn = op.join(dirname, 'misfits')
1042 with open(fn, 'r') as f:
1043 nmodels2 = os.fstat(f.fileno()).st_size // (problem.nmisfits * 2 * 8)
1045 return min(nmodels1, nmodels2)
1048def load_problem_info_and_data(dirname, subset=None, nchains=None):
1049 problem = load_problem_info(dirname)
1050 models, misfits, bootstraps, sampler_contexts = load_problem_data(
1051 xjoin(dirname, subset), problem, nchains=nchains)
1052 return problem, models, misfits, bootstraps, sampler_contexts
1055def load_optimiser_info(dirname):
1056 fn = op.join(dirname, 'optimiser.yaml')
1057 return guts.load(filename=fn)
1060def load_problem_info(dirname):
1061 try:
1062 fn = op.join(dirname, 'problem.yaml')
1063 return guts.load(filename=fn)
1064 except OSError as e:
1065 logger.debug(e)
1066 raise ProblemInfoNotAvailable(
1067 'No problem info available (%s).' % dirname)
1070def load_problem_data(dirname, problem, nmodels_skip=0, nchains=None):
1072 def get_chains_fn():
1073 for fn in (op.join(dirname, 'bootstraps'),
1074 op.join(dirname, 'chains')):
1075 if op.exists(fn):
1076 return fn
1077 return False
1079 try:
1080 nmodels = get_nmodels(dirname, problem) - nmodels_skip
1082 fn = op.join(dirname, 'models')
1083 with open(fn, 'r') as f:
1084 f.seek(nmodels_skip * problem.nparameters * 8)
1085 models = num.fromfile(
1086 f, dtype='<f8',
1087 count=nmodels * problem.nparameters)\
1088 .astype(float)
1090 models = models.reshape((nmodels, problem.nparameters))
1092 fn = op.join(dirname, 'misfits')
1093 with open(fn, 'r') as f:
1094 f.seek(nmodels_skip * problem.nmisfits * 2 * 8)
1095 misfits = num.fromfile(
1096 f, dtype='<f8',
1097 count=nmodels*problem.nmisfits*2)\
1098 .astype(float)
1099 misfits = misfits.reshape((nmodels, problem.nmisfits, 2))
1101 chains = None
1102 fn = get_chains_fn()
1103 if fn and nchains is not None:
1104 with open(fn, 'r') as f:
1105 f.seek(nmodels_skip * nchains * 8)
1106 chains = num.fromfile(
1107 f, dtype='<f8',
1108 count=nmodels*nchains)\
1109 .astype(float)
1111 chains = chains.reshape((nmodels, nchains))
1113 sampler_contexts = None
1114 fn = op.join(dirname, 'choices')
1115 if op.exists(fn):
1116 with open(fn, 'r') as f:
1117 f.seek(nmodels_skip * 4 * 8)
1118 sampler_contexts = num.fromfile(
1119 f, dtype='<i8',
1120 count=nmodels*4).astype(int)
1122 sampler_contexts = sampler_contexts.reshape((nmodels, 4))
1124 except OSError as e:
1125 logger.debug(str(e))
1126 raise ProblemDataNotAvailable(
1127 'No problem data available (%s).' % dirname)
1129 return models, misfits, chains, sampler_contexts
1132__all__ = '''
1133 ProblemConfig
1134 Problem
1135 ModelHistory
1136 ProblemInfoNotAvailable
1137 ProblemDataNotAvailable
1138 load_problem_info
1139 load_problem_info_and_data
1140 InvalidAttributeName
1141 NoSuchAttribute
1142'''.split()