Source code for grond.problems.base

'''
Base classes for Grond's problem definition and the model history container.

Common behaviour of all source models offered by Grond is implemented here.
Source model specific details are implemented in the respective submodules.
'''

import numpy as num
import math
import copy
import logging
import os.path as op
import os
import time
import struct
import threading

from pyrocko import gf, util, guts, orthodrome as pod
from pyrocko.guts import Object, String, List, Dict, Int

from grond.meta import ADict, Parameter, GrondError, xjoin, Forbidden, \
    StringID, has_get_plot_classes
from ..targets import MisfitResult, MisfitTarget, TargetGroup, \
    WaveformMisfitTarget, SatelliteMisfitTarget, GNSSCampaignMisfitTarget

from grond import stats

from grond.version import __version__

guts_prefix = 'grond'
logger = logging.getLogger('grond.problems.base')
km = 1e3
as_km = dict(scale_factor=km, scale_unit='km')

g_rstate = num.random.RandomState()


def nextpow2(i):
    return 2**int(math.ceil(math.log(i) / math.log(2.)))


def nextcapacity(i):
    return int(math.ceil(i / 1024) * 1024)


def correlated_weights(values, weight_matrix):
    '''
    Applies correlated weights to values

    The resulting weighed values have to be squared! Check out
    :meth:`Problem.combine_misfits` for more information.

    :param values: Misfits or norms as :class:`numpy.Array`
    :param weight: Weight matrix, commonly the inverse of covariance matrix

    :returns: :class:`numpy.Array` weighted values
    '''
    return num.matmul(values, weight_matrix)


[docs] class ProblemConfig(Object): ''' Base class for config section defining the objective function setup. Factory for :py:class:`Problem` objects. ''' name_template = String.T() norm_exponent = Int.T(default=2) nthreads = Int.T( default=1, optional=True, help='Deprecated: use command line argument or global config to set ' 'number of allowed threads.') def check_deprecations(self): if self.nthreads != 1: logger.warning( 'The `nthreads` parameter in `ProblemConfig` has been ' 'deprecated and is ignored now. Please use the `--nthreads` ' 'command line option to set it.')
[docs] def get_problem(self, event, target_groups, targets): ''' Instantiate the problem with a given event and targets. :returns: :py:class:`Problem` object ''' self.check_deprecations() raise NotImplementedError
[docs] @has_get_plot_classes class Problem(Object): ''' Base class for objective function setup. Defines the *problem* to be solved by the optimiser. ''' name = String.T() ranges = Dict.T(String.T(), gf.Range.T()) dependants = List.T(Parameter.T()) norm_exponent = Int.T(default=2) base_source = gf.Source.T(optional=True) targets = List.T(MisfitTarget.T()) target_groups = List.T(TargetGroup.T()) grond_version = String.T(optional=True) def __init__(self, **kwargs): Object.__init__(self, **kwargs) if self.grond_version is None: self.grond_version = __version__ self._target_weights = None self._engine = None self._family_mask = None self._rstate_manager = None if hasattr(self, 'problem_waveform_parameters') and self.has_waveforms: self.problem_parameters =\ self.problem_parameters + self.problem_waveform_parameters unused_parameters = [] for p in self.problem_parameters: if p.optional and p._name not in self.ranges.keys(): unused_parameters.append(p) for p in unused_parameters: self.problem_parameters.remove(p) self.check() @classmethod def get_plot_classes(cls): from . import plot return plot.get_plot_classes() def check(self): paths = set() for grp in self.target_groups: if grp.path == 'all': continue if grp.path in paths: raise ValueError('Path %s defined more than once! In %s' % (grp.path, grp.__class__.__name__)) paths.add(grp.path) logger.debug('TargetGroup check OK.') def copy(self): o = copy.copy(self) o._target_weights = None return o def set_target_parameter_values(self, x): nprob = len(self.problem_parameters) for target in self.targets: target.set_parameter_values(x[nprob:nprob + target.nparameters]) nprob += target.nparameters def get_parameter_dict(self, model, group=None): params = [(p.name, model[ip]) for ip, p in enumerate(self.parameters) if group in p.groups or group is None] return ADict(params) def get_parameter_array(self, d): arr = num.zeros(self.nparameters, dtype=float) for ip, p in enumerate(self.parameters): if p.name in d.keys(): arr[ip] = d[p.name] return arr def get_parameter_index(self, param_name): return {k.name: ik for ik, k in enumerate(self.parameters)}[param_name] def get_rstate_manager(self): if self._rstate_manager is None: self._rstate_manager = RandomStateManager() return self._rstate_manager def dump_problem_info(self, dirname): fn = op.join(dirname, 'problem.yaml') util.ensuredirs(fn) guts.dump(self, filename=fn) def dump_problem_data( self, dirname, x, misfits, chains=None, sampler_context=None): fn = op.join(dirname, 'models') if not isinstance(x, num.ndarray): x = num.array(x) with open(fn, 'ab') as f: x.astype('<f8').tofile(f) fn = op.join(dirname, 'misfits') with open(fn, 'ab') as f: misfits.astype('<f8').tofile(f) if chains is not None: fn = op.join(dirname, 'chains') with open(fn, 'ab') as f: chains.astype('<f8').tofile(f) if sampler_context is not None: fn = op.join(dirname, 'choices') with open(fn, 'ab') as f: num.array(sampler_context, dtype='<i8').tofile(f) fn = op.join(dirname, 'rstate') self.get_rstate_manager().save_state(fn) def name_to_index(self, name): pnames = [p.name for p in self.combined] return pnames.index(name) @property def parameters(self): target_parameters = [] for target in self.targets: target_parameters.extend(target.target_parameters) return self.problem_parameters + target_parameters @property def parameter_names(self): return [p.name for p in self.combined] @property def dependant_names(self): return [p.name for p in self.dependants] @property def nparameters(self): return len(self.parameters) @property def ntargets(self): return len(self.targets) @property def nwaveform_targets(self): return len(self.waveform_targets) @property def nsatellite_targets(self): return len(self.satellite_targets) @property def ngnss_targets(self): return len(self.gnss_targets) @property def nmisfits(self): nmisfits = 0 for target in self.targets: nmisfits += target.nmisfits return nmisfits @property def ndependants(self): return len(self.dependants) @property def ncombined(self): return len(self.parameters) + len(self.dependants) @property def combined(self): return self.parameters + self.dependants @property def satellite_targets(self): return [t for t in self.targets if isinstance(t, SatelliteMisfitTarget)] @property def gnss_targets(self): return [t for t in self.targets if isinstance(t, GNSSCampaignMisfitTarget)] @property def waveform_targets(self): return [t for t in self.targets if isinstance(t, WaveformMisfitTarget)] @property def has_satellite(self): if self.satellite_targets: return True return False @property def has_waveforms(self): if self.waveform_targets: return True return False def set_engine(self, engine): self._engine = engine def get_engine(self): return self._engine def get_source(self, x): raise NotImplementedError def pack(self, source): raise NotImplementedError def source_to_x(self, source): bs = self.base_source n, e = pod.latlon_to_ne( bs.lat, bs.lon, source.effective_lat, source.effective_lon) source.lat, source.lon = bs.lat, bs.lon source.north_shift = n source.east_shift = e tmin, tmax = self.ranges['time'].start, self.ranges['time'].stop if (source.time - bs.time < tmin) or (source.time - bs.time > tmax): rstatem = self.get_rstate_manager() rstate = rstatem.get_rstate(name='source2x') source.time = bs.time source.time += rstate.uniform(low=tmin, high=tmax, size=1) p = {} for k in self.base_source.keys(): val = source[k] if k == 'time': p[k] = float(val) elif k in self.ranges: val = source[k] if self.ranges[k].relative == 'add': val = self.ranges[k].make_relative( -self.base_source[k], source[k]) elif self.ranges[k].relative == 'mult': val = self.ranges[k].make_relative( 1./self.base_source[k], source[k]) p[k] = float(val) source = source.clone(**p) return self.pack(source) def get_gf_store_ids(self): return tuple(set([t.store_id for t in self.targets])) def get_gf_store(self, target): if not isinstance(target, str): target = target.store_id if self.get_engine() is None: raise GrondError('Cannot get GF Store, modelling is not set up.') return self.get_engine().get_store(target) def random_uniform(self, xbounds, rstate, fixed_magnitude=None): if fixed_magnitude is not None: raise GrondError( 'Setting fixed magnitude in random model generation not ' 'supported for this type of problem.') x = rstate.uniform(0., 1., self.nparameters) x *= (xbounds[:, 1] - xbounds[:, 0]) x += xbounds[:, 0] return x def minimum(self, xbounds, fixed_magnitude=None): if fixed_magnitude is not None: raise GrondError( 'Setting fixed magnitude in random model generation not ' 'supported for this type of problem.') x = xbounds[:, 0] return x def maximum(self, xbounds, fixed_magnitude=None): if fixed_magnitude is not None: raise GrondError( 'Setting fixed magnitude in random model generation not ' 'supported for this type of problem.') x = xbounds[:, 1] return x def preconstrain(self, x, optimizer=None): return x def extract(self, xs, i): if xs.ndim == 1: return self.extract(xs[num.newaxis, :], i)[0] if i < self.nparameters: return xs[:, i] else: return self.make_dependant( xs, self.dependants[i - self.nparameters].name) def get_target_weights(self): if self._target_weights is None: self._target_weights = num.concatenate( [target.get_combined_weight() for target in self.targets]) return self._target_weights def get_target_residuals(self): pass def inter_family_weights(self, ns): exp, root = self.get_norm_functions() family, nfamilies = self.get_family_mask() ws = num.zeros(self.nmisfits) for ifamily in range(nfamilies): mask = family == ifamily ws[mask] = 1.0 / root(num.nansum(exp(ns[mask]))) return ws
[docs] def inter_family_weights2(self, ns): ''' :param ns: 2D array with normalization factors ``ns[imodel, itarget]`` :returns: 2D array ``weights[imodel, itarget]`` ''' exp, root = self.get_norm_functions() family, nfamilies = self.get_family_mask() ws = num.zeros(ns.shape) for ifamily in range(nfamilies): mask = family == ifamily ws[:, mask] = (1.0 / root( num.nansum(exp(ns[:, mask]), axis=1)))[:, num.newaxis] return ws
def get_reference_model(self): model = num.zeros(self.nparameters) model_source_params = self.pack(self.base_source) model[:model_source_params.size] = model_source_params return model def get_parameter_bounds(self): out = [] for p in self.problem_parameters: r = self.ranges[p.name] out.append((r.start, r.stop)) for target in self.targets: for p in target.target_parameters: r = target.target_ranges[p.name_nogroups] out.append((r.start, r.stop)) return num.array(out, dtype=float) def get_dependant_bounds(self): return num.zeros((0, 2)) def get_combined_bounds(self): return num.vstack(( self.get_parameter_bounds(), self.get_dependant_bounds())) def raise_invalid_norm_exponent(self): raise GrondError('Invalid norm exponent: %f' % self.norm_exponent) def get_norm_functions(self): if self.norm_exponent == 2: def sqr(x): return x**2 return sqr, num.sqrt elif self.norm_exponent == 1: def noop(x): return x return noop, num.abs else: self.raise_invalid_norm_exponent()
[docs] def combine_misfits( self, misfits, extra_weights=None, extra_residuals=None, extra_correlated_weights=dict(), get_contributions=False): ''' Combine misfit contributions (residuals) to global or bootstrap misfits :param misfits: 3D array ``misfits[imodel, iresidual, 0]`` are the misfit contributions (residuals) ``misfits[imodel, iresidual, 1]`` are the normalisation contributions. It is also possible to give the misfit and normalisation contributions for a single model as ``misfits[iresidual, 0]`` and misfits[iresidual, 1]`` in which case, the first dimension (imodel) of the result will be stipped off. :param extra_weights: if given, 2D array of extra weights to be applied to the contributions, indexed as ``extra_weights[ibootstrap, iresidual]``. :param extra_residuals: if given, 2D array of perturbations to be added to the residuals, indexed as ``extra_residuals[ibootstrap, iresidual]``. :param extra_correlated_weights: if a dictionary of ``imisfit: correlated weight matrix`` is passed a correlated weight matrix is applied to the misfit and normalisation values. `imisfit` is the starting index in the misfits vector the correlated weight matrix applies to. :param get_contributions: get the weighted and perturbed contributions (don't do the sum). :returns: if no *extra_weights* or *extra_residuals* are given, a 1D array indexed as ``misfits[imodel]`` containing the global misfit for each model is returned, otherwise a 2D array ``misfits[imodel, ibootstrap]`` with the misfit for every model and weighting/residual set is returned. ''' if misfits.ndim == 2: misfits = misfits[num.newaxis, :, :] return self.combine_misfits( misfits, extra_weights, extra_residuals, extra_correlated_weights, get_contributions)[0, ...] if extra_weights is None and extra_residuals is None: return self.combine_misfits( misfits, False, False, extra_correlated_weights, get_contributions)[:, 0] assert misfits.ndim == 3 assert not num.any(extra_weights) or extra_weights.ndim == 2 assert not num.any(extra_residuals) or extra_residuals.ndim == 2 if self.norm_exponent != 2 and extra_correlated_weights: raise GrondError('Correlated weights can only be used ' ' with norm_exponent=2') exp, root = self.get_norm_functions() nmodels = misfits.shape[0] nmisfits = misfits.shape[1] # noqa mf = misfits[:, num.newaxis, :, :].copy() if num.any(extra_residuals): mf = mf + extra_residuals[num.newaxis, :, :, num.newaxis] res = mf[..., 0] norms = mf[..., 1] for imisfit, corr_weight_mat in extra_correlated_weights.items(): jmisfit = imisfit + corr_weight_mat.shape[0] for imodel in range(nmodels): corr_res = res[imodel, :, imisfit:jmisfit] corr_norms = norms[imodel, :, imisfit:jmisfit] res[imodel, :, imisfit:jmisfit] = \ correlated_weights(corr_res, corr_weight_mat) norms[imodel, :, imisfit:jmisfit] = \ correlated_weights(corr_norms, corr_weight_mat) # Apply normalization family weights (these weights depend on # on just calculated correlated norms!) weights_fam = \ self.inter_family_weights2(norms[:, 0, :])[:, num.newaxis, :] weights_fam = exp(weights_fam) res = exp(res) norms = exp(norms) res *= weights_fam norms *= weights_fam weights_tar = self.get_target_weights()[num.newaxis, num.newaxis, :] if num.any(extra_weights): weights_tar = weights_tar * extra_weights[num.newaxis, :, :] weights_tar = exp(weights_tar) res = res * weights_tar norms = norms * weights_tar if get_contributions: return res / num.nansum(norms, axis=2)[:, :, num.newaxis] result = root( num.nansum(res, axis=2) / num.nansum(norms, axis=2)) assert result[result < 0].size == 0 return result
def make_family_mask(self): family_names = set() families = num.zeros(self.nmisfits, dtype=int) idx = 0 for itarget, target in enumerate(self.targets): family_names.add(target.normalisation_family) families[idx:idx + target.nmisfits] = len(family_names) - 1 idx += target.nmisfits return families, len(family_names) def get_family_mask(self): if self._family_mask is None: self._family_mask = self.make_family_mask() return self._family_mask def evaluate(self, x, mask=None, result_mode='full', targets=None): source = self.get_source(x) engine = self.get_engine() self.set_target_parameter_values(x) if mask is not None and targets is not None: raise ValueError('Mask cannot be defined with targets set.') targets = targets if targets is not None else self.targets for target in targets: target.set_result_mode(result_mode) modelling_targets = [] t2m_map = {} for itarget, target in enumerate(targets): t2m_map[target] = target.prepare_modelling(engine, source, targets) if mask is None or mask[itarget]: modelling_targets.extend(t2m_map[target]) u2m_map = {} for imtarget, mtarget in enumerate(modelling_targets): if mtarget not in u2m_map: u2m_map[mtarget] = [] u2m_map[mtarget].append(imtarget) modelling_targets_unique = list(u2m_map.keys()) resp = engine.process(source, modelling_targets_unique) modelling_results_unique = list(resp.results_list[0]) modelling_results = [None] * len(modelling_targets) for mtarget, mresult in zip( modelling_targets_unique, modelling_results_unique): for itarget in u2m_map[mtarget]: modelling_results[itarget] = mresult imt = 0 results = [] for itarget, target in enumerate(targets): nmt_this = len(t2m_map[target]) if mask is None or mask[itarget]: result = target.finalize_modelling( engine, source, t2m_map[target], modelling_results[imt:imt + nmt_this]) imt += nmt_this else: result = gf.SeismosizerError( 'target was excluded from modelling') results.append(result) return results def misfits(self, x, mask=None): results = self.evaluate(x, mask=mask, result_mode='sparse') misfits = num.full((self.nmisfits, 2), num.nan) imisfit = 0 for target, result in zip(self.targets, results): if isinstance(result, MisfitResult): misfits[imisfit:imisfit + target.nmisfits, :] = result.misfits imisfit += target.nmisfits return misfits def forward(self, x): source = self.get_source(x) engine = self.get_engine() plain_targets = [] for target in self.targets: plain_targets.extend(target.get_plain_targets(engine, source)) resp = engine.process(source, plain_targets) results = [] for target, result in zip(plain_targets, resp.results_list[0]): if isinstance(result, gf.SeismosizerError): logger.debug( '%s.%s.%s.%s: %s' % (target.codes + (str(result),))) else: results.append(result) return results def get_random_model(self, ntries_limit=100): xbounds = self.get_parameter_bounds() for _ in range(ntries_limit): x = self.random_uniform(xbounds, rstate=g_rstate) try: return self.preconstrain(x) except Forbidden: pass raise GrondError( 'Could not find any suitable candidate sample within %i tries' % ( ntries_limit)) def get_min_model(self, ntries_limit=100): xbounds = self.get_parameter_bounds() for _ in range(ntries_limit): x = self.minimum(xbounds) try: return self.preconstrain(x) except Forbidden: pass raise GrondError( 'Could not find any suitable candidate sample within %i tries' % ( ntries_limit)) def get_max_model(self, ntries_limit=100): xbounds = self.get_parameter_bounds() for _ in range(ntries_limit): x = self.maximum(xbounds) try: return self.preconstrain(x) except Forbidden: pass raise GrondError( 'Could not find any suitable candidate sample within %i tries' % ( ntries_limit))
[docs] class ProblemInfoNotAvailable(GrondError): pass
[docs] class ProblemDataNotAvailable(GrondError): pass
[docs] class NoSuchAttribute(GrondError): pass
[docs] class InvalidAttributeName(GrondError): pass
[docs] class ModelHistory(object): ''' Write, read and follow sequences of models produced in an optimisation run. :param problem: :class:`grond.Problem` instance :param path: path to rundir, defaults to None :type path: str, optional :param mode: open mode, 'r': read, 'w': write :type mode: str, optional ''' nmodels_capacity_min = 1024 def __init__(self, problem, nchains=None, path=None, mode='r'): self.mode = mode self.problem = problem self.path = path self.nchains = nchains self._models_buffer = None self._misfits_buffer = None self._bootstraps_buffer = None self._sample_contexts_buffer = None self._sorted_misfit_idx = {} self.models = None self.misfits = None self.bootstrap_misfits = None self.sampler_contexts = None self.nmodels_capacity = self.nmodels_capacity_min self.listeners = [] self._attributes = {} if mode == 'r': self.load() @staticmethod def verify_rundir(rundir): _rundir_files = ('misfits', 'models') if not op.exists(rundir): raise ProblemDataNotAvailable( 'Directory does not exist: %s' % rundir) for f in _rundir_files: if not op.exists(op.join(rundir, f)): raise ProblemDataNotAvailable('File not found: %s' % f)
[docs] @classmethod def follow(cls, path, nchains=None, wait=20.): ''' Start following a rundir (constructor). :param path: the path to follow, a grond rundir :type path: str, optional :param wait: wait time until the folder become alive :type wait: number in seconds, optional :returns: A :py:class:`ModelHistory` instance ''' start_watch = time.time() while (time.time() - start_watch) < wait: try: cls.verify_rundir(path) problem = load_problem_info(path) return cls(problem, nchains=nchains, path=path, mode='r') except (ProblemDataNotAvailable, OSError): time.sleep(.25)
@property def nmodels(self): if self.models is None: return 0 else: return self.models.shape[0] @nmodels.setter def nmodels(self, nmodels_new): assert 0 <= nmodels_new <= self.nmodels self.models = self._models_buffer[:nmodels_new, :] self.misfits = self._misfits_buffer[:nmodels_new, :, :] if self.nchains is not None: self.bootstrap_misfits = self._bootstraps_buffer[:nmodels_new, :, :] # noqa if self._sample_contexts_buffer is not None: self.sampler_contexts = self._sample_contexts_buffer[:nmodels_new, :] # noqa @property def nmodels_capacity(self): if self._models_buffer is None: return 0 else: return self._models_buffer.shape[0] @nmodels_capacity.setter def nmodels_capacity(self, nmodels_capacity_new): if self.nmodels_capacity != nmodels_capacity_new: models_buffer = num.zeros( (nmodels_capacity_new, self.problem.nparameters), dtype=float) misfits_buffer = num.zeros( (nmodels_capacity_new, self.problem.nmisfits, 2), dtype=float) sample_contexts_buffer = num.zeros( (nmodels_capacity_new, 4), dtype=int) sample_contexts_buffer.fill(-1) if self.nchains is not None: bootstraps_buffer = num.zeros( (nmodels_capacity_new, self.nchains), dtype=float) ncopy = min(self.nmodels, nmodels_capacity_new) if self._models_buffer is not None: models_buffer[:ncopy, :] = \ self._models_buffer[:ncopy, :] misfits_buffer[:ncopy, :, :] = \ self._misfits_buffer[:ncopy, :, :] sample_contexts_buffer[:ncopy, :] = \ self._sample_contexts_buffer[:ncopy, :] self._models_buffer = models_buffer self._misfits_buffer = misfits_buffer self._sample_contexts_buffer = sample_contexts_buffer if self.nchains is not None: if self._bootstraps_buffer is not None: bootstraps_buffer[:ncopy, :] = \ self._bootstraps_buffer[:ncopy, :] self._bootstraps_buffer = bootstraps_buffer def clear(self): assert self.mode != 'r', 'History is read-only, cannot clear.' self.nmodels = 0 self.nmodels_capacity = self.nmodels_capacity_min def extend( self, models, misfits, bootstrap_misfits=None, sampler_contexts=None): nmodels = self.nmodels n = models.shape[0] nmodels_capacity_want = max( self.nmodels_capacity_min, nextpow2(nmodels + n)) if nmodels_capacity_want != self.nmodels_capacity: self.nmodels_capacity = nmodels_capacity_want self._models_buffer[nmodels:nmodels + n, :] = models self._misfits_buffer[nmodels:nmodels + n, :, :] = misfits self.models = self._models_buffer[:nmodels + n, :] self.misfits = self._misfits_buffer[:nmodels + n, :, :] if bootstrap_misfits is not None: self._bootstraps_buffer[nmodels:nmodels + n, :] = bootstrap_misfits self.bootstrap_misfits = self._bootstraps_buffer[:nmodels + n, :] if sampler_contexts is not None: self._sample_contexts_buffer[nmodels:nmodels + n, :] \ = sampler_contexts self.sampler_contexts = self._sample_contexts_buffer[ :nmodels + n, :] if self.path and self.mode == 'w': for i in range(n): self.problem.dump_problem_data( self.path, models[i, :], misfits[i, :, :], bootstrap_misfits[i, :] if bootstrap_misfits is not None else None, sampler_contexts[i, :] if sampler_contexts is not None else None) self._sorted_misfit_idx.clear() self.emit('extend', nmodels, n, models, misfits, sampler_contexts) def append( self, model, misfits, bootstrap_misfits=None, sampler_context=None): if bootstrap_misfits is not None: bootstrap_misfits = bootstrap_misfits[num.newaxis, :] if sampler_context is not None: sampler_context = sampler_context[num.newaxis, :] return self.extend( model[num.newaxis, :], misfits[num.newaxis, :, :], bootstrap_misfits, sampler_context) def load(self): self.mode = 'r' self.verify_rundir(self.path) models, misfits, bootstraps, sampler_contexts = load_problem_data( self.path, self.problem, nchains=self.nchains) self.extend(models, misfits, bootstraps, sampler_contexts)
[docs] def update(self): ''' Update history from path ''' nmodels_available = get_nmodels(self.path, self.problem) if self.nmodels == nmodels_available: return try: new_models, new_misfits, new_bootstraps, new_sampler_contexts = \ load_problem_data( self.path, self.problem, nmodels_skip=self.nmodels, nchains=self.nchains) except ValueError: return self.extend( new_models, new_misfits, new_bootstraps, new_sampler_contexts)
[docs] def add_listener(self, listener): ''' Add a listener to the history The listening class can implement the following methods: * ``extend`` ''' self.listeners.append(listener)
def emit(self, event_name, *args, **kwargs): for listener in self.listeners: slot = getattr(listener, event_name, None) if callable(slot): slot(*args, **kwargs) @property def attribute_names(self): apath = op.join(self.path, 'attributes') if not os.path.exists(apath): return [] return [fn for fn in os.listdir(apath) if StringID.regex.match(fn)] def get_attribute(self, name): if name not in self._attributes: if name not in self.attribute_names: raise NoSuchAttribute(name) path = op.join(self.path, 'attributes', name) with open(path, 'rb') as f: self._attributes[name] = num.fromfile( f, dtype='<i4', count=self.nmodels).astype(int) assert self._attributes[name].shape == (self.nmodels,) return self._attributes[name] def set_attribute(self, name, attribute): if not StringID.regex.match(name): raise InvalidAttributeName(name) attribute = attribute.astype(int) assert attribute.shape == (self.nmodels,) apath = op.join(self.path, 'attributes') if not os.path.exists(apath): os.mkdir(apath) path = op.join(apath, name) with open(path, 'wb') as f: attribute.astype('<i4').tofile(f) self._attributes[name] = attribute def ensure_bootstrap_misfits(self, optimiser): if self.bootstrap_misfits is None: problem = self.problem self.bootstrap_misfits = problem.combine_misfits( self.misfits, extra_weights=optimiser.get_bootstrap_weights(problem), extra_residuals=optimiser.get_bootstrap_residuals(problem)) def imodels_by_cluster(self, cluster_attribute): if cluster_attribute is None: return [(-1, 100.0, num.arange(self.nmodels))] by_cluster = [] try: iclusters = self.get_attribute(cluster_attribute) iclusters_avail = num.unique(iclusters) for icluster in iclusters_avail: imodels = num.where(iclusters == icluster)[0] by_cluster.append( (icluster, (100.0 * imodels.size) / self.nmodels, imodels)) if by_cluster and by_cluster[0][0] == -1: by_cluster.append(by_cluster.pop(0)) except NoSuchAttribute: logger.warning( 'Attribute %s not set in run %s.\n' ' Skipping model retrieval by clusters.' % ( cluster_attribute, self.problem.name)) return by_cluster def models_by_cluster(self, cluster_attribute): if cluster_attribute is None: return [(-1, 100.0, self.models)] return [ (icluster, percentage, self.models[imodels]) for (icluster, percentage, imodels) in self.imodels_by_cluster(cluster_attribute)] def mean_sources_by_cluster(self, cluster_attribute): return [ (icluster, percentage, stats.get_mean_source(self.problem, models)) for (icluster, percentage, models) in self.models_by_cluster(cluster_attribute)] def get_sorted_misfits_idx(self, chain=0): if chain not in self._sorted_misfit_idx.keys(): self._sorted_misfit_idx[chain] = num.argsort( self.bootstrap_misfits[:, chain]) return self._sorted_misfit_idx[chain] def get_sorted_misfits(self, chain=0): isort = self.get_sorted_misfits_idx(chain) return self.bootstrap_misfits[:, chain][isort] def get_sorted_models(self, chain=0): isort = self.get_sorted_misfits_idx(chain=0) return self.models[isort, :] def get_sorted_primary_misfits(self): return self.get_sorted_misfits(chain=0) def get_sorted_primary_models(self): return self.get_sorted_models(chain=0) def get_best_model(self, chain=0): return self.get_sorted_models(chain)[0, ...] def get_best_misfit(self, chain=0): return self.get_sorted_misfits(chain)[0] def get_mean_model(self): return num.mean(self.models, axis=0) def get_mean_misfit(self, chain=0): return num.mean(self.bootstrap_misfits[:, chain]) def get_best_source(self, chain=0): return self.problem.get_source(self.get_best_model(chain)) def get_mean_source(self, chain=0): return self.problem.get_source(self.get_mean_model()) def get_chain_misfits(self, chain=0): return self.bootstrap_misfits[:, chain] def get_primary_chain_misfits(self): return self.get_chain_misfits(chain=0)
class RandomStateManager(object): MAX_LEN = 64 save_struct = struct.Struct('%ds7s2496sqqd' % MAX_LEN) def __init__(self): self.rstates = {} self.lock = threading.Lock() def get_rstate(self, name, seed=None): assert len(name) <= self.MAX_LEN if name not in self.rstates: self.rstates[name] = num.random.RandomState(seed) return self.rstates[name] @property def nstates(self): return len(self.rstates) def save_state(self, fname): with self.lock: with open(fname, 'wb') as f: for name, rstate in self.rstates.items(): s, arr, pos, has_gauss, chached_gauss = rstate.get_state() f.write( self.save_struct.pack( name.encode(), s.encode(), arr.tobytes(), pos, has_gauss, chached_gauss)) def load_state(self, fname): with self.lock: with open(fname, 'rb') as f: while True: buff = f.read(self.save_struct.size) if not buff: break name, s, arr, pos, has_gauss, chached_gauss = \ self.save_struct.unpack(buff) name = name.replace(b'\x00', b'').decode() s = s.replace(b'\x00', b'').decode() arr = num.frombuffer(arr, dtype=num.uint32) rstate = num.random.RandomState() rstate.set_state((s, arr, pos, has_gauss, chached_gauss)) self.rstates[name] = rstate def get_nmodels(dirname, problem): fn = op.join(dirname, 'models') with open(fn, 'r') as f: nmodels1 = os.fstat(f.fileno()).st_size // (problem.nparameters * 8) fn = op.join(dirname, 'misfits') with open(fn, 'r') as f: nmodels2 = os.fstat(f.fileno()).st_size // (problem.nmisfits * 2 * 8) return min(nmodels1, nmodels2) def load_problem_info_and_data(dirname, subset=None, nchains=None): problem = load_problem_info(dirname) models, misfits, bootstraps, sampler_contexts = load_problem_data( xjoin(dirname, subset), problem, nchains=nchains) return problem, models, misfits, bootstraps, sampler_contexts def load_optimiser_info(dirname): fn = op.join(dirname, 'optimiser.yaml') return guts.load(filename=fn) def load_problem_info(dirname): try: fn = op.join(dirname, 'problem.yaml') return guts.load(filename=fn) except OSError as e: logger.debug(e) raise ProblemInfoNotAvailable( 'No problem info available (%s).' % dirname) def load_problem_data(dirname, problem, nmodels_skip=0, nchains=None): def get_chains_fn(): for fn in (op.join(dirname, 'bootstraps'), op.join(dirname, 'chains')): if op.exists(fn): return fn return False try: nmodels = get_nmodels(dirname, problem) - nmodels_skip fn = op.join(dirname, 'models') offset = nmodels_skip * problem.nparameters * 8 models = num.memmap( fn, dtype='<f8', offset=offset, shape=(nmodels, problem.nparameters))\ .astype(float, copy=False) fn = op.join(dirname, 'misfits') offset = nmodels_skip * problem.nmisfits * 2 * 8 misfits = num.memmap( fn, dtype='<f8', offset=offset, shape=(nmodels, problem.nmisfits, 2))\ .astype(float, copy=False) chains = None fn = get_chains_fn() if fn and nchains is not None: offset = nmodels_skip * nchains * 8 chains = num.memmap( fn, dtype='<f8', offset=offset, shape=(nmodels, nchains))\ .astype(float, copy=False) sampler_contexts = None fn = op.join(dirname, 'choices') if op.exists(fn): offset = nmodels_skip * 4 * 8 sampler_contexts = num.memmap( fn, dtype='<i8', offset=offset, shape=(nmodels, 4))\ .astype(int, copy=False) fn = op.join(dirname, 'rstate') if op.exists(fn): problem.get_rstate_manager().load_state(fn) except OSError as e: logger.debug(str(e)) raise ProblemDataNotAvailable( 'No problem data available (%s).' % dirname) return models, misfits, chains, sampler_contexts __all__ = ''' ProblemConfig Problem ModelHistory ProblemInfoNotAvailable ProblemDataNotAvailable load_problem_info load_problem_info_and_data InvalidAttributeName NoSuchAttribute '''.split()