Coverage for /usr/local/lib/python3.11/dist-packages/grond/core.py: 52%
523 statements
« prev ^ index » next coverage.py v6.5.0, created at 2023-10-26 18:31 +0000
« prev ^ index » next coverage.py v6.5.0, created at 2023-10-26 18:31 +0000
1from __future__ import print_function
3import sys
4import logging
5import time
6import copy
7import shutil
8import glob
9import math
10import os
11import numpy as num
12from contextlib import contextmanager
14from pyrocko.guts import Object, String, Float, List
15from pyrocko import gf, trace, guts, util, weeding
16from pyrocko import parimap, model, marker as pmarker
18from .dataset import NotFound, InvalidObject
19from .problems.base import Problem, load_problem_info_and_data, \
20 load_problem_data, ProblemDataNotAvailable
22from .optimisers.base import BadProblem
23from .targets.waveform.target import WaveformMisfitResult
24from .targets.base import dump_misfit_result_collection, \
25 MisfitResultCollection, MisfitResult, MisfitResultError
26from .meta import expand_template, GrondError, selected
27from .environment import Environment
28from .monitor import GrondMonitor
29from .config import get_global_config
31logger = logging.getLogger('grond.core')
32guts_prefix = 'grond'
33op = os.path
36class RingBuffer(num.ndarray):
37 def __new__(cls, *args, **kwargs):
38 cls = num.ndarray.__new__(cls, *args, **kwargs)
39 cls.fill(0.)
40 return cls
42 def __init__(self, *args, **kwargs):
43 self.pos = 0
45 def put(self, value):
46 self[self.pos] = value
47 self.pos += 1
48 self.pos %= self.size
51def mahalanobis_distance(xs, mx, cov):
52 imask = num.diag(cov) != 0.
53 icov = num.linalg.inv(cov[imask, :][:, imask])
54 temp = xs[:, imask] - mx[imask]
55 return num.sqrt(num.sum(temp * num.dot(icov, temp.T).T, axis=1))
58@contextmanager
59def lock_rundir(rundir):
60 statefn = op.join(rundir, '.running')
61 if op.exists(statefn):
62 raise EnvironmentError('file %s already exists!' % statefn)
63 try:
64 with open(statefn, 'w') as f:
65 f.write('')
66 yield True
67 finally:
68 os.remove(statefn)
71class DirectoryAlreadyExists(GrondError):
72 pass
75def weed(origin, targets, limit, neighborhood=3):
77 azimuths = num.zeros(len(targets))
78 dists = num.zeros(len(targets))
79 for i, target in enumerate(targets):
80 _, azimuths[i] = target.azibazi_to(origin)
81 dists[i] = target.distance_to(origin)
83 badnesses = num.ones(len(targets), dtype=float)
84 deleted, meandists_kept = weeding.weed(
85 azimuths, dists, badnesses,
86 nwanted=limit,
87 neighborhood=neighborhood)
89 targets_weeded = [
90 target for (delete, target) in zip(deleted, targets) if not delete]
92 return targets_weeded, meandists_kept, deleted
95def sarr(a):
96 return ' '.join('%15g' % x for x in a)
99def forward(env, show='filtered'):
100 payload = []
101 if env.have_rundir():
102 env.setup_modelling()
103 history = env.get_history(subset='harvest')
104 xbest = history.get_best_model()
105 problem = env.get_problem()
106 ds = env.get_dataset()
107 payload.append((ds, problem, xbest))
109 else:
110 for event_name in env.get_selected_event_names():
111 env.set_current_event_name(event_name)
112 env.setup_modelling()
113 problem = env.get_problem()
114 ds = env.get_dataset()
115 xref = problem.preconstrain(problem.get_reference_model())
116 payload.append((ds, problem, xref))
118 all_trs = []
119 events = []
120 stations = {}
121 for (ds, problem, x) in payload:
122 results = problem.evaluate(x)
124 event = problem.get_source(x).pyrocko_event()
125 events.append(event)
127 for result in results:
128 if isinstance(result, WaveformMisfitResult):
129 if show == 'filtered':
130 result.filtered_obs.set_codes(location='ob')
131 result.filtered_syn.set_codes(location='sy')
132 all_trs.append(result.filtered_obs)
133 all_trs.append(result.filtered_syn)
134 elif show == 'processed':
135 result.processed_obs.set_codes(location='ob')
136 result.processed_syn.set_codes(location='sy')
137 all_trs.append(result.processed_obs)
138 all_trs.append(result.processed_syn)
139 else:
140 raise ValueError('Invalid argument for show: %s' % show)
142 for station in ds.get_stations():
143 stations[station.nsl()] = station
145 markers = []
146 for ev in events:
147 markers.append(pmarker.EventMarker(ev))
149 trace.snuffle(all_trs, markers=markers, stations=list(stations.values()))
152def harvest(
153 rundir, problem=None, nbest=10, force=False, weed=0,
154 export_fits=[]):
156 env = Environment([rundir])
157 optimiser = env.get_optimiser()
158 nchains = env.get_optimiser().nchains
160 if problem is None:
161 problem, xs, misfits, bootstrap_misfits, _ = \
162 load_problem_info_and_data(rundir, nchains=nchains)
163 else:
164 xs, misfits, bootstrap_misfits, _ = \
165 load_problem_data(rundir, problem, nchains=nchains)
167 logger.info('Harvesting problem "%s"...' % problem.name)
169 dumpdir = op.join(rundir, 'harvest')
170 if op.exists(dumpdir):
171 if force:
172 shutil.rmtree(dumpdir)
173 else:
174 raise DirectoryAlreadyExists(
175 'Harvest directory already exists: %s' % dumpdir)
177 util.ensuredir(dumpdir)
179 ibests_list = []
180 ibests = []
181 gms = bootstrap_misfits[:, 0]
182 isort = num.argsort(gms)
184 ibests_list.append(isort[:nbest])
186 if weed != 3:
187 for ibootstrap in range(optimiser.nbootstrap):
188 bms = bootstrap_misfits[:, ibootstrap]
189 isort = num.argsort(bms)
190 ibests_list.append(isort[:nbest])
191 ibests.append(isort[0])
193 if weed:
194 mean_gm_best = num.median(gms[ibests])
195 std_gm_best = num.std(gms[ibests])
196 ibad = set()
198 for ibootstrap, ibest in enumerate(ibests):
199 if gms[ibest] > mean_gm_best + std_gm_best:
200 ibad.add(ibootstrap)
202 ibests_list = [
203 ibests_ for (ibootstrap, ibests_) in enumerate(ibests_list)
204 if ibootstrap not in ibad]
206 ibests = num.concatenate(ibests_list)
208 if weed == 2:
209 ibests = ibests[gms[ibests] < mean_gm_best]
211 for i in ibests:
212 problem.dump_problem_data(dumpdir, xs[i], misfits[i, :, :])
214 if export_fits:
215 env.setup_modelling()
216 problem = env.get_problem()
217 history = env.get_history(subset='harvest')
219 for what in export_fits:
220 if what == 'best':
221 models = [history.get_best_model()]
222 elif what == 'mean':
223 models = [history.get_mean_model()]
224 elif what == 'ensemble':
225 models = history.models
226 else:
227 raise GrondError(
228 'Invalid option for harvest\'s export_fits argument: %s'
229 % what)
231 results = []
232 for x in models:
233 results.append([
234 (result if isinstance(result, MisfitResult)
235 else MisfitResultError(message=str(result))) for
236 result in problem.evaluate(x)])
238 emr = MisfitResultCollection(results=results)
240 dump_misfit_result_collection(
241 emr,
242 op.join(dumpdir, 'fits-%s.yaml' % what))
244 logger.info('Done harvesting problem "%s".' % problem.name)
247def cluster(rundir, clustering, metric):
248 env = Environment([rundir])
249 history = env.get_history(subset='harvest')
250 problem = history.problem
251 models = history.models
253 events = [problem.get_source(model).pyrocko_event() for model in models]
255 from grond.clustering import metrics
257 if metric not in metrics.metrics:
258 raise GrondError('Unknown metric: %s' % metric)
260 mat = metrics.compute_similarity_matrix(events, metric)
262 clusters = clustering.perform(mat)
264 labels = num.sort(num.unique(clusters))
265 bins = num.concatenate((labels, [labels[-1]+1]))
266 ns = num.histogram(clusters, bins)[0]
268 history.set_attribute('cluster', clusters)
270 for i in range(labels.size):
271 if labels[i] == -1:
272 logging.info(
273 'Number of unclustered events: %5i' % ns[i])
274 else:
275 logging.info(
276 'Number of events in cluster %i: %5i' % (labels[i], ns[i]))
279def get_event_names(config):
280 return config.get_event_names()
283def check_problem(problem):
284 if len(problem.targets) == 0:
285 raise GrondError('No targets available')
288def check(
289 config,
290 event_names=None,
291 target_string_ids=None,
292 show_waveforms=False,
293 n_random_synthetics=10,
294 stations_used_path=None):
296 markers = []
297 stations_used = {}
298 erroneous = []
299 for ievent, event_name in enumerate(event_names):
300 ds = config.get_dataset(event_name)
301 event = ds.get_event()
302 trs_all = []
303 try:
304 problem = config.get_problem(event)
306 _, nfamilies = problem.get_family_mask()
307 logger.info('Problem: %s' % problem.name)
308 logger.info('Number of target families: %i' % nfamilies)
309 logger.info('Number of targets (total): %i' % len(problem.targets))
311 if target_string_ids:
312 problem.targets = [
313 target for target in problem.targets
314 if util.match_nslc(target_string_ids, target.string_id())]
316 logger.info(
317 'Number of targets (selected): %i' % len(problem.targets))
319 check_problem(problem)
321 results_list = []
322 sources = []
323 if n_random_synthetics == 0:
324 x = problem.preconstrain(problem.get_reference_model())
325 sources.append(problem.base_source)
326 results = problem.evaluate(x)
327 results_list.append(results)
329 else:
330 for i in range(n_random_synthetics):
331 x = problem.get_random_model()
332 sources.append(problem.get_source(x))
333 results = problem.evaluate(x)
334 results_list.append(results)
336 if show_waveforms:
337 engine = config.engine_config.get_engine()
338 times = []
339 tdata = []
340 for target in problem.targets:
341 tobs_shift_group = []
342 tcuts = []
343 for source in sources:
344 tmin_fit, tmax_fit, tfade, tfade_taper = \
345 target.get_taper_params(engine, source)
347 times.extend((tmin_fit-tfade*2., tmax_fit+tfade*2.))
349 tobs, tsyn = target.get_pick_shift(engine, source)
350 if None not in (tobs, tsyn):
351 tobs_shift = tobs - tsyn
352 else:
353 tobs_shift = 0.0
355 tcuts.append(target.get_cutout_timespan(
356 tmin_fit+tobs_shift, tmax_fit+tobs_shift, tfade))
358 tobs_shift_group.append(tobs_shift)
360 tcuts = num.array(tcuts, dtype=float)
362 tdata.append((
363 tfade,
364 num.mean(tobs_shift_group),
365 (num.min(tcuts[:, 0]), num.max(tcuts[:, 1]))))
367 tmin = min(times)
368 tmax = max(times)
370 tmax += (tmax-tmin)*2
372 for (tfade, tobs_shift, tcut), target in zip(
373 tdata, problem.targets):
375 store = engine.get_store(target.store_id)
377 deltat = store.config.deltat
379 freqlimits = list(target.get_freqlimits())
380 freqlimits[2] = 0.45/deltat
381 freqlimits[3] = 0.5/deltat
382 freqlimits = tuple(freqlimits)
384 try:
385 trs_projected, trs_restituted, trs_raw, _ = \
386 ds.get_waveform(
387 target.codes,
388 tmin=tmin+tobs_shift,
389 tmax=tmax+tobs_shift,
390 tfade=tfade,
391 freqlimits=freqlimits,
392 deltat=deltat,
393 backazimuth=target.
394 get_backazimuth_for_waveform(),
395 debug=True)
397 except NotFound as e:
398 logger.warning(str(e))
399 continue
401 trs_projected = copy.deepcopy(trs_projected)
402 trs_restituted = copy.deepcopy(trs_restituted)
403 trs_raw = copy.deepcopy(trs_raw)
405 for trx in trs_projected + trs_restituted + trs_raw:
406 trx.shift(-tobs_shift)
407 trx.set_codes(
408 network='',
409 station=target.string_id(),
410 location='')
412 for trx in trs_projected:
413 trx.set_codes(location=trx.location + '2_proj')
415 for trx in trs_restituted:
416 trx.set_codes(location=trx.location + '1_rest')
418 for trx in trs_raw:
419 trx.set_codes(location=trx.location + '0_raw')
421 trs_all.extend(trs_projected)
422 trs_all.extend(trs_restituted)
423 trs_all.extend(trs_raw)
425 for source in sources:
426 tmin_fit, tmax_fit, tfade, tfade_taper = \
427 target.get_taper_params(engine, source)
429 markers.append(pmarker.Marker(
430 nslc_ids=[('', target.string_id(), '*_proj', '*')],
431 tmin=tmin_fit, tmax=tmax_fit))
433 markers.append(pmarker.Marker(
434 nslc_ids=[('', target.string_id(), '*_raw', '*')],
435 tmin=tcut[0]-tobs_shift, tmax=tcut[1]-tobs_shift,
436 kind=1))
438 else:
439 for itarget, target in enumerate(problem.targets):
441 nok = 0
442 for results in results_list:
443 result = results[itarget]
444 if not isinstance(result, gf.SeismosizerError):
445 nok += 1
447 if nok == 0:
448 sok = 'not used'
449 elif nok == len(results_list):
450 sok = 'ok'
451 try:
452 s = ds.get_station(target)
453 stations_used[s.nsl()] = s
454 except (NotFound, InvalidObject):
455 pass
456 else:
457 sok = 'not used (%i/%i ok)' % (nok, len(results_list))
459 logger.info('%-40s %s' % (
460 (target.string_id() + ':', sok)))
462 except GrondError as e:
463 logger.error('Event %i, "%s": %s' % (
464 ievent,
465 event.name or util.time_to_str(event.time),
466 str(e)))
468 erroneous.append(event)
470 if show_waveforms:
471 trace.snuffle(trs_all, stations=ds.get_stations(), markers=markers)
473 if stations_used_path:
474 stations = list(stations_used.values())
475 stations.sort(key=lambda s: s.nsl())
476 model.dump_stations(stations, stations_used_path)
478 if erroneous:
479 raise GrondError(
480 'Check failed for events: %s'
481 % ', '.join(ev.name for ev in erroneous))
484g_state = {}
487def go(environment, force=False, preserve=False):
489 global_config = get_global_config()
491 g_data = (environment, force, preserve, global_config)
492 g_state[id(g_data)] = g_data
494 nevents = environment.nevents_selected
495 for x in parimap.parimap(
496 process_event,
497 range(environment.nevents_selected),
498 [id(g_data)] * nevents,
499 nprocs=global_config.nparallel):
501 pass
504def process_event(ievent, g_data_id):
506 environment, force, preserve, global_config = g_state[g_data_id]
508 config = environment.get_config()
509 event_name = environment.get_selected_event_names()[ievent]
510 nevents = environment.nevents_selected
512 ds = config.get_dataset(event_name)
513 event = ds.get_event()
514 problem = config.get_problem(event)
516 tstart = time.time()
517 monitor = None
518 rundir = None
519 try:
521 synt = ds.synthetic_test
522 if synt:
523 problem.base_source = problem.get_source(synt.get_x())
525 check_problem(problem)
527 rundir = expand_template(
528 config.rundir_template,
529 dict(problem_name=problem.name))
530 environment.set_rundir_path(rundir)
532 if op.exists(rundir):
533 if preserve:
534 nold_rundirs = len(glob.glob(rundir + '*'))
535 shutil.move(rundir, rundir+'-old-%d' % (nold_rundirs))
536 elif force:
537 shutil.rmtree(rundir)
538 else:
539 logger.warning(
540 'Skipping problem "%s": rundir already exists: %s' % (
541 problem.name, rundir))
542 return
544 util.ensuredir(rundir)
546 logger.info(
547 'Starting event %i / %i' % (ievent+1, nevents))
549 logger.info('Rundir: %s' % rundir)
551 logger.info('Analysing problem "%s".' % problem.name)
553 for analyser_conf in config.analyser_configs:
554 analyser = analyser_conf.get_analyser()
555 analyser.analyse(problem, ds)
557 basepath = config.get_basepath()
558 config.change_basepath(rundir)
559 guts.dump(config, filename=op.join(rundir, 'config.yaml'))
560 config.change_basepath(basepath)
562 optimiser = config.optimiser_config.get_optimiser()
564 optimiser.init_bootstraps(problem)
565 problem.dump_problem_info(rundir)
567 xs_inject = None
568 synt = ds.synthetic_test
569 if synt and synt.inject_solution:
570 xs_inject = synt.get_x()[num.newaxis, :]
572 if xs_inject is not None:
573 from .optimisers import highscore
574 if not isinstance(optimiser, highscore.HighScoreOptimiser):
575 raise GrondError(
576 'Optimiser does not support injections.')
578 optimiser.sampler_phases[0:0] = [
579 highscore.InjectionSamplerPhase(xs_inject=xs_inject)]
581 gconf = get_global_config()
582 with lock_rundir(rundir):
583 if gconf.status == 'state':
584 monitor = GrondMonitor.watch(rundir)
585 optimiser.optimise(
586 problem,
587 rundir=rundir)
589 harvest(rundir, problem, force=True)
591 except BadProblem as e:
592 logger.error(str(e))
594 except GrondError as e:
595 logger.error(str(e))
597 finally:
598 if monitor:
599 monitor.terminate()
601 tstop = time.time()
602 logger.info(
603 'Stop %i / %i (%g min)' % (ievent+1, nevents, (tstop - tstart)/60.))
605 if rundir:
606 logger.info(
607 'Done with problem "%s", rundir is "%s".' % (problem.name, rundir))
610class ParameterStats(Object):
611 name = String.T()
612 mean = Float.T()
613 std = Float.T()
614 best = Float.T()
615 minimum = Float.T()
616 percentile5 = Float.T()
617 percentile16 = Float.T()
618 median = Float.T()
619 percentile84 = Float.T()
620 percentile95 = Float.T()
621 maximum = Float.T()
623 def __init__(self, *args, **kwargs):
624 kwargs.update(zip(self.T.propnames, args))
625 Object.__init__(self, **kwargs)
627 def get_values_dict(self):
628 return dict(
629 (self.name+'.' + k, getattr(self, k))
630 for k in self.T.propnames
631 if k != 'name')
634class ResultStats(Object):
635 problem = Problem.T()
636 parameter_stats_list = List.T(ParameterStats.T())
638 def get_values_dict(self):
639 d = {}
640 for ps in self.parameter_stats_list:
641 d.update(ps.get_values_dict())
642 return d
645def make_stats(problem, models, gms, pnames=None):
646 ibest = num.argmin(gms)
647 rs = ResultStats(problem=problem)
648 if pnames is None:
649 pnames = problem.parameter_names
651 for pname in pnames:
652 iparam = problem.name_to_index(pname)
653 vs = problem.extract(models, iparam)
654 mi, p5, p16, median, p84, p95, ma = map(float, num.percentile(
655 vs, [0., 5., 16., 50., 84., 95., 100.]))
657 mean = float(num.mean(vs))
658 std = float(num.std(vs))
659 best = float(vs[ibest])
660 s = ParameterStats(
661 pname, mean, std, best, mi, p5, p16, median, p84, p95, ma)
663 rs.parameter_stats_list.append(s)
665 return rs
668def try_add_location_uncertainty(data, types):
669 vs = [data.get(k, None) for k in (
670 'north_shift.std', 'east_shift.std', 'depth.std')]
672 if None not in vs:
673 data['location_uncertainty'] = math.sqrt(sum(v**2 for v in vs))
674 types['location_uncertainty'] = float
677def format_stats(rs, fmt):
678 pname_to_pindex = dict(
679 (p.name, i) for (i, p) in enumerate(rs.parameter_stats_list))
681 values = []
682 headers = []
683 for x in fmt:
684 if x == 'problem.name':
685 headers.append(x)
686 values.append('%-16s' % rs.problem.name)
687 else:
688 pname, qname = x.split('.')
689 pindex = pname_to_pindex[pname]
690 values.append(
691 '%16.7g' % getattr(rs.parameter_stats_list[pindex], qname))
692 headers.append(x)
694 return ' '.join(values)
697def export(
698 what, rundirs, type=None, pnames=None, filename=None, selection=None,
699 effective_lat_lon=False):
701 if pnames is not None:
702 pnames_clean = [
703 pname.split('.')[0] for pname in pnames
704 if not pname.startswith('problem.')]
705 shortform = all(len(pname.split('.')) == 2 for pname in pnames)
706 else:
707 pnames_clean = None
708 shortform = False
710 if what == 'stats' and type is not None:
711 raise GrondError('Invalid argument combination: what=%s, type=%s' % (
712 repr(what), repr(type)))
714 if what != 'stats' and shortform:
715 raise GrondError('Invalid argument combination: what=%s, pnames=%s' % (
716 repr(what), repr(pnames)))
718 if what != 'stats' and type != 'vector' and pnames is not None:
719 raise GrondError(
720 'Invalid argument combination: what=%s, type=%s, pnames=%s' % (
721 repr(what), repr(type), repr(pnames)))
723 if filename is None:
724 out = sys.stdout
725 else:
726 out = open(filename, 'w')
728 if type is None:
729 type = 'event'
731 if shortform:
732 print('#', ' '.join(['%16s' % x for x in pnames]), file=out)
734 def dump(x, gm, indices):
735 if type == 'vector':
736 print(' ', ' '.join(
737 '%16.7g' % problem.extract(x, i) for i in indices),
738 '%16.7g' % gm, file=out)
740 elif type == 'source':
741 source = problem.get_source(x)
742 if effective_lat_lon:
743 source.set_origin(*source.effective_latlon)
744 guts.dump(source, stream=out)
746 elif type == 'event':
747 ev = problem.get_source(x).pyrocko_event()
748 if effective_lat_lon:
749 ev.set_origin(*ev.effective_latlon)
751 model.dump_events([ev], stream=out)
753 elif type == 'event-yaml':
754 ev = problem.get_source(x).pyrocko_event()
755 if effective_lat_lon:
756 ev.set_origin(*ev.effective_latlon)
757 guts.dump_all([ev], stream=out)
759 else:
760 raise GrondError('Invalid argument: type=%s' % repr(type))
762 header = None
763 for rundir in rundirs:
764 env = Environment(rundir)
765 info = env.get_run_info()
767 try:
768 history = env.get_history(subset='harvest')
769 except ProblemDataNotAvailable as e:
770 logger.error(
771 'Harvest not available (Did the run succeed?): %s' % str(e))
772 continue
774 problem = history.problem
775 models = history.models
776 misfits = history.get_primary_chain_misfits()
778 if selection:
779 rs = make_stats(
780 problem, models,
781 history.get_primary_chain_misfits())
783 data = dict(tags=info.tags)
784 types = dict(tags=(list, str))
786 for k, v in rs.get_values_dict().items():
787 data[k] = v
788 types[k] = float
790 try_add_location_uncertainty(data, types)
792 if not selected(selection, data=data, types=types):
793 continue
795 else:
796 rs = None
798 if type == 'vector':
799 pnames_take = pnames_clean or \
800 problem.parameter_names[:problem.nparameters]
802 indices = num.array(
803 [problem.name_to_index(pname) for pname in pnames_take])
805 if type == 'vector' and what in ('best', 'mean', 'ensemble'):
806 extra = ['global_misfit']
807 else:
808 extra = []
810 new_header = '# ' + ' '.join(
811 '%16s' % x for x in pnames_take + extra)
813 if type == 'vector' and header != new_header:
814 print(new_header, file=out)
816 header = new_header
817 else:
818 indices = None
820 if what == 'best':
821 x_best = history.get_best_model()
822 gm_best = history.get_best_misfit()
823 dump(x_best, gm_best, indices)
825 elif what == 'mean':
826 x_mean = history.get_mean_model()
827 gm_mean = history.get_mean_misfit(chain=0)
828 dump(x_mean, gm_mean, indices)
830 elif what == 'ensemble':
831 isort = num.argsort(misfits)
832 for i in isort:
833 dump(models[i], misfits[i], indices)
835 elif what == 'stats':
836 if not rs:
837 rs = make_stats(problem, models,
838 history.get_primary_chain_misfits(),
839 pnames_clean)
841 if shortform:
842 print(' ', format_stats(rs, pnames), file=out)
843 else:
844 print(rs, file=out)
846 else:
847 raise GrondError('Invalid argument: what=%s' % repr(what))
849 if out is not sys.stdout:
850 out.close()
853__all__ = '''
854 DirectoryAlreadyExists
855 forward
856 harvest
857 cluster
858 go
859 get_event_names
860 check
861 export
862'''.split()