1import glob
2import copy
3import os.path as op
4import logging
5import math
6import numpy as num
8from collections import defaultdict
9from pyrocko import util, pile, model, config, trace, \
10 marker as pmarker
11from pyrocko.io.io_common import FileLoadError
12from pyrocko.fdsn import enhanced_sacpz, station as fs
13from pyrocko.guts import (Object, Tuple, String, Float, List, Bool, dump_all,
14 load_all)
16from pyrocko import gf
18from .meta import Path, HasPaths, expand_template, GrondError
20from .synthetic_tests import SyntheticTest
# Prefix used by guts for serialized type names defined in this module.
guts_prefix = 'grond'

# Module-level logger, following pyrocko/grond convention.
logger = logging.getLogger('grond.dataset')
def quote_paths(paths):
    '''
    Return *paths* joined into a single string, each path double-quoted.
    '''
    quoted = ['"%s"' % path for path in paths]
    return ', '.join(quoted)
class InvalidObject(Exception):
    '''
    Raised when an object cannot be interpreted as nsl/nslc codes or a
    time span (see ``Dataset.get_nsl``, ``get_nslc``, ``get_tmin_tmax``).
    '''
    pass
class NotFound(Exception):
    '''
    Raised when requested data or metadata is unavailable.

    Optionally carries the nslc-style ``codes`` and the ``time_range``
    of the failed request; both are appended to the string representation.
    '''

    def __init__(self, reason, codes=None, time_range=None):
        self.reason = reason
        self.time_range = time_range
        self.codes = codes

    def __str__(self):
        parts = [self.reason]
        if self.codes:
            parts.append(' (%s)' % '.'.join(self.codes))

        if self.time_range:
            parts.append(' (%s - %s)' % (
                util.time_to_str(self.time_range[0]),
                util.time_to_str(self.time_range[1])))

        return ''.join(parts)
class DatasetError(GrondError):
    '''
    Raised on dataset configuration or consistency problems.
    '''
    pass
class StationCorrection(Object):
    '''
    Per-channel time delay and amplitude factor correction.
    '''

    # Full (net, sta, loc, cha) codes the correction applies to.
    codes = Tuple.T(4, String.T())
    # Time shift [s], applied with negative sign to traces when enabled.
    delay = Float.T()
    # Amplitude divisor, applied to trace samples when enabled.
    factor = Float.T()
def load_station_corrections(filename):
    '''
    Read a list of :py:class:`StationCorrection` objects from a YAML file.

    :param filename: path of the YAML file to read
    :raises DatasetError: if the file contains objects of any other type
    '''
    scs = load_all(filename=filename)
    for sc in scs:
        # Explicit check instead of ``assert``: assertions are stripped
        # when Python runs with ``-O`` and would silently accept bad input.
        if not isinstance(sc, StationCorrection):
            raise DatasetError(
                'Invalid object in station corrections file: %s' % filename)

    return scs
def dump_station_corrections(station_corrections, filename):
    '''
    Write a list of :py:class:`StationCorrection` objects to a YAML file.
    '''
    return dump_all(station_corrections, filename=filename)
75class Dataset(object):
    def __init__(self, event_name=None):
        '''
        Container for all data and metadata used in a Grond run.

        :param event_name: name of the main event this dataset is bound
            to; used by :py:meth:`get_event` to select it from ``events``.
        '''
        self.events = []
        self.pile = pile.Pile()
        self.stations = {}
        # Responses from SAC PZ files, keyed by nslc codes.
        self.responses = defaultdict(list)
        # Parsed StationXML documents used for response lookup.
        self.responses_stationxml = []
        # Clipping times per nsl and nslc key (numpy arrays).
        self.clippings = {}
        self.blacklist = set()
        # All three whitelist sets stay None until add_whitelist() is
        # called; None means "everything is whitelisted".
        self.whitelist_nslc = None
        self.whitelist_nsl_xx = None
        self.whitelist = None
        self.station_corrections = {}
        self.station_factors = {}
        self.pick_markers = []
        self.apply_correction_delays = True
        self.apply_correction_factors = True
        self.apply_displaced_sampling_workaround = False
        self.extend_incomplete = False
        # 'by_nsl' or 'by_nslc': level at which clippings are checked.
        self.clip_handling = 'by_nsl'
        self.kite_scenes = []
        self.gnss_campaigns = []
        self.synthetic_test = None
        # Lazily built pick lookup table (see get_picks()).
        self._picks = None
        # Waveform cache used by _get_waveform() when cache=True.
        self._cache = {}
        self._event_name = event_name
103 def empty_cache(self):
104 self._cache = {}
    def set_synthetic_test(self, synthetic_test):
        '''
        Attach a synthetic test configuration (or ``None`` to disable).

        When set, waveform requests are answered with synthetic data.
        '''
        self.synthetic_test = synthetic_test
    def add_stations(
            self,
            stations=None,
            pyrocko_stations_filename=None,
            stationxml_filenames=None):
        '''
        Add station metadata from objects, a Pyrocko station file and/or
        StationXML files.

        Stations are stored by their (net, sta, loc) codes; later
        additions with the same codes overwrite earlier ones.
        '''

        if stations is not None:
            for station in stations:
                self.stations[station.nsl()] = station

        if pyrocko_stations_filename is not None:
            logger.debug(
                'Loading stations from file "%s"...' %
                pyrocko_stations_filename)

            for station in model.load_stations(pyrocko_stations_filename):
                self.stations[station.nsl()] = station

        if stationxml_filenames is not None and len(stationxml_filenames) > 0:

            for stationxml_filename in stationxml_filenames:
                if not op.exists(stationxml_filename):
                    continue

                logger.debug(
                    'Loading stations from StationXML file "%s"...' %
                    stationxml_filename)

                sx = fs.load_xml(filename=stationxml_filename)
                # Station epochs are selected at the main event's time.
                ev = self.get_event()
                stations = sx.get_pyrocko_stations(time=ev.time)
                if len(stations) == 0:
                    logger.warning(
                        'No stations found for time %s in file "%s".' % (
                            util.time_to_str(ev.time), stationxml_filename))

                for station in stations:
                    logger.debug('Adding station: %s.%s.%s' % station.nsl())
                    channels = station.get_channels()
                    # Vertical-only stations get mocked horizontal
                    # channels so component projection can proceed.
                    if len(channels) == 1 and channels[0].name.endswith('Z'):
                        logger.warning(
                            'Station "%s" has vertical component'
                            ' information only, adding mocked channels.'
                            % station.nsl_string())
                        station.add_channel(
                            model.Channel(channels[0].name[:-1] + 'N'))
                        station.add_channel(
                            model.Channel(channels[0].name[:-1] + 'E'))

                    self.stations[station.nsl()] = station
160 def add_events(self, events=None, filename=None):
161 if events is not None:
162 self.events.extend(events)
164 if filename is not None:
165 logger.debug('Loading events from file "%s"...' % filename)
166 try:
167 events = model.load_events(filename)
168 self.events.extend(events)
169 logger.info(
170 'Loading events from %s: %i events found.' %
171 (filename, len(events)))
172 except Exception as e:
173 logger.warning('Could not load events from %s!', filename)
174 raise e
    def add_waveforms(self, paths, regex=None, fileformat='detect',
                      show_progress=False):
        '''
        Scan *paths* for waveform files and add them to the trace pile.

        :param regex: optional filename include pattern
        :param fileformat: format hint passed to the pile loader
        '''
        # Pile metadata is cached in pyrocko's global cache directory.
        cachedirname = config.config().cache_dir

        logger.debug('Selecting waveform files %s...' % quote_paths(paths))
        fns = util.select_files(paths, include=regex,
                                show_progress=show_progress)
        cache = pile.get_cache(cachedirname)
        logger.debug('Scanning waveform files %s...' % quote_paths(paths))
        self.pile.load_files(sorted(fns), cache=cache,
                             fileformat=fileformat,
                             show_progress=show_progress)
    def add_responses(self, sacpz_dirname=None, stationxml_filenames=None):
        '''
        Load instrument responses from SAC PZ files and/or StationXML.
        '''
        if sacpz_dirname:
            logger.debug(
                'Loading SAC PZ responses from "%s"...' % sacpz_dirname)
            for x in enhanced_sacpz.iload_dirname(sacpz_dirname):
                self.responses[x.codes].append(x)

        if stationxml_filenames:
            for stationxml_filename in stationxml_filenames:
                # Missing files are skipped silently here; existence is
                # checked (with a warning) by DatasetConfig.get_dataset.
                if not op.exists(stationxml_filename):
                    continue

                logger.debug(
                    'Loading StationXML responses from "%s"...' %
                    stationxml_filename)

                self.responses_stationxml.append(
                    fs.load_xml(filename=stationxml_filename))
208 def add_clippings(self, markers_filename):
209 markers = pmarker.load_markers(markers_filename)
210 clippings = {}
211 for marker in markers:
212 nslc = marker.one_nslc()
213 nsl = nslc[:3]
214 if nsl not in clippings:
215 clippings[nsl] = []
217 if nslc not in clippings:
218 clippings[nslc] = []
220 clippings[nsl].append(marker.tmin)
221 clippings[nslc].append(marker.tmin)
223 for k, times in clippings.items():
224 atimes = num.array(times, dtype=float)
225 if k not in self.clippings:
226 self.clippings[k] = atimes
227 else:
228 self.clippings[k] = num.concatenate(self.clippings, atimes)
230 def add_blacklist(self, blacklist=[], filenames=None):
231 logger.debug('Loading blacklisted stations...')
232 if filenames:
233 blacklist = list(blacklist)
234 for filename in filenames:
235 if op.exists(filename):
236 with open(filename, 'r') as f:
237 blacklist.extend(
238 s.strip() for s in f.read().splitlines())
239 else:
240 logger.warning('No such blacklist file: %s' % filename)
242 for x in blacklist:
243 if isinstance(x, str):
244 x = tuple(x.split('.'))
245 self.blacklist.add(x)
247 def add_whitelist(self, whitelist=[], filenames=None):
248 logger.debug('Loading whitelisted stations...')
249 if filenames:
250 whitelist = list(whitelist)
251 for filename in filenames:
252 with open(filename, 'r') as f:
253 whitelist.extend(s.strip() for s in f.read().splitlines())
255 if self.whitelist_nslc is None:
256 self.whitelist_nslc = set()
257 self.whitelist = set()
258 self.whitelist_nsl_xx = set()
260 for x in whitelist:
261 if isinstance(x, str):
262 x = tuple(x.split('.'))
263 if len(x) == 4:
264 self.whitelist_nslc.add(x)
265 self.whitelist_nsl_xx.add(x[:3])
266 else:
267 self.whitelist.add(x)
    def add_station_corrections(self, filename):
        '''
        Load station corrections from a YAML file, keyed by nslc codes.
        '''
        self.station_corrections.update(
            (sc.codes, sc) for sc in load_station_corrections(filename))
    def add_picks(self, filename):
        '''
        Load pick markers from a Pyrocko marker file.
        '''
        self.pick_markers.extend(
            pmarker.load_markers(filename))

        # Invalidate the lazily built pick lookup table (see get_picks).
        self._picks = None
    def add_gnss_campaigns(self, paths):
        '''
        Load all GNSS campaigns (YAML files) found below *paths*.
        '''
        paths = util.select_files(
            paths,
            include=r'\.yml|\.yaml',
            show_progress=False)

        for path in paths:
            self.add_gnss_campaign(filename=path)
    def add_gnss_campaign(self, filename):
        '''
        Load a single GNSS campaign from a YAML file.

        :raises ImportError: if the installed pyrocko lacks GNSS support
        '''
        try:
            from pyrocko.model import gnss  # noqa
        except ImportError:
            raise ImportError('Module pyrocko.model.gnss not found,'
                              ' please upgrade pyrocko!')
        logger.debug('Loading GNSS campaign from "%s"...' % filename)

        # The file is expected to contain exactly one campaign object;
        # only the first loaded object is kept.
        campaign = load_all(filename=filename)
        self.gnss_campaigns.append(campaign[0])
299 def add_kite_scenes(self, paths):
300 logger.info('Loading kite InSAR scenes...')
301 paths = util.select_files(
302 paths,
303 include=r'\.npz',
304 show_progress=False)
306 for path in paths:
307 self.add_kite_scene(filename=path)
309 if not self.kite_scenes:
310 logger.warning('Could not find any kite scenes at %s.' %
311 quote_paths(self.kite_scene_paths))
    def add_kite_scene(self, filename):
        '''
        Load a single kite InSAR scene; its scene_id must be unique.

        :raises ImportError: if the ``kite`` package is not installed
        :raises AttributeError: if a scene with the same id exists already
        '''
        try:
            from kite import Scene
        except ImportError:
            raise ImportError('Module kite could not be imported,'
                              ' please install from https://pyrocko.org.')
        logger.debug('Loading kite scene from "%s"...' % filename)

        scene = Scene.load(filename)
        scene._log.setLevel(logger.level)

        # get_kite_scene raises NotFound when the id is still unused, so
        # the success path runs through the ``except`` branch.
        try:
            self.get_kite_scene(scene.meta.scene_id)
        except NotFound:
            self.kite_scenes.append(scene)
        else:
            raise AttributeError('Kite scene_id not unique for "%s".'
                                 % filename)
332 def is_blacklisted(self, obj):
333 try:
334 nslc = self.get_nslc(obj)
335 if nslc in self.blacklist:
336 return True
338 except InvalidObject:
339 pass
341 nsl = self.get_nsl(obj)
342 return (
343 nsl in self.blacklist or
344 nsl[1:2] in self.blacklist or
345 nsl[:2] in self.blacklist)
347 def is_whitelisted(self, obj):
348 if self.whitelist_nslc is None:
349 return True
351 nsl = self.get_nsl(obj)
353 if (
354 nsl in self.whitelist or
355 nsl[1:2] in self.whitelist or
356 nsl[:2] in self.whitelist):
358 return True
360 try:
361 nslc = self.get_nslc(obj)
362 if nslc in self.whitelist_nslc:
363 return True
365 except InvalidObject:
366 return nsl in self.whitelist_nsl_xx
368 def has_clipping(self, nsl_or_nslc, tmin, tmax):
369 if nsl_or_nslc not in self.clippings:
370 return False
372 atimes = self.clippings[nsl_or_nslc]
373 return num.any(num.logical_and(tmin < atimes, atimes <= tmax))
375 def get_nsl(self, obj):
376 if isinstance(obj, trace.Trace):
377 net, sta, loc, _ = obj.nslc_id
378 elif isinstance(obj, model.Station):
379 net, sta, loc = obj.nsl()
380 elif isinstance(obj, gf.Target):
381 net, sta, loc, _ = obj.codes
382 elif isinstance(obj, tuple) and len(obj) in (3, 4):
383 net, sta, loc = obj[:3]
384 else:
385 raise InvalidObject(
386 'Cannot get nsl code from given object of type "%s".'
387 % type(obj))
389 return net, sta, loc
391 def get_nslc(self, obj):
392 if isinstance(obj, trace.Trace):
393 return obj.nslc_id
394 elif isinstance(obj, gf.Target):
395 return obj.codes
396 elif isinstance(obj, tuple) and len(obj) == 4:
397 return obj
398 else:
399 raise InvalidObject(
400 'Cannot get nslc code from given object "%s"' % type(obj))
402 def get_tmin_tmax(self, obj):
403 if isinstance(obj, trace.Trace):
404 return obj.tmin, obj.tmax
405 else:
406 raise InvalidObject(
407 'Cannot get tmin and tmax from given object of type "%s"' %
408 type(obj))
410 def get_station(self, obj):
411 if self.is_blacklisted(obj):
412 raise NotFound('Station is blacklisted:', self.get_nsl(obj))
414 if not self.is_whitelisted(obj):
415 raise NotFound('Station is not on whitelist:', self.get_nsl(obj))
417 if isinstance(obj, model.Station):
418 return obj
420 net, sta, loc = self.get_nsl(obj)
422 keys = [(net, sta, loc), (net, sta, ''), ('', sta, '')]
423 for k in keys:
424 if k in self.stations:
425 return self.stations[k]
427 raise NotFound('No station information:', keys)
429 def get_stations(self):
430 return [self.stations[k] for k in sorted(self.stations)
431 if not self.is_blacklisted(self.stations[k])
432 and self.is_whitelisted(self.stations[k])]
    def get_kite_scenes(self):
        '''
        Return the list of loaded kite InSAR scenes.
        '''
        return self.kite_scenes
437 def get_kite_scene(self, scene_id=None):
438 if scene_id is None:
439 if len(self.kite_scenes) == 0:
440 raise AttributeError('No kite displacements defined.')
441 return self.kite_scenes[0]
442 else:
443 for scene in self.kite_scenes:
444 if scene.meta.scene_id == scene_id:
445 return scene
446 raise NotFound('No kite scene with id "%s" defined.' % scene_id)
    def get_gnss_campaigns(self):
        '''
        Return the list of loaded GNSS campaigns.
        '''
        return self.gnss_campaigns
451 def get_gnss_campaign(self, name):
452 for camp in self.gnss_campaigns:
453 if camp.name == name:
454 return camp
455 raise NotFound('GNSS campaign %s not found!' % name)
    def get_response(self, obj, quantity='displacement'):
        '''
        Get the instrument response for the channel given by *obj*.

        :param quantity: ``'displacement'``, ``'velocity'`` or
            ``'acceleration'``; differentiation responses are chained
            onto the base response as needed
        :raises NotFound: if no response information is available, if the
            channel is blacklisted/not whitelisted, or if zero or several
            matching responses are found
        '''
        if (self.responses is None or len(self.responses) == 0) \
                and (self.responses_stationxml is None
                     or len(self.responses_stationxml) == 0):

            raise NotFound('No response information available.')

        quantity_to_unit = {
            'displacement': 'M',
            'velocity': 'M/S',
            'acceleration': 'M/S**2'}

        if self.is_blacklisted(obj):
            raise NotFound('Response is blacklisted:', self.get_nslc(obj))

        if not self.is_whitelisted(obj):
            raise NotFound('Response is not on whitelist:', self.get_nslc(obj))

        net, sta, loc, cha = self.get_nslc(obj)
        tmin, tmax = self.get_tmin_tmax(obj)

        # Fall back to increasingly unspecific keys (no location,
        # no network), keeping order and dropping duplicates.
        keys_x = [
            (net, sta, loc, cha), (net, sta, '', cha), ('', sta, '', cha)]

        keys = []
        for k in keys_x:
            if k not in keys:
                keys.append(k)

        candidates = []
        for k in keys:
            if k in self.responses:
                for x in self.responses[k]:
                    # Only responses whose validity span fully covers the
                    # requested time span are acceptable.
                    if x.tmin < tmin and (x.tmax is None or tmax < x.tmax):
                        if quantity == 'displacement':
                            candidates.append(x.response)
                        elif quantity == 'velocity':
                            candidates.append(trace.MultiplyResponse([
                                x.response,
                                trace.DifferentiationResponse()]))
                        elif quantity == 'acceleration':
                            candidates.append(trace.MultiplyResponse([
                                x.response,
                                trace.DifferentiationResponse(2)]))
                        else:
                            assert False

        for sx in self.responses_stationxml:
            try:
                candidates.append(
                    sx.get_pyrocko_response(
                        (net, sta, loc, cha),
                        timespan=(tmin, tmax),
                        fake_input_units=quantity_to_unit[quantity]))

            except (fs.NoResponseInformation, fs.MultipleResponseInformation):
                pass

        if len(candidates) == 1:
            return candidates[0]

        elif len(candidates) == 0:
            raise NotFound('No response found:', (net, sta, loc, cha))
        else:
            raise NotFound('Multiple responses found:', (net, sta, loc, cha))
    def get_waveform_raw(
            self, obj,
            tmin,
            tmax,
            tpad=0.,
            toffset_noise_extract=0.,
            want_incomplete=False,
            extend_incomplete=False):
        '''
        Fetch raw waveform traces for the channel given by *obj*.

        :param tpad: padding [s] added on both sides of the window
        :param toffset_noise_extract: time shift [s] used when extracting
            real noise for synthetic tests; traces are shifted back so
            their time stamps match the requested window
        :param want_incomplete: accept gappy/missing data
        :param extend_incomplete: pad a single incomplete trace by
            repeating edge samples
        :raises NotFound: for blacklisted/non-whitelisted channels,
            clipped windows, or missing/gappy data
        '''
        net, sta, loc, cha = self.get_nslc(obj)

        if self.is_blacklisted((net, sta, loc, cha)):
            raise NotFound(
                'Waveform is blacklisted:', (net, sta, loc, cha))

        if not self.is_whitelisted((net, sta, loc, cha)):
            raise NotFound(
                'Waveform is not on whitelist:', (net, sta, loc, cha))

        # Clipping rejection at the configured granularity.
        if self.clip_handling == 'by_nsl':
            if self.has_clipping((net, sta, loc), tmin, tmax):
                raise NotFound(
                    'Waveform clipped:', (net, sta, loc))

        elif self.clip_handling == 'by_nslc':
            if self.has_clipping((net, sta, loc, cha), tmin, tmax):
                raise NotFound(
                    'Waveform clipped:', (net, sta, loc, cha))

        trs = self.pile.all(
            tmin=tmin+toffset_noise_extract,
            tmax=tmax+toffset_noise_extract,
            tpad=tpad,
            trace_selector=lambda tr: tr.nslc_id == (net, sta, loc, cha),
            want_incomplete=want_incomplete or extend_incomplete)

        if self.apply_displaced_sampling_workaround:
            for tr in trs:
                tr.snap()

        # Undo the noise-extraction offset so timestamps match the
        # originally requested window.
        if toffset_noise_extract != 0.0:
            for tr in trs:
                tr.shift(-toffset_noise_extract)

        if extend_incomplete and len(trs) == 1:
            trs[0].extend(
                tmin + toffset_noise_extract - tpad,
                tmax + toffset_noise_extract + tpad,
                fillmethod='repeat')

        if not want_incomplete and len(trs) != 1:
            if len(trs) == 0:
                message = 'Waveform missing or incomplete.'
            else:
                message = 'Waveform has gaps.'

            raise NotFound(
                message,
                codes=(net, sta, loc, cha),
                time_range=(
                    tmin + toffset_noise_extract - tpad,
                    tmax + toffset_noise_extract + tpad))

        return trs
    def get_waveform_restituted(
            self,
            obj, quantity='displacement',
            tmin=None, tmax=None, tpad=0.,
            tfade=0., freqlimits=None, deltat=None,
            toffset_noise_extract=0.,
            want_incomplete=False,
            extend_incomplete=False):
        '''
        Fetch raw traces and deconvolve the instrument response.

        Returns a tuple ``(trs_restituted, trs_raw)``.

        :param tfade: fade-in/out time [s], also added to the padding of
            the raw data request
        :param deltat: if given, downsample raw traces to this sampling
            interval before restitution
        :raises NotFound: when raw data or response information is
            unavailable, or deconvolution hits an infinite response
        '''
        trs_raw = self.get_waveform_raw(
            obj, tmin=tmin, tmax=tmax, tpad=tpad+tfade,
            toffset_noise_extract=toffset_noise_extract,
            want_incomplete=want_incomplete,
            extend_incomplete=extend_incomplete)

        trs_restituted = []
        for tr in trs_raw:
            if deltat is not None:
                tr.downsample_to(deltat, snap=True, allow_upsample_max=5)
                # NOTE(review): forces the nominal deltat after
                # downsampling — presumably to remove floating-point
                # rounding differences; confirm against pyrocko docs.
                tr.deltat = deltat

            resp = self.get_response(tr, quantity=quantity)
            try:
                trs_restituted.append(
                    tr.transfer(
                        tfade=tfade, freqlimits=freqlimits,
                        transfer_function=resp, invert=True, demean=True))

            except trace.InfiniteResponse:
                raise NotFound(
                    'Instrument response deconvolution failed '
                    '(divide by zero)', tr.nslc_id)

        return trs_restituted, trs_raw
    def _get_projections(
            self, station, backazimuth, source, target, tmin, tmax):
        '''
        Determine component projection matrices for *station* to ENZ and,
        if a backazimuth is available, to RTZ.

        :raises NotFound: if no projection can be determined
        '''
        # fill in missing channel information (happens when station file
        # does not contain any channel information)
        if not station.get_channels():
            # Work on a copy so the stored station object stays untouched.
            station = copy.deepcopy(station)

            nsl = station.nsl()
            trs = self.pile.all(
                tmin=tmin, tmax=tmax,
                trace_selector=lambda tr: tr.nslc_id[:3] == nsl,
                load_data=False)

            channels = list(set(tr.channel for tr in trs))
            station.set_channels_by_name(*channels)

        projections = []
        projections.extend(station.guess_projections_to_enu(
            out_channels=('E', 'N', 'Z')))

        # Explicit source/target geometry overrides a given backazimuth.
        if source is not None and target is not None:
            backazimuth = source.azibazi_to(target)[1]

        if backazimuth is not None:
            projections.extend(station.guess_projections_to_rtu(
                out_channels=('R', 'T', 'Z'),
                backazimuth=backazimuth))

        if not projections:
            raise NotFound(
                'Cannot determine projection of data components:',
                station.nsl())

        return projections
659 def _get_waveform(
660 self,
661 obj, quantity='displacement',
662 tmin=None, tmax=None, tpad=0.,
663 tfade=0., freqlimits=None, deltat=None, cache=None,
664 backazimuth=None,
665 source=None,
666 target=None,
667 debug=False):
669 assert not debug or (debug and cache is None)
671 if cache is True:
672 cache = self._cache
674 _, _, _, channel = self.get_nslc(obj)
675 station = self.get_station(self.get_nsl(obj))
677 nslc = station.nsl() + (channel,)
679 if self.is_blacklisted(nslc):
680 raise NotFound(
681 'Waveform is blacklisted:', nslc)
683 if not self.is_whitelisted(nslc):
684 raise NotFound(
685 'Waveform is not on whitelist:', nslc)
687 assert tmin is not None
688 assert tmax is not None
690 tmin = float(tmin)
691 tmax = float(tmax)
693 nslc = tuple(nslc)
695 cache_k = nslc + (
696 tmin, tmax, tuple(freqlimits), tfade, deltat, tpad, quantity)
697 if cache is not None and (nslc + cache_k) in cache:
698 obj = cache[nslc + cache_k]
699 if isinstance(obj, Exception):
700 raise obj
701 elif obj is None:
702 raise NotFound('Waveform not found!', nslc)
703 else:
704 return obj
706 syn_test = self.synthetic_test
707 toffset_noise_extract = 0.0
708 if syn_test:
709 if not syn_test.respect_data_availability:
710 if syn_test.real_noise_scale != 0.0:
711 raise DatasetError(
712 'respect_data_availability=False and '
713 'addition of real noise cannot be combined.')
715 tr = syn_test.get_waveform(
716 nslc, tmin, tmax,
717 tfade=tfade,
718 freqlimits=freqlimits)
720 if cache is not None:
721 cache[tr.nslc_id + cache_k] = tr
723 if debug:
724 return [], [], []
725 else:
726 return tr
728 if syn_test.real_noise_scale != 0.0:
729 toffset_noise_extract = syn_test.real_noise_toffset
731 abs_delays = []
732 for ocha in 'ENZRT':
733 sc = self.station_corrections.get(station.nsl() + (channel,), None)
734 if sc:
735 abs_delays.append(abs(sc.delay))
737 if abs_delays:
738 abs_delay_max = max(abs_delays)
739 else:
740 abs_delay_max = 0.0
742 projections = self._get_projections(
743 station, backazimuth, source, target, tmin, tmax)
745 try:
746 trs_projected = []
747 trs_restituted = []
748 trs_raw = []
749 exceptions = []
750 for matrix, in_channels, out_channels in projections:
751 deps = trace.project_dependencies(
752 matrix, in_channels, out_channels)
754 try:
755 trs_restituted_group = []
756 trs_raw_group = []
757 if channel in deps:
758 for cha in deps[channel]:
759 trs_restituted_this, trs_raw_this = \
760 self.get_waveform_restituted(
761 station.nsl() + (cha,),
762 quantity=quantity,
763 tmin=tmin, tmax=tmax,
764 tpad=tpad+abs_delay_max,
765 toffset_noise_extract=toffset_noise_extract, # noqa
766 tfade=tfade,
767 freqlimits=freqlimits,
768 deltat=deltat,
769 want_incomplete=debug,
770 extend_incomplete=self.extend_incomplete)
772 trs_restituted_group.extend(trs_restituted_this)
773 trs_raw_group.extend(trs_raw_this)
775 trs_projected.extend(
776 trace.project(
777 trs_restituted_group, matrix,
778 in_channels, out_channels))
780 trs_restituted.extend(trs_restituted_group)
781 trs_raw.extend(trs_raw_group)
783 except NotFound as e:
784 exceptions.append((in_channels, out_channels, e))
786 if not trs_projected:
787 err = []
788 for (in_channels, out_channels, e) in exceptions:
789 sin = ', '.join(c.name for c in in_channels)
790 sout = ', '.join(c.name for c in out_channels)
791 err.append('(%s) -> (%s): %s' % (sin, sout, e))
793 raise NotFound('\n'.join(err))
795 for tr in trs_projected:
796 sc = self.station_corrections.get(tr.nslc_id, None)
797 if sc:
798 if self.apply_correction_factors:
799 tr.ydata /= sc.factor
801 if self.apply_correction_delays:
802 tr.shift(-sc.delay)
804 if tmin is not None and tmax is not None:
805 tr.chop(tmin, tmax)
807 if syn_test:
808 trs_projected_synthetic = []
809 for tr in trs_projected:
810 if tr.channel == channel:
811 tr_syn = syn_test.get_waveform(
812 tr.nslc_id, tmin, tmax,
813 tfade=tfade, freqlimits=freqlimits)
815 if tr_syn:
816 if syn_test.real_noise_scale != 0.0:
817 tr_syn = tr_syn.copy()
818 tr_noise = tr.copy()
819 tr_noise.set_ydata(
820 tr_noise.get_ydata()
821 * syn_test.real_noise_scale)
823 tr_syn.add(tr_noise)
825 trs_projected_synthetic.append(tr_syn)
827 trs_projected = trs_projected_synthetic
829 if cache is not None:
830 for tr in trs_projected:
831 cache[tr.nslc_id + cache_k] = tr
833 tr_return = None
834 for tr in trs_projected:
835 if tr.channel == channel:
836 tr_return = tr
838 if debug:
839 return trs_projected, trs_restituted, trs_raw, tr_return
841 elif tr_return:
842 return tr_return
844 else:
845 raise NotFound(
846 'waveform not available', station.nsl() + (channel,))
848 except NotFound:
849 if cache is not None:
850 cache[nslc + cache_k] = None
851 raise
853 def get_waveform(self, obj, tinc_cache=None, **kwargs):
854 tmin = kwargs['tmin']
855 tmax = kwargs['tmax']
856 if tinc_cache is not None:
857 tmin_r = (math.floor(tmin / tinc_cache) - 1.0) * tinc_cache
858 tmax_r = (math.ceil(tmax / tinc_cache) + 1.0) * tinc_cache
859 else:
860 tmin_r = tmin
861 tmax_r = tmax
863 kwargs['tmin'] = tmin_r
864 kwargs['tmax'] = tmax_r
866 if kwargs.get('debug', None):
867 return self._get_waveform(obj, **kwargs)
868 else:
869 tr = self._get_waveform(obj, **kwargs)
870 return tr.chop(tmin, tmax, inplace=False)
872 def get_events(self, magmin=None, event_names=None):
873 evs = []
874 for ev in self.events:
875 if ((magmin is None or ev.magnitude >= magmin) and
876 (event_names is None or ev.name in event_names)):
877 evs.append(ev)
879 return evs
881 def get_event_by_time(self, t, magmin=None):
882 evs = self.get_events(magmin=magmin)
883 ev_x = None
884 for ev in evs:
885 if ev_x is None or abs(ev.time - t) < abs(ev_x.time - t):
886 ev_x = ev
888 if not ev_x:
889 raise NotFound(
890 'No event information matching criteria (t=%s, magmin=%s).' %
891 (t, magmin))
893 return ev_x
895 def get_event(self):
896 if self._event_name is None:
897 raise NotFound('No main event selected in dataset!')
899 for ev in self.events:
900 if ev.name == self._event_name:
901 return ev
903 raise NotFound('No such event: %s' % self._event_name)
    def get_picks(self):
        '''
        Return phase picks as a dict indexed by
        ``(nsl, phasename, eventname)``, built lazily from the loaded
        pick markers.

        :raises DatasetError: on duplicate event names, unassociated
            picks, or duplicate picks
        '''
        if self._picks is None:
            # Map event hashes to names, from event markers in the pick
            # files and from the dataset's own events.
            hash_to_name = {}
            names = set()
            for marker in self.pick_markers:
                if isinstance(marker, pmarker.EventMarker):
                    name = marker.get_event().name
                    if name in names:
                        raise DatasetError(
                            'Duplicate event name "%s" in picks.' % name)

                    names.add(name)
                    hash_to_name[marker.get_event_hash()] = name

            for ev in self.events:
                hash_to_name[ev.get_hash()] = ev.name

            picks = {}
            for marker in self.pick_markers:
                if isinstance(marker, pmarker.PhaseMarker):
                    ehash = marker.get_event_hash()

                    nsl = marker.one_nslc()[:3]
                    phasename = marker.get_phasename()

                    if ehash is None or ehash not in hash_to_name:
                        raise DatasetError(
                            'Unassociated pick: %s.%s.%s, %s' %
                            (nsl + (phasename, )))

                    eventname = hash_to_name[ehash]

                    if (nsl, phasename, eventname) in picks:
                        raise DatasetError(
                            'Duplicate pick: %s.%s.%s, %s' %
                            (nsl + (phasename, )))

                    picks[nsl, phasename, eventname] = marker

            self._picks = picks

        return self._picks
948 def get_pick(self, eventname, obj, phasename):
949 nsl = self.get_nsl(obj)
950 return self.get_picks().get((nsl, phasename, eventname), None)
class DatasetConfig(HasPaths):
    ''' Configuration for a Grond `Dataset` object. '''

    stations_path = Path.T(
        optional=True,
        help='List of files with station coordinates in Pyrocko format.')
    stations_stationxml_paths = List.T(
        Path.T(),
        optional=True,
        help='List of files with station coordinates in StationXML format.')
    events_path = Path.T(
        optional=True,
        help='File with hypocenter information and possibly'
             ' reference solution')
    waveform_paths = List.T(
        Path.T(),
        optional=True,
        help='List of directories with raw waveform data')
    clippings_path = Path.T(
        optional=True)
    responses_sacpz_path = Path.T(
        optional=True,
        help='List of SACPZ response files for restitution of'
             ' the raw waveform data.')
    responses_stationxml_paths = List.T(
        Path.T(),
        optional=True,
        help='List of StationXML response files for restitution of'
             ' the raw waveform data.')
    station_corrections_path = Path.T(
        optional=True,
        help='File containing station correction informations.')
    apply_correction_factors = Bool.T(
        optional=True,
        default=True,
        help='Apply correction factors from station corrections.')
    apply_correction_delays = Bool.T(
        optional=True,
        default=True,
        help='Apply correction delays from station corrections.')
    apply_displaced_sampling_workaround = Bool.T(
        optional=True,
        default=False,
        help='Work around displaced sampling issues.')
    extend_incomplete = Bool.T(
        default=False,
        help='Extend incomplete seismic traces.')
    picks_paths = List.T(
        Path.T())
    blacklist_paths = List.T(
        Path.T(),
        help='List of text files with blacklisted stations.')
    blacklist = List.T(
        String.T(),
        help='Stations/components to be excluded according to their STA, '
             'NET.STA, NET.STA.LOC, or NET.STA.LOC.CHA codes.')
    whitelist_paths = List.T(
        Path.T(),
        help='List of text files with whitelisted stations.')
    whitelist = List.T(
        String.T(),
        optional=True,
        help='If not None, list of stations/components to include according '
             'to their STA, NET.STA, NET.STA.LOC, or NET.STA.LOC.CHA codes. '
             'Note: ''when whitelisting on channel level, both, the raw and '
             'the processed channel codes have to be listed.')
    synthetic_test = SyntheticTest.T(
        optional=True)

    kite_scene_paths = List.T(
        Path.T(),
        optional=True,
        help='List of directories for the InSAR scenes.')

    gnss_campaign_paths = List.T(
        Path.T(),
        optional=True,
        help='List of directories for the GNSS campaign data.')

    def __init__(self, *args, **kwargs):
        HasPaths.__init__(self, *args, **kwargs)
        # Cache of constructed Dataset objects, keyed by event name.
        self._ds = {}

    def get_event_names(self):
        '''
        Return the sorted names of all events found under
        ``events_path`` (with the event_name template wildcarded).
        '''
        logger.info('Loading events ...')

        def extra(path):
            return expand_template(path, dict(
                event_name='*'))

        def fp(path):
            return self.expand_path(path, extra=extra)

        def check_events(events, fn):
            # Sanity warnings only; nothing is rejected here.
            for ev in events:
                if not ev.name:
                    logger.warning('Event in %s has no name!', fn)
                    return
                # NOTE(review): truthiness check also fires for valid
                # zero coordinates/depth/time (e.g. on the equator) —
                # TODO confirm whether that is intended.
                if not ev.lat or not ev.lon:
                    logger.warning('Event %s has inconsistent coordinates!',
                                   ev.name)
                if not ev.depth:
                    logger.warning('Event %s has no depth!', ev.name)
                if not ev.time:
                    logger.warning('Event %s has no time!', ev.name)

        events = []
        events_path = fp(self.events_path)
        fns = glob.glob(events_path)
        if not fns:
            raise DatasetError('No event files matching "%s".' % events_path)

        for fn in fns:
            logger.debug('Loading from file %s' % fn)
            ev = model.load_events(filename=fn)
            check_events(ev, fn)

            events.extend(ev)

        event_names = [ev_.name for ev_ in events]
        event_names.sort()
        return event_names

    def get_dataset(self, event_name):
        '''
        Build (or return the cached) :py:class:`Dataset` for the given
        event, loading all configured data and metadata.

        :raises DatasetError: when any configured file cannot be loaded
        '''
        if event_name not in self._ds:
            def extra(path):
                return expand_template(path, dict(
                    event_name=event_name))

            def fp(path):
                # Expand templates and warn about non-existing paths.
                p = self.expand_path(path, extra=extra)
                if p is None:
                    return None

                if isinstance(p, list):
                    for path in p:
                        if not op.exists(path):
                            logger.warning('Path %s does not exist.' % path)
                else:
                    if not op.exists(p):
                        logger.warning('Path %s does not exist.' % p)

                return p

            ds = Dataset(event_name)
            try:
                ds.add_events(filename=fp(self.events_path))

                ds.add_stations(
                    pyrocko_stations_filename=fp(self.stations_path),
                    stationxml_filenames=fp(self.stations_stationxml_paths))

                if self.waveform_paths:
                    ds.add_waveforms(paths=fp(self.waveform_paths))

                if self.kite_scene_paths:
                    ds.add_kite_scenes(paths=fp(self.kite_scene_paths))

                if self.gnss_campaign_paths:
                    ds.add_gnss_campaigns(paths=fp(self.gnss_campaign_paths))

                if self.clippings_path:
                    ds.add_clippings(markers_filename=fp(self.clippings_path))

                if self.responses_sacpz_path:
                    ds.add_responses(
                        sacpz_dirname=fp(self.responses_sacpz_path))

                if self.responses_stationxml_paths:
                    ds.add_responses(
                        stationxml_filenames=fp(
                            self.responses_stationxml_paths))

                if self.station_corrections_path:
                    ds.add_station_corrections(
                        filename=fp(self.station_corrections_path))

                ds.apply_correction_factors = self.apply_correction_factors
                ds.apply_correction_delays = self.apply_correction_delays
                ds.apply_displaced_sampling_workaround = \
                    self.apply_displaced_sampling_workaround
                ds.extend_incomplete = self.extend_incomplete

                for picks_path in self.picks_paths:
                    ds.add_picks(
                        filename=fp(picks_path))

                ds.add_blacklist(self.blacklist)
                ds.add_blacklist(filenames=fp(self.blacklist_paths))
                if self.whitelist:
                    ds.add_whitelist(self.whitelist)
                if self.whitelist_paths:
                    ds.add_whitelist(filenames=fp(self.whitelist_paths))

                ds.set_synthetic_test(copy.deepcopy(self.synthetic_test))
                self._ds[event_name] = ds
            except (FileLoadError, OSError) as e:
                raise DatasetError(str(e))

        return self._ds[event_name]
# Public API of this module.
__all__ = '''
    Dataset
    DatasetConfig
    DatasetError
    InvalidObject
    NotFound
    StationCorrection
    load_station_corrections
    dump_station_corrections
'''.split()