1import glob
2import copy
3import os.path as op
4import logging
5import math
6import numpy as num
8from collections import defaultdict
9from pyrocko import util, model, trace, marker as pmarker
10from pyrocko.io.io_common import FileLoadError
11from pyrocko.fdsn import enhanced_sacpz, station as fs
12from pyrocko.guts import (Object, Tuple, String, Float, List, Bool, dump_all,
13 load_all)
14from pyrocko import squirrel
16from pyrocko import gf
18from .meta import Path, HasPaths, expand_template, GrondError
20from .synthetic_tests import SyntheticTest
# Guts serialization namespace prefix for objects defined in this module.
guts_prefix = 'grond'

# Module-level logger for dataset loading and access diagnostics.
logger = logging.getLogger('grond.dataset')
def quote_paths(paths):
    ''' Return *paths* as one comma-separated string of double-quoted paths. '''
    quoted = ['"%s"' % p for p in paths]
    return ', '.join(quoted)
class InvalidObject(Exception):
    ''' Raised when an object cannot be interpreted as station/channel codes
    (see :py:meth:`Dataset.get_nsl` / :py:meth:`Dataset.get_nslc`). '''
    pass
class NotFound(Exception):
    ''' Raised when a requested dataset item (station, response, waveform,
    scene, ...) is unavailable.

    :param reason: human-readable description of what is missing.
    :param codes: optional station/channel code tuple for context.
    :param time_range: optional ``(tmin, tmax)`` tuple for context.
    '''

    def __init__(self, reason, codes=None, time_range=None):
        self.reason = reason
        self.time_range = time_range
        self.codes = codes

    def __str__(self):
        # Assemble "<reason> (<codes>) (<tmin> - <tmax>)", omitting the
        # optional parts that were not given.
        parts = [self.reason]
        if self.codes:
            parts.append('(%s)' % '.'.join(self.codes))

        if self.time_range:
            parts.append('(%s - %s)' % (
                util.time_to_str(self.time_range[0]),
                util.time_to_str(self.time_range[1])))

        return ' '.join(parts)
class DatasetError(GrondError):
    ''' Raised on problems with dataset configuration or content. '''
    pass
class StationCorrection(Object):
    ''' Per-channel time delay and amplitude factor correction. '''

    # NSLC codes identifying the channel the correction applies to.
    codes = Tuple.T(4, String.T())
    # Time shift [s]; traces are shifted by -delay when applied.
    delay = Float.T()
    # Amplitude correction; trace samples are divided by this factor.
    factor = Float.T()
def load_station_corrections(filename):
    '''
    Load a list of :py:class:`StationCorrection` objects from a YAML file.

    :param filename: path of the YAML file to read.
    :returns: list of :py:class:`StationCorrection` objects.
    :raises DatasetError: if the file contains an object of any other type.
    '''
    scs = load_all(filename=filename)
    for sc in scs:
        # Use a real exception instead of `assert`: asserts are stripped
        # when Python runs with -O and would silently accept bad files.
        if not isinstance(sc, StationCorrection):
            raise DatasetError(
                'Invalid object in station corrections file "%s": %s'
                % (filename, type(sc)))

    return scs
def dump_station_corrections(station_corrections, filename):
    ''' Dump a list of :py:class:`StationCorrection` objects to a YAML file. '''
    return dump_all(station_corrections, filename=filename)
class Dataset(object):
    '''
    Container for observed data and metadata of one event.

    Aggregates waveforms (via a pyrocko Squirrel index), station metadata,
    instrument responses, clipping markers, black-/whitelists, station
    corrections, picks, kite InSAR scenes and GNSS campaigns, and provides
    filtered access to them.
    '''

    def __init__(self, event_name=None):
        # Known events; `event_name` selects the main event of the dataset.
        self.events = []
        # Waveform indexing/access backend.
        self.squirrel = squirrel.Squirrel()
        # Station metadata keyed by (net, sta, loc).
        self.stations = {}
        # SACPZ-derived responses keyed by channel codes.
        self.responses = defaultdict(list)
        # Parsed StationXML documents used for response lookup.
        self.responses_stationxml = []
        # Clipping onset times, keyed by nsl and nslc code tuples.
        self.clippings = {}
        # Code tuples of stations/channels to exclude.
        self.blacklist = set()
        # Whitelist sets; None means "no whitelist configured".
        self.whitelist_nslc = None
        self.whitelist_nsl_xx = None
        self.whitelist = None
        # Station corrections keyed by nslc codes.
        self.station_corrections = {}
        self.station_factors = {}
        # Pick markers loaded from marker files.
        self.pick_markers = []
        # Processing switches (mirrored from DatasetConfig).
        self.apply_correction_delays = True
        self.apply_correction_factors = True
        self.apply_displaced_sampling_workaround = False
        self.extend_incomplete = False
        self.clip_handling = 'by_nsl'
        # Non-waveform observations.
        self.kite_scenes = []
        self.gnss_campaigns = []
        # Optional synthetic test configuration.
        self.synthetic_test = None
        # Lazily built pick lookup and processed-waveform cache.
        self._picks = None
        self._cache = {}
        self._event_name = event_name
    def empty_cache(self):
        ''' Drop all cached processed waveforms. '''
        self._cache = {}
    def set_synthetic_test(self, synthetic_test):
        ''' Attach a synthetic test configuration (or None to disable). '''
        self.synthetic_test = synthetic_test
    def add_stations(
            self,
            stations=None,
            pyrocko_stations_filename=None,
            stationxml_filenames=None):
        '''
        Add station metadata from objects and/or files.

        :param stations: iterable of pyrocko Station objects to register.
        :param pyrocko_stations_filename: station file in Pyrocko format.
        :param stationxml_filenames: list of StationXML file paths; paths
            that do not exist are silently skipped.

        Stations are keyed by their (net, sta, loc) codes; later additions
        with the same codes replace earlier ones.
        '''
        if stations is not None:
            for station in stations:
                self.stations[station.nsl()] = station

        if pyrocko_stations_filename is not None:
            logger.debug(
                'Loading stations from file "%s"...' %
                pyrocko_stations_filename)

            for station in model.load_stations(pyrocko_stations_filename):
                self.stations[station.nsl()] = station

        if stationxml_filenames is not None and len(stationxml_filenames) > 0:

            for stationxml_filename in stationxml_filenames:
                if not op.exists(stationxml_filename):
                    continue

                logger.debug(
                    'Loading stations from StationXML file "%s"...' %
                    stationxml_filename)

                sx = fs.load_xml(filename=stationxml_filename)
                # Station epochs are selected at the main event's time.
                ev = self.get_event()
                stations = sx.get_pyrocko_stations(time=ev.time)
                if len(stations) == 0:
                    logger.warning(
                        'No stations found for time %s in file "%s".' % (
                            util.time_to_str(ev.time), stationxml_filename))

                for station in stations:
                    logger.debug('Adding station: %s.%s.%s' % station.nsl())
                    channels = station.get_channels()
                    # Vertical-only stations get mocked horizontal channels
                    # so component projections can still be set up.
                    if len(channels) == 1 and channels[0].name.endswith('Z'):
                        logger.warning(
                            'Station "%s" has vertical component'
                            ' information only, adding mocked channels.'
                            % station.nsl_string())
                        station.add_channel(
                            model.Channel(channels[0].name[:-1] + 'N'))
                        station.add_channel(
                            model.Channel(channels[0].name[:-1] + 'E'))

                    self.stations[station.nsl()] = station
160 def add_events(self, events=None, filename=None):
161 if events is not None:
162 self.events.extend(events)
164 if filename is not None:
165 logger.debug('Loading events from file "%s"...' % filename)
166 try:
167 events = model.load_events(filename)
168 self.events.extend(events)
169 logger.info(
170 'Loading events from %s: %i events found.' %
171 (filename, len(events)))
172 except Exception as e:
173 logger.warning('Could not load events from %s!', filename)
174 raise e
    def add_waveforms(self, paths, regex=None, fileformat='detect',
                      show_progress=False):
        '''
        Make waveform files under *paths* available through the Squirrel
        index.

        NOTE(review): `regex`, `fileformat` and `show_progress` are
        currently ignored — presumably kept for backward compatibility with
        an older pile-based implementation; confirm before removing.
        '''
        self.squirrel.add(paths)
    def add_responses(self, sacpz_dirname=None, stationxml_filenames=None):
        '''
        Add instrument response information.

        :param sacpz_dirname: directory to scan for SAC pole-zero files.
        :param stationxml_filenames: list of StationXML file paths; paths
            that do not exist are silently skipped.
        '''
        if sacpz_dirname:
            logger.debug(
                'Loading SAC PZ responses from "%s"...' % sacpz_dirname)
            for x in enhanced_sacpz.iload_dirname(sacpz_dirname):
                self.responses[x.codes].append(x)

        if stationxml_filenames:
            for stationxml_filename in stationxml_filenames:
                if not op.exists(stationxml_filename):
                    continue

                logger.debug(
                    'Loading StationXML responses from "%s"...' %
                    stationxml_filename)

                self.responses_stationxml.append(
                    fs.load_xml(filename=stationxml_filename))
200 def add_clippings(self, markers_filename):
201 markers = pmarker.load_markers(markers_filename)
202 clippings = {}
203 for marker in markers:
204 nslc = marker.one_nslc()
205 nsl = nslc[:3]
206 if nsl not in clippings:
207 clippings[nsl] = []
209 if nslc not in clippings:
210 clippings[nslc] = []
212 clippings[nsl].append(marker.tmin)
213 clippings[nslc].append(marker.tmin)
215 for k, times in clippings.items():
216 atimes = num.array(times, dtype=float)
217 if k not in self.clippings:
218 self.clippings[k] = atimes
219 else:
220 self.clippings[k] = num.concatenate(self.clippings, atimes)
222 def add_blacklist(self, blacklist=[], filenames=None):
223 logger.debug('Loading blacklisted stations...')
224 if filenames:
225 blacklist = list(blacklist)
226 for filename in filenames:
227 if op.exists(filename):
228 with open(filename, 'r') as f:
229 blacklist.extend(
230 s.strip() for s in f.read().splitlines())
231 else:
232 logger.warning('No such blacklist file: %s' % filename)
234 for x in blacklist:
235 if isinstance(x, str):
236 x = tuple(x.split('.'))
237 self.blacklist.add(x)
239 def add_whitelist(self, whitelist=[], filenames=None):
240 logger.debug('Loading whitelisted stations...')
241 if filenames:
242 whitelist = list(whitelist)
243 for filename in filenames:
244 with open(filename, 'r') as f:
245 whitelist.extend(s.strip() for s in f.read().splitlines())
247 if self.whitelist_nslc is None:
248 self.whitelist_nslc = set()
249 self.whitelist = set()
250 self.whitelist_nsl_xx = set()
252 for x in whitelist:
253 if isinstance(x, str):
254 x = tuple(x.split('.'))
255 if len(x) == 4:
256 self.whitelist_nslc.add(x)
257 self.whitelist_nsl_xx.add(x[:3])
258 else:
259 self.whitelist.add(x)
    def add_station_corrections(self, filename):
        ''' Load station corrections from *filename*; entries with codes
        seen before replace the earlier ones. '''
        self.station_corrections.update(
            (sc.codes, sc) for sc in load_station_corrections(filename))
    def add_picks(self, filename):
        ''' Load pick markers from a marker file and invalidate the lazily
        built pick lookup (see :py:meth:`get_picks`). '''
        self.pick_markers.extend(
            pmarker.load_markers(filename))

        self._picks = None
    def add_gnss_campaigns(self, paths):
        ''' Scan *paths* for YAML files (.yml/.yaml) and load each as a
        GNSS campaign. '''
        paths = util.select_files(
            paths,
            include=r'\.yml|\.yaml',
            show_progress=False)

        for path in paths:
            self.add_gnss_campaign(filename=path)
    def add_gnss_campaign(self, filename):
        '''
        Load one GNSS campaign from a YAML file.

        :raises ImportError: if the installed pyrocko lacks the gnss model.
        '''
        try:
            from pyrocko.model import gnss  # noqa
        except ImportError:
            raise ImportError('Module pyrocko.model.gnss not found,'
                              ' please upgrade pyrocko!')
        logger.debug('Loading GNSS campaign from "%s"...' % filename)

        # Only the first object in the file is used as the campaign.
        campaign = load_all(filename=filename)
        self.gnss_campaigns.append(campaign[0])
291 def add_kite_scenes(self, paths):
292 logger.info('Loading kite InSAR scenes...')
293 paths = util.select_files(
294 paths,
295 include=r'\.npz',
296 show_progress=False)
298 for path in paths:
299 self.add_kite_scene(filename=path)
301 if not self.kite_scenes:
302 logger.warning('Could not find any kite scenes at %s.' %
303 quote_paths(self.kite_scene_paths))
    def add_kite_scene(self, filename):
        '''
        Load a single kite InSAR scene.

        :raises ImportError: if the kite package is not installed.
        :raises AttributeError: if a scene with the same scene_id was
            already added.
        '''
        try:
            from kite import Scene
        except ImportError:
            raise ImportError('Module kite could not be imported,'
                              ' please install from https://pyrocko.org.')
        logger.debug('Loading kite scene from "%s"...' % filename)

        scene = Scene.load(filename)
        # Route kite's own logging at the same level as this module.
        scene._log.setLevel(logger.level)

        # Only add the scene if its scene_id is not yet known.
        try:
            self.get_kite_scene(scene.meta.scene_id)
        except NotFound:
            self.kite_scenes.append(scene)
        else:
            raise AttributeError('Kite scene_id not unique for "%s".'
                                 % filename)
324 def is_blacklisted(self, obj):
325 try:
326 nslc = self.get_nslc(obj)
327 if nslc in self.blacklist:
328 return True
330 except InvalidObject:
331 pass
333 nsl = self.get_nsl(obj)
334 return (
335 nsl in self.blacklist or
336 nsl[1:2] in self.blacklist or
337 nsl[:2] in self.blacklist)
339 def is_whitelisted(self, obj):
340 if self.whitelist_nslc is None:
341 return True
343 nsl = self.get_nsl(obj)
345 if (
346 nsl in self.whitelist or
347 nsl[1:2] in self.whitelist or
348 nsl[:2] in self.whitelist):
350 return True
352 try:
353 nslc = self.get_nslc(obj)
354 if nslc in self.whitelist_nslc:
355 return True
357 except InvalidObject:
358 return nsl in self.whitelist_nsl_xx
360 def has_clipping(self, nsl_or_nslc, tmin, tmax):
361 if nsl_or_nslc not in self.clippings:
362 return False
364 atimes = self.clippings[nsl_or_nslc]
365 return num.any(num.logical_and(tmin < atimes, atimes <= tmax))
367 def get_nsl(self, obj):
368 if isinstance(obj, trace.Trace):
369 net, sta, loc, _ = obj.nslc_id
370 elif isinstance(obj, model.Station):
371 net, sta, loc = obj.nsl()
372 elif isinstance(obj, gf.Target):
373 net, sta, loc, _ = obj.codes
374 elif isinstance(obj, tuple) and len(obj) in (3, 4):
375 net, sta, loc = obj[:3]
376 else:
377 raise InvalidObject(
378 'Cannot get nsl code from given object of type "%s".'
379 % type(obj))
381 return net, sta, loc
383 def get_nslc(self, obj):
384 if isinstance(obj, trace.Trace):
385 return obj.nslc_id
386 elif isinstance(obj, gf.Target):
387 return obj.codes
388 elif isinstance(obj, tuple) and len(obj) == 4:
389 return obj
390 else:
391 raise InvalidObject(
392 'Cannot get nslc code from given object "%s"' % type(obj))
394 def get_tmin_tmax(self, obj):
395 if isinstance(obj, trace.Trace):
396 return obj.tmin, obj.tmax
397 else:
398 raise InvalidObject(
399 'Cannot get tmin and tmax from given object of type "%s"' %
400 type(obj))
402 def get_station(self, obj):
403 if self.is_blacklisted(obj):
404 raise NotFound('Station is blacklisted:', self.get_nsl(obj))
406 if not self.is_whitelisted(obj):
407 raise NotFound('Station is not on whitelist:', self.get_nsl(obj))
409 if isinstance(obj, model.Station):
410 return obj
412 net, sta, loc = self.get_nsl(obj)
414 keys = [(net, sta, loc), (net, sta, ''), ('', sta, '')]
415 for k in keys:
416 if k in self.stations:
417 return self.stations[k]
419 raise NotFound('No station information:', keys)
421 def get_stations(self):
422 return [self.stations[k] for k in sorted(self.stations)
423 if not self.is_blacklisted(self.stations[k])
424 and self.is_whitelisted(self.stations[k])]
    def get_kite_scenes(self):
        ''' Return the list of loaded kite InSAR scenes. '''
        return self.kite_scenes
429 def get_kite_scene(self, scene_id=None):
430 if scene_id is None:
431 if len(self.kite_scenes) == 0:
432 raise AttributeError('No kite displacements defined.')
433 return self.kite_scenes[0]
434 else:
435 for scene in self.kite_scenes:
436 if scene.meta.scene_id == scene_id:
437 return scene
438 raise NotFound('No kite scene with id "%s" defined.' % scene_id)
    def get_gnss_campaigns(self):
        ''' Return the list of loaded GNSS campaigns. '''
        return self.gnss_campaigns
443 def get_gnss_campaign(self, name):
444 for camp in self.gnss_campaigns:
445 if camp.name == name:
446 return camp
447 raise NotFound('GNSS campaign %s not found!' % name)
    def get_response(self, obj, quantity='displacement'):
        '''
        Return the single instrument response matching *obj*.

        :param obj: anything accepted by :py:meth:`get_nslc` /
            :py:meth:`get_tmin_tmax` (typically a trace).
        :param quantity: 'displacement', 'velocity' or 'acceleration'; the
            SACPZ path appends differentiation stages accordingly, the
            StationXML path uses fake input units.
        :raises NotFound: when no response information is available, the
            channel is black-/not whitelisted, or zero or more than one
            candidate response matches.
        '''
        if (self.responses is None or len(self.responses) == 0) \
                and (self.responses_stationxml is None
                     or len(self.responses_stationxml) == 0):

            raise NotFound('No response information available.')

        quantity_to_unit = {
            'displacement': 'M',
            'velocity': 'M/S',
            'acceleration': 'M/S**2'}

        if self.is_blacklisted(obj):
            raise NotFound('Response is blacklisted:', self.get_nslc(obj))

        if not self.is_whitelisted(obj):
            raise NotFound('Response is not on whitelist:', self.get_nslc(obj))

        net, sta, loc, cha = self.get_nslc(obj)
        tmin, tmax = self.get_tmin_tmax(obj)

        # Lookup keys in decreasing specificity; duplicates (e.g. when loc
        # is already '') are removed while preserving order.
        keys_x = [
            (net, sta, loc, cha), (net, sta, '', cha), ('', sta, '', cha)]

        keys = []
        for k in keys_x:
            if k not in keys:
                keys.append(k)

        candidates = []
        for k in keys:
            if k in self.responses:
                for x in self.responses[k]:
                    # Response epoch must fully cover the requested span.
                    if x.tmin < tmin and (x.tmax is None or tmax < x.tmax):
                        if quantity == 'displacement':
                            candidates.append(x.response)
                        elif quantity == 'velocity':
                            candidates.append(trace.MultiplyResponse([
                                x.response,
                                trace.DifferentiationResponse()]))
                        elif quantity == 'acceleration':
                            candidates.append(trace.MultiplyResponse([
                                x.response,
                                trace.DifferentiationResponse(2)]))
                        else:
                            assert False

        for sx in self.responses_stationxml:
            try:
                candidates.append(
                    sx.get_pyrocko_response(
                        (net, sta, loc, cha),
                        timespan=(tmin, tmax),
                        fake_input_units=quantity_to_unit[quantity]))

            except (fs.NoResponseInformation, fs.MultipleResponseInformation):
                pass

        # Exactly one candidate is required; ambiguity is treated the same
        # as absence, to avoid silently picking a wrong response.
        if len(candidates) == 1:
            return candidates[0]

        elif len(candidates) == 0:
            raise NotFound('No response found:', (net, sta, loc, cha))
        else:
            raise NotFound('Multiple responses found:', (net, sta, loc, cha))
    def get_waveform_raw(
            self, obj,
            tmin,
            tmax,
            tpad=0.,
            toffset_noise_extract=0.,
            want_incomplete=False,
            extend_incomplete=False):
        '''
        Get raw waveform traces for the channel identified by *obj*.

        :param tmin: start time of the wanted window.
        :param tmax: end time of the wanted window.
        :param tpad: padding [s] added on both sides of the window.
        :param toffset_noise_extract: time shift [s] applied when extracting
            real noise for synthetic tests; extracted traces are shifted
            back by this amount.
        :param want_incomplete: if True, gappy/partial results are allowed.
        :param extend_incomplete: if True and exactly one trace was found,
            extend it to the full padded window by repeating edge samples.
        :raises NotFound: when the channel is blacklisted, not whitelisted,
            clipped, or the data is missing/gappy (unless allowed).
        '''
        net, sta, loc, cha = self.get_nslc(obj)

        if self.is_blacklisted((net, sta, loc, cha)):
            raise NotFound(
                'Waveform is blacklisted:', (net, sta, loc, cha))

        if not self.is_whitelisted((net, sta, loc, cha)):
            raise NotFound(
                'Waveform is not on whitelist:', (net, sta, loc, cha))

        # Clipping is checked at station or channel granularity depending
        # on the configured clip_handling mode.
        if self.clip_handling == 'by_nsl':
            if self.has_clipping((net, sta, loc), tmin, tmax):
                raise NotFound(
                    'Waveform clipped:', (net, sta, loc))

        elif self.clip_handling == 'by_nslc':
            if self.has_clipping((net, sta, loc, cha), tmin, tmax):
                raise NotFound(
                    'Waveform clipped:', (net, sta, loc, cha))

        trs = self.squirrel.get_waveforms(
            tmin=tmin+toffset_noise_extract-tpad,
            tmax=tmax+toffset_noise_extract+tpad,
            codes=(net, sta, loc, cha),
            want_incomplete=want_incomplete or extend_incomplete)

        if self.apply_displaced_sampling_workaround:
            for tr in trs:
                tr.snap()

        # Undo the noise-extraction shift so traces are aligned with the
        # requested window.
        if toffset_noise_extract != 0.0:
            for tr in trs:
                tr.shift(-toffset_noise_extract)

        if extend_incomplete and len(trs) == 1:
            trs[0].extend(
                tmin + toffset_noise_extract - tpad,
                tmax + toffset_noise_extract + tpad,
                fillmethod='repeat')

        if not want_incomplete and len(trs) != 1:
            if len(trs) == 0:
                message = 'Waveform missing or incomplete.'
            else:
                message = 'Waveform has gaps.'

            raise NotFound(
                message,
                codes=(net, sta, loc, cha),
                time_range=(
                    tmin + toffset_noise_extract - tpad,
                    tmax + toffset_noise_extract + tpad))

        return trs
579 def get_waveform_restituted(
580 self,
581 obj, quantity='displacement',
582 tmin=None, tmax=None, tpad=0.,
583 tfade=0., freqlimits=None, deltat=None,
584 toffset_noise_extract=0.,
585 want_incomplete=False,
586 extend_incomplete=False):
588 trs_raw = self.get_waveform_raw(
589 obj, tmin=tmin, tmax=tmax, tpad=tpad+tfade,
590 toffset_noise_extract=toffset_noise_extract,
591 want_incomplete=want_incomplete,
592 extend_incomplete=extend_incomplete)
594 trs_restituted = []
595 for tr in trs_raw:
596 if deltat is not None:
597 try:
598 tr.downsample_to(deltat, snap=True, allow_upsample_max=5)
600 except util.UnavailableDecimation:
601 logger.warn('using resample instead of decimation')
602 tr.resample(deltat)
604 tr.deltat = deltat
606 resp = self.get_response(tr, quantity=quantity)
607 try:
608 trs_restituted.append(
609 tr.transfer(
610 tfade=tfade, freqlimits=freqlimits,
611 transfer_function=resp, invert=True, demean=True))
613 except trace.InfiniteResponse:
614 raise NotFound(
615 'Instrument response deconvolution failed '
616 '(divide by zero)', tr.nslc_id)
618 return trs_restituted, trs_raw
    def _get_projections(
            self, station, backazimuth, source, target, tmin, tmax):
        '''
        Determine component projection matrices (ENZ and, when a
        backazimuth is available, RTZ) for *station*.

        :raises NotFound: when no projection can be determined.
        '''
        # fill in missing channel information (happens when station file
        # does not contain any channel information)
        if not station.get_channels():
            station = copy.deepcopy(station)

            nsl = station.nsl()
            # NOTE(review): accesses `self.squirrel.pile` — confirm that the
            # targeted pyrocko Squirrel version exposes a `pile` attribute.
            trs = self.squirrel.pile.all(
                tmin=tmin, tmax=tmax,
                trace_selector=lambda tr: tr.nslc_id[:3] == nsl,
                load_data=False)

            channels = list(set(tr.channel for tr in trs))
            station.set_channels_by_name(*channels)

        projections = []
        projections.extend(station.guess_projections_to_enu(
            out_channels=('E', 'N', 'Z')))

        # An explicit source/target pair overrides any given backazimuth.
        if source is not None and target is not None:
            backazimuth = source.azibazi_to(target)[1]

        if backazimuth is not None:
            projections.extend(station.guess_projections_to_rtu(
                out_channels=('R', 'T', 'Z'),
                backazimuth=backazimuth))

        if not projections:
            raise NotFound(
                'Cannot determine projection of data components:',
                station.nsl())

        return projections
    def _get_waveform(
            self,
            obj, quantity='displacement',
            tmin=None, tmax=None, tpad=0.,
            tfade=0., freqlimits=None, deltat=None, cache=None,
            backazimuth=None,
            source=None,
            target=None,
            debug=False):
        '''
        Core waveform access: restitute, project to ENZ/RTZ components,
        apply station corrections and (optionally) substitute synthetics.

        :param cache: True to use the dataset-internal cache, a dict to use
            a caller-supplied cache, or None to disable caching.
        :param debug: return ``(trs_projected, trs_restituted, trs_raw,
            tr_return)`` instead of a single trace; incompatible with
            caching.
        :raises NotFound: when the requested waveform cannot be produced;
            the failure is also cached (as None) when caching is active.

        NOTE(review): `tuple(freqlimits)` below raises TypeError when
        `freqlimits` is left at its default None — confirm all callers pass
        a concrete frequency band.
        '''
        assert not debug or (debug and cache is None)

        if cache is True:
            cache = self._cache

        _, _, _, channel = self.get_nslc(obj)
        station = self.get_station(self.get_nsl(obj))

        nslc = station.nsl() + (channel,)

        if self.is_blacklisted(nslc):
            raise NotFound(
                'Waveform is blacklisted:', nslc)

        if not self.is_whitelisted(nslc):
            raise NotFound(
                'Waveform is not on whitelist:', nslc)

        assert tmin is not None
        assert tmax is not None

        tmin = float(tmin)
        tmax = float(tmax)

        nslc = tuple(nslc)

        # Cache key captures all processing parameters.
        cache_k = nslc + (
            tmin, tmax, tuple(freqlimits), tfade, deltat, tpad, quantity)
        # NOTE(review): lookup keys are `nslc + cache_k` (nslc doubled,
        # since cache_k already starts with nslc) while storage below uses
        # `tr.nslc_id + cache_k` — consistent for the requested channel but
        # redundant; confirm before simplifying.
        if cache is not None and (nslc + cache_k) in cache:
            obj = cache[nslc + cache_k]
            if isinstance(obj, Exception):
                raise obj
            elif obj is None:
                raise NotFound('Waveform not found!', nslc)
            else:
                return obj

        syn_test = self.synthetic_test
        toffset_noise_extract = 0.0
        if syn_test:
            if not syn_test.respect_data_availability:
                if syn_test.real_noise_scale != 0.0:
                    raise DatasetError(
                        'respect_data_availability=False and '
                        'addition of real noise cannot be combined.')

                # Pure synthetics: bypass all real-data processing.
                tr = syn_test.get_waveform(
                    nslc, tmin, tmax,
                    tfade=tfade,
                    freqlimits=freqlimits)

                if cache is not None:
                    cache[tr.nslc_id + cache_k] = tr

                if debug:
                    return [], [], []
                else:
                    return tr

            if syn_test.real_noise_scale != 0.0:
                toffset_noise_extract = syn_test.real_noise_toffset

        # Collect correction delays so padding can absorb the largest shift.
        abs_delays = []
        for ocha in 'ENZRT':
            # NOTE(review): loop variable `ocha` is unused — the lookup uses
            # `channel` on every iteration, so the same correction is tested
            # five times; `(ocha,)` was probably intended here. Confirm.
            sc = self.station_corrections.get(station.nsl() + (channel,), None)
            if sc:
                abs_delays.append(abs(sc.delay))

        if abs_delays:
            abs_delay_max = max(abs_delays)
        else:
            abs_delay_max = 0.0

        projections = self._get_projections(
            station, backazimuth, source, target, tmin, tmax)

        try:
            trs_projected = []
            trs_restituted = []
            trs_raw = []
            exceptions = []
            for matrix, in_channels, out_channels in projections:
                deps = trace.project_dependencies(
                    matrix, in_channels, out_channels)

                try:
                    trs_restituted_group = []
                    trs_raw_group = []
                    if channel in deps:
                        # Fetch and restitute every raw channel required to
                        # project onto the requested component.
                        for cha in deps[channel]:
                            trs_restituted_this, trs_raw_this = \
                                self.get_waveform_restituted(
                                    station.nsl() + (cha,),
                                    quantity=quantity,
                                    tmin=tmin, tmax=tmax,
                                    tpad=tpad+abs_delay_max,
                                    toffset_noise_extract=toffset_noise_extract,  # noqa
                                    tfade=tfade,
                                    freqlimits=freqlimits,
                                    deltat=deltat,
                                    want_incomplete=debug,
                                    extend_incomplete=self.extend_incomplete)

                            trs_restituted_group.extend(trs_restituted_this)
                            trs_raw_group.extend(trs_raw_this)

                        trs_projected.extend(
                            trace.project(
                                trs_restituted_group, matrix,
                                in_channels, out_channels))

                        trs_restituted.extend(trs_restituted_group)
                        trs_raw.extend(trs_raw_group)

                except NotFound as e:
                    # Remember the failure; another projection may succeed.
                    exceptions.append((in_channels, out_channels, e))

            if not trs_projected:
                # All projections failed: report every attempt.
                err = []
                for (in_channels, out_channels, e) in exceptions:
                    sin = ', '.join(c.name for c in in_channels)
                    sout = ', '.join(c.name for c in out_channels)
                    err.append('(%s) -> (%s): %s' % (sin, sout, e))

                raise NotFound('\n'.join(err))

            # Apply station corrections (amplitude divisor, time shift) and
            # trim back to the requested window.
            for tr in trs_projected:
                sc = self.station_corrections.get(tr.nslc_id, None)
                if sc:
                    if self.apply_correction_factors:
                        tr.ydata /= sc.factor

                    if self.apply_correction_delays:
                        tr.shift(-sc.delay)

                if tmin is not None and tmax is not None:
                    tr.chop(tmin, tmax)

            if syn_test:
                # Replace the processed data with synthetics, optionally
                # adding scaled real noise on top.
                trs_projected_synthetic = []
                for tr in trs_projected:
                    if tr.channel == channel:
                        tr_syn = syn_test.get_waveform(
                            tr.nslc_id, tmin, tmax,
                            tfade=tfade, freqlimits=freqlimits)

                        if tr_syn:
                            if syn_test.real_noise_scale != 0.0:
                                tr_syn = tr_syn.copy()
                                tr_noise = tr.copy()
                                tr_noise.set_ydata(
                                    tr_noise.get_ydata()
                                    * syn_test.real_noise_scale)

                                tr_syn.add(tr_noise)

                            trs_projected_synthetic.append(tr_syn)

                trs_projected = trs_projected_synthetic

            if cache is not None:
                for tr in trs_projected:
                    cache[tr.nslc_id + cache_k] = tr

            tr_return = None
            for tr in trs_projected:
                if tr.channel == channel:
                    tr_return = tr

            if debug:
                return trs_projected, trs_restituted, trs_raw, tr_return

            elif tr_return:
                return tr_return

            else:
                raise NotFound(
                    'waveform not available', station.nsl() + (channel,))

        except NotFound:
            # Negative-cache the failure so repeated requests fail fast.
            if cache is not None:
                cache[nslc + cache_k] = None
            raise
850 def get_waveform(self, obj, tinc_cache=None, **kwargs):
851 tmin = kwargs['tmin']
852 tmax = kwargs['tmax']
853 if tinc_cache is not None:
854 tmin_r = (math.floor(tmin / tinc_cache) - 1.0) * tinc_cache
855 tmax_r = (math.ceil(tmax / tinc_cache) + 1.0) * tinc_cache
856 else:
857 tmin_r = tmin
858 tmax_r = tmax
860 kwargs['tmin'] = tmin_r
861 kwargs['tmax'] = tmax_r
863 if kwargs.get('debug', None):
864 return self._get_waveform(obj, **kwargs)
865 else:
866 tr = self._get_waveform(obj, **kwargs)
867 return tr.chop(tmin, tmax, inplace=False)
869 def get_events(self, magmin=None, event_names=None):
870 evs = []
871 for ev in self.events:
872 if ((magmin is None or ev.magnitude >= magmin) and
873 (event_names is None or ev.name in event_names)):
874 evs.append(ev)
876 return evs
878 def get_event_by_time(self, t, magmin=None):
879 evs = self.get_events(magmin=magmin)
880 ev_x = None
881 for ev in evs:
882 if ev_x is None or abs(ev.time - t) < abs(ev_x.time - t):
883 ev_x = ev
885 if not ev_x:
886 raise NotFound(
887 'No event information matching criteria (t=%s, magmin=%s).' %
888 (t, magmin))
890 return ev_x
892 def get_event(self):
893 if self._event_name is None:
894 raise NotFound('No main event selected in dataset!')
896 for ev in self.events:
897 if ev.name == self._event_name:
898 return ev
900 raise NotFound('No such event: %s' % self._event_name)
    def get_picks(self):
        '''
        Return phase picks as a dict keyed by ``(nsl, phasename,
        eventname)``, lazily built from the loaded pick markers.

        Phase markers are associated to events via event hashes taken from
        event markers in the pick files and from the dataset's own events.

        :raises DatasetError: on duplicate event names in the pick files,
            unassociated picks, or duplicate picks.
        '''
        if self._picks is None:
            # Map event hashes to event names, first from event markers...
            hash_to_name = {}
            names = set()
            for marker in self.pick_markers:
                if isinstance(marker, pmarker.EventMarker):
                    name = marker.get_event().name
                    if name in names:
                        raise DatasetError(
                            'Duplicate event name "%s" in picks.' % name)

                    names.add(name)
                    hash_to_name[marker.get_event_hash()] = name

            # ... then from the dataset's own events.
            for ev in self.events:
                hash_to_name[ev.get_hash()] = ev.name

            picks = {}
            for marker in self.pick_markers:
                if isinstance(marker, pmarker.PhaseMarker):
                    ehash = marker.get_event_hash()

                    nsl = marker.one_nslc()[:3]
                    phasename = marker.get_phasename()

                    if ehash is None or ehash not in hash_to_name:
                        raise DatasetError(
                            'Unassociated pick: %s.%s.%s, %s' %
                            (nsl + (phasename, )))

                    eventname = hash_to_name[ehash]

                    if (nsl, phasename, eventname) in picks:
                        raise DatasetError(
                            'Duplicate pick: %s.%s.%s, %s' %
                            (nsl + (phasename, )))

                    picks[nsl, phasename, eventname] = marker

            self._picks = picks

        return self._picks
    def get_pick(self, eventname, obj, phasename):
        ''' Return the pick marker for the given event, station and phase,
        or None when no such pick exists. '''
        nsl = self.get_nsl(obj)
        return self.get_picks().get((nsl, phasename, eventname), None)
class DatasetConfig(HasPaths):
    ''' Configuration for a Grond `Dataset` object. '''

    # --- Station and event metadata inputs ---
    stations_path = Path.T(
        optional=True,
        help='List of files with station coordinates in Pyrocko format.')
    stations_stationxml_paths = List.T(
        Path.T(),
        optional=True,
        help='List of files with station coordinates in StationXML format.')
    events_path = Path.T(
        optional=True,
        help='File with hypocenter information and possibly'
             ' reference solution')

    # --- Waveform data and response inputs ---
    waveform_paths = List.T(
        Path.T(),
        optional=True,
        help='List of directories with raw waveform data')
    clippings_path = Path.T(
        optional=True)
    responses_sacpz_path = Path.T(
        optional=True,
        help='List of SACPZ response files for restitution of'
             ' the raw waveform data.')
    responses_stationxml_paths = List.T(
        Path.T(),
        optional=True,
        help='List of StationXML response files for restitution of'
             ' the raw waveform data.')

    # --- Station corrections and processing switches ---
    station_corrections_path = Path.T(
        optional=True,
        help='File containing station correction informations.')
    apply_correction_factors = Bool.T(
        optional=True,
        default=True,
        help='Apply correction factors from station corrections.')
    apply_correction_delays = Bool.T(
        optional=True,
        default=True,
        help='Apply correction delays from station corrections.')
    apply_displaced_sampling_workaround = Bool.T(
        optional=True,
        default=False,
        help='Work around displaced sampling issues.')
    extend_incomplete = Bool.T(
        default=False,
        help='Extend incomplete seismic traces.')
    picks_paths = List.T(
        Path.T())

    # --- Station selection ---
    blacklist_paths = List.T(
        Path.T(),
        help='List of text files with blacklisted stations.')
    blacklist = List.T(
        String.T(),
        help='Stations/components to be excluded according to their STA, '
             'NET.STA, NET.STA.LOC, or NET.STA.LOC.CHA codes.')
    whitelist_paths = List.T(
        Path.T(),
        help='List of text files with whitelisted stations.')
    whitelist = List.T(
        String.T(),
        optional=True,
        help='If not None, list of stations/components to include according '
             'to their STA, NET.STA, NET.STA.LOC, or NET.STA.LOC.CHA codes. '
             'Note: ''when whitelisting on channel level, both, the raw and '
             'the processed channel codes have to be listed.')

    # --- Synthetic testing and non-waveform data ---
    synthetic_test = SyntheticTest.T(
        optional=True)

    kite_scene_paths = List.T(
        Path.T(),
        optional=True,
        help='List of directories for the InSAR scenes.')

    gnss_campaign_paths = List.T(
        Path.T(),
        optional=True,
        help='List of directories for the GNSS campaign data.')
    def __init__(self, *args, **kwargs):
        HasPaths.__init__(self, *args, **kwargs)
        # Cache of constructed Dataset instances, keyed by event name.
        self._ds = {}
    def get_event_names(self):
        '''
        Return the sorted names of all events found under the configured
        (templated) events path.

        :raises DatasetError: when no event file matches the path.
        '''
        logger.info('Loading events ...')

        def extra(path):
            # Expand the event-name template with a glob wildcard so all
            # events' files are matched.
            return expand_template(path, dict(
                event_name='*'))

        def fp(path):
            return self.expand_path(path, extra=extra)

        def check_events(events, fn):
            # Sanity checks emitting warnings only; loading proceeds.
            for ev in events:
                if not ev.name:
                    logger.warning('Event in %s has no name!', fn)
                    # NOTE(review): this `return` aborts checking of ALL
                    # remaining events in the file — a `continue` may have
                    # been intended here; confirm.
                    return
                if not ev.lat or not ev.lon:
                    # NOTE(review): a falsy test also fires for the valid
                    # coordinate 0.0 — confirm whether `is None` was meant.
                    logger.warning('Event %s has inconsistent coordinates!',
                                   ev.name)
                if not ev.depth:
                    logger.warning('Event %s has no depth!', ev.name)
                if not ev.time:
                    logger.warning('Event %s has no time!', ev.name)

        events = []
        events_path = fp(self.events_path)
        fns = glob.glob(events_path)
        if not fns:
            raise DatasetError('No event files matching "%s".' % events_path)

        for fn in fns:
            logger.debug('Loading from file %s' % fn)
            ev = model.load_events(filename=fn)
            check_events(ev, fn)

            events.extend(ev)

        event_names = [ev_.name for ev_ in events]
        event_names.sort()
        return event_names
    def get_dataset(self, event_name):
        '''
        Return the :py:class:`Dataset` for *event_name*, constructing and
        caching it on first access.

        All configured paths are expanded with the event name before
        loading; missing paths are reported as warnings.

        :raises DatasetError: when any input file fails to load.
        '''
        if event_name not in self._ds:
            def extra(path):
                return expand_template(path, dict(
                    event_name=event_name))

            def fp(path):
                # Expand a single path or a list of paths, warning about
                # entries that do not exist.
                p = self.expand_path(path, extra=extra)
                if p is None:
                    return None

                if isinstance(p, list):
                    for path in p:
                        if not op.exists(path):
                            logger.warning('Path %s does not exist.' % path)
                else:
                    if not op.exists(p):
                        logger.warning('Path %s does not exist.' % p)

                return p

            ds = Dataset(event_name)
            try:
                ds.add_events(filename=fp(self.events_path))

                ds.add_stations(
                    pyrocko_stations_filename=fp(self.stations_path),
                    stationxml_filenames=fp(self.stations_stationxml_paths))

                if self.waveform_paths:
                    ds.add_waveforms(paths=fp(self.waveform_paths))

                if self.kite_scene_paths:
                    ds.add_kite_scenes(paths=fp(self.kite_scene_paths))

                if self.gnss_campaign_paths:
                    ds.add_gnss_campaigns(paths=fp(self.gnss_campaign_paths))

                if self.clippings_path:
                    ds.add_clippings(markers_filename=fp(self.clippings_path))

                if self.responses_sacpz_path:
                    ds.add_responses(
                        sacpz_dirname=fp(self.responses_sacpz_path))

                if self.responses_stationxml_paths:
                    ds.add_responses(
                        stationxml_filenames=fp(
                            self.responses_stationxml_paths))

                if self.station_corrections_path:
                    ds.add_station_corrections(
                        filename=fp(self.station_corrections_path))

                # Mirror processing switches onto the dataset.
                ds.apply_correction_factors = self.apply_correction_factors
                ds.apply_correction_delays = self.apply_correction_delays
                ds.apply_displaced_sampling_workaround = \
                    self.apply_displaced_sampling_workaround
                ds.extend_incomplete = self.extend_incomplete

                for picks_path in self.picks_paths:
                    ds.add_picks(
                        filename=fp(picks_path))

                ds.add_blacklist(self.blacklist)
                ds.add_blacklist(filenames=fp(self.blacklist_paths))
                if self.whitelist:
                    ds.add_whitelist(self.whitelist)
                if self.whitelist_paths:
                    ds.add_whitelist(filenames=fp(self.whitelist_paths))

                # Deep-copy so per-event state in the synthetic test cannot
                # leak between datasets.
                ds.set_synthetic_test(copy.deepcopy(self.synthetic_test))
                self._ds[event_name] = ds
            except (FileLoadError, OSError) as e:
                raise DatasetError(str(e))

        return self._ds[event_name]
# Explicit public API of this module.
__all__ = '''
    Dataset
    DatasetConfig
    DatasetError
    InvalidObject
    NotFound
    StationCorrection
    load_station_corrections
    dump_station_corrections
'''.split()