Coverage for /usr/local/lib/python3.11/dist-packages/grond/dataset.py: 72%
677 statements
« prev ^ index » next coverage.py v6.5.0, created at 2023-10-26 16:25 +0000
1import glob
2import copy
3import os.path as op
4import logging
5import math
6import numpy as num
8from collections import defaultdict
9from pyrocko import util, pile, model, config, trace, \
10 marker as pmarker
11from pyrocko.io.io_common import FileLoadError
12from pyrocko.fdsn import enhanced_sacpz, station as fs
13from pyrocko.guts import (Object, Tuple, String, Float, List, Bool, dump_all,
14 load_all)
16from pyrocko import gf
18from .meta import Path, HasPaths, expand_template, GrondError
20from .synthetic_tests import SyntheticTest
# Namespace prefix used by guts for serialized objects from this module.
guts_prefix = 'grond'

# Module-level logger; configured by the application.
logger = logging.getLogger('grond.dataset')
def quote_paths(paths):
    '''Return a comma-separated listing of *paths*, each in double quotes.'''
    quoted = [f'"{path}"' for path in paths]
    return ', '.join(quoted)
class InvalidObject(Exception):
    '''Raised when nsl/nslc codes or times cannot be derived from an object.'''
    pass
class NotFound(Exception):
    '''Raised when a requested dataset item (station, waveform, response,
    scene, ...) is not available.

    :param reason: human-readable description of what is missing.
    :param codes: optional nsl/nslc codes identifying the item.
    :param time_range: optional ``(tmin, tmax)`` tuple the request covered.
    '''

    def __init__(self, reason, codes=None, time_range=None):
        self.reason = reason
        self.time_range = time_range
        self.codes = codes

    def __str__(self):
        parts = [self.reason]
        if self.codes:
            parts.append(' (%s)' % '.'.join(self.codes))

        if self.time_range:
            parts.append(' (%s - %s)' % (
                util.time_to_str(self.time_range[0]),
                util.time_to_str(self.time_range[1])))

        return ''.join(parts)
class DatasetError(GrondError):
    '''Raised for errors in dataset configuration or data loading.'''
    pass
class StationCorrection(Object):
    '''Per-channel time delay and amplitude factor correction.'''

    codes = Tuple.T(4, String.T())  # (net, sta, loc, cha) codes of channel
    delay = Float.T()               # time shift to be removed from the trace
    factor = Float.T()              # amplitude factor the trace is divided by
def load_station_corrections(filename):
    '''Load a list of :py:class:`StationCorrection` objects from a YAML file.

    :param filename: path to the station corrections file.
    :returns: list of :py:class:`StationCorrection` objects.
    :raises DatasetError: if the file contains objects of any other type.
    '''
    scs = load_all(filename=filename)
    for sc in scs:
        # was a bare assert, which is silently stripped under ``python -O``
        if not isinstance(sc, StationCorrection):
            raise DatasetError(
                'Invalid object in station correction file "%s".' % filename)

    return scs
def dump_station_corrections(station_corrections, filename):
    '''Write a list of :py:class:`StationCorrection` objects to a YAML file.'''
    return dump_all(station_corrections, filename=filename)
class Dataset(object):
    '''Container for all observational data used in a Grond setup.

    Holds events, stations, waveform access (via a pyrocko pile),
    instrument responses, InSAR scenes, GNSS campaigns, picks, black-/
    whitelists and station corrections for one main event.
    '''

    def __init__(self, event_name=None):
        '''
        :param event_name: name of the main event this dataset refers to.
        '''
        self.events = []                        # pyrocko event objects
        self.pile = pile.Pile()                 # waveform data access
        self.stations = {}                      # stations by (net, sta, loc)
        self.responses = defaultdict(list)      # SAC PZ responses by codes
        self.responses_stationxml = []          # loaded StationXML documents
        self.clippings = {}                     # clipping times by nsl/nslc
        self.blacklist = set()                  # excluded code tuples
        self.whitelist_nslc = None              # allowed nslc codes (None = all)
        self.whitelist_nsl_xx = None            # nsl parts of whitelisted nslc
        self.whitelist = None                   # allowed partial code tuples
        self.station_corrections = {}           # StationCorrection by nslc
        self.station_factors = {}               # not referenced in this module
        self.pick_markers = []                  # markers loaded from pick files
        self.apply_correction_delays = True     # apply delays from corrections
        self.apply_correction_factors = True    # apply amplitude factors
        self.apply_displaced_sampling_workaround = False  # snap traces if set
        self.extend_incomplete = False          # repeat-extend gappy traces
        self.clip_handling = 'by_nsl'           # granularity of clipping test
        self.kite_scenes = []                   # InSAR scenes
        self.gnss_campaigns = []                # GNSS campaign data
        self.synthetic_test = None              # optional synthetic test setup
        self._picks = None                      # lazily built pick lookup
        self._cache = {}                        # processed waveform cache
        self._event_name = event_name           # name of the main event
    def empty_cache(self):
        '''Drop all cached processed waveforms.'''
        self._cache = {}
    def set_synthetic_test(self, synthetic_test):
        '''Attach a synthetic test configuration (or ``None`` to disable).'''
        self.synthetic_test = synthetic_test
    def add_stations(
            self,
            stations=None,
            pyrocko_stations_filename=None,
            stationxml_filenames=None):
        '''Add station meta-data from objects and/or files.

        :param stations: iterable of pyrocko station objects.
        :param pyrocko_stations_filename: station file in Pyrocko format.
        :param stationxml_filenames: list of StationXML files; non-existing
            files are silently skipped.
        '''
        if stations is not None:
            for station in stations:
                self.stations[station.nsl()] = station

        if pyrocko_stations_filename is not None:
            logger.debug(
                'Loading stations from file "%s"...' %
                pyrocko_stations_filename)

            for station in model.load_stations(pyrocko_stations_filename):
                self.stations[station.nsl()] = station

        if stationxml_filenames is not None and len(stationxml_filenames) > 0:
            for stationxml_filename in stationxml_filenames:
                if not op.exists(stationxml_filename):
                    continue

                logger.debug(
                    'Loading stations from StationXML file "%s"...' %
                    stationxml_filename)

                sx = fs.load_xml(filename=stationxml_filename)
                # station epochs are selected at the main event's time
                ev = self.get_event()
                stations = sx.get_pyrocko_stations(time=ev.time)
                if len(stations) == 0:
                    logger.warning(
                        'No stations found for time %s in file "%s".' % (
                            util.time_to_str(ev.time), stationxml_filename))

                for station in stations:
                    logger.debug('Adding station: %s.%s.%s' % station.nsl())
                    channels = station.get_channels()
                    # vertical-only stations get mocked horizontal channels
                    # so component projections can be set up later
                    if len(channels) == 1 and channels[0].name.endswith('Z'):
                        logger.warning(
                            'Station "%s" has vertical component'
                            ' information only, adding mocked channels.'
                            % station.nsl_string())
                        station.add_channel(
                            model.Channel(channels[0].name[:-1] + 'N'))
                        station.add_channel(
                            model.Channel(channels[0].name[:-1] + 'E'))

                    self.stations[station.nsl()] = station
160 def add_events(self, events=None, filename=None):
161 if events is not None:
162 self.events.extend(events)
164 if filename is not None:
165 logger.debug('Loading events from file "%s"...' % filename)
166 try:
167 events = model.load_events(filename)
168 self.events.extend(events)
169 logger.info(
170 'Loading events from %s: %i events found.' %
171 (filename, len(events)))
172 except Exception as e:
173 logger.warning('Could not load events from %s!', filename)
174 raise e
    def add_waveforms(self, paths, regex=None, fileformat='detect',
                      show_progress=False):
        '''Scan files below given paths and add them to the waveform pile.

        :param paths: files or directories to search for waveform files.
        :param regex: optional filename inclusion pattern.
        :param fileformat: file format name or ``'detect'``.
        :param show_progress: show progress bars while scanning.
        '''
        cachedirname = config.config().cache_dir

        logger.debug('Selecting waveform files %s...' % quote_paths(paths))
        fns = util.select_files(paths, include=regex,
                                show_progress=show_progress)
        cache = pile.get_cache(cachedirname)
        logger.debug('Scanning waveform files %s...' % quote_paths(paths))
        self.pile.load_files(sorted(fns), cache=cache,
                             fileformat=fileformat,
                             show_progress=show_progress)
    def add_responses(self, sacpz_dirname=None, stationxml_filenames=None):
        '''Load instrument responses from SAC PZ files and/or StationXML.

        :param sacpz_dirname: directory scanned for SAC pole-zero files.
        :param stationxml_filenames: list of StationXML files; non-existing
            files are silently skipped.
        '''
        if sacpz_dirname:
            logger.debug(
                'Loading SAC PZ responses from "%s"...' % sacpz_dirname)
            for x in enhanced_sacpz.iload_dirname(sacpz_dirname):
                self.responses[x.codes].append(x)

        if stationxml_filenames:
            for stationxml_filename in stationxml_filenames:
                if not op.exists(stationxml_filename):
                    continue

                logger.debug(
                    'Loading StationXML responses from "%s"...' %
                    stationxml_filename)

                self.responses_stationxml.append(
                    fs.load_xml(filename=stationxml_filename))
208 def add_clippings(self, markers_filename):
209 markers = pmarker.load_markers(markers_filename)
210 clippings = {}
211 for marker in markers:
212 nslc = marker.one_nslc()
213 nsl = nslc[:3]
214 if nsl not in clippings:
215 clippings[nsl] = []
217 if nslc not in clippings:
218 clippings[nslc] = []
220 clippings[nsl].append(marker.tmin)
221 clippings[nslc].append(marker.tmin)
223 for k, times in clippings.items():
224 atimes = num.array(times, dtype=float)
225 if k not in self.clippings:
226 self.clippings[k] = atimes
227 else:
228 self.clippings[k] = num.concatenate(self.clippings, atimes)
230 def add_blacklist(self, blacklist=[], filenames=None):
231 logger.debug('Loading blacklisted stations...')
232 if filenames:
233 blacklist = list(blacklist)
234 for filename in filenames:
235 if op.exists(filename):
236 with open(filename, 'r') as f:
237 blacklist.extend(
238 s.strip() for s in f.read().splitlines())
239 else:
240 logger.warning('No such blacklist file: %s' % filename)
242 for x in blacklist:
243 if isinstance(x, str):
244 x = tuple(x.split('.'))
245 self.blacklist.add(x)
247 def add_whitelist(self, whitelist=[], filenames=None):
248 logger.debug('Loading whitelisted stations...')
249 if filenames:
250 whitelist = list(whitelist)
251 for filename in filenames:
252 with open(filename, 'r') as f:
253 whitelist.extend(s.strip() for s in f.read().splitlines())
255 if self.whitelist_nslc is None:
256 self.whitelist_nslc = set()
257 self.whitelist = set()
258 self.whitelist_nsl_xx = set()
260 for x in whitelist:
261 if isinstance(x, str):
262 x = tuple(x.split('.'))
263 if len(x) == 4:
264 self.whitelist_nslc.add(x)
265 self.whitelist_nsl_xx.add(x[:3])
266 else:
267 self.whitelist.add(x)
    def add_station_corrections(self, filename):
        '''Load station corrections from file, indexed by their nslc codes.'''
        self.station_corrections.update(
            (sc.codes, sc) for sc in load_station_corrections(filename))
    def add_picks(self, filename):
        '''Load pick markers from file; invalidates the pick lookup cache.'''
        self.pick_markers.extend(
            pmarker.load_markers(filename))

        self._picks = None  # force rebuild on the next get_picks() call
    def add_gnss_campaigns(self, paths):
        '''Find and load all GNSS campaign YAML files below given paths.'''
        paths = util.select_files(
            paths,
            include=r'\.yml|\.yaml',
            show_progress=False)

        for path in paths:
            self.add_gnss_campaign(filename=path)
    def add_gnss_campaign(self, filename):
        '''Load a single GNSS campaign from a YAML file.

        :raises ImportError: if the installed pyrocko has no GNSS support.
        '''
        try:
            from pyrocko.model import gnss  # noqa
        except ImportError:
            raise ImportError('Module pyrocko.model.gnss not found,'
                              ' please upgrade pyrocko!')
        logger.debug('Loading GNSS campaign from "%s"...' % filename)

        campaign = load_all(filename=filename)
        # each file is expected to hold one campaign object
        self.gnss_campaigns.append(campaign[0])
299 def add_kite_scenes(self, paths):
300 logger.info('Loading kite InSAR scenes...')
301 paths = util.select_files(
302 paths,
303 include=r'\.npz',
304 show_progress=False)
306 for path in paths:
307 self.add_kite_scene(filename=path)
309 if not self.kite_scenes:
310 logger.warning('Could not find any kite scenes at %s.' %
311 quote_paths(self.kite_scene_paths))
    def add_kite_scene(self, filename):
        '''Load one kite InSAR scene; its scene_id must be unique.

        :raises ImportError: if kite is not installed.
        :raises AttributeError: if a scene with the same id was added before.
        '''
        try:
            from kite import Scene
        except ImportError:
            raise ImportError('Module kite could not be imported,'
                              ' please install from https://pyrocko.org.')
        logger.debug('Loading kite scene from "%s"...' % filename)

        scene = Scene.load(filename)
        scene._log.setLevel(logger.level)  # align kite's log verbosity

        try:
            self.get_kite_scene(scene.meta.scene_id)
        except NotFound:
            # id not present yet -- safe to add
            self.kite_scenes.append(scene)
        else:
            raise AttributeError('Kite scene_id not unique for "%s".'
                                 % filename)
332 def is_blacklisted(self, obj):
333 try:
334 nslc = self.get_nslc(obj)
335 if nslc in self.blacklist:
336 return True
338 except InvalidObject:
339 pass
341 nsl = self.get_nsl(obj)
342 return (
343 nsl in self.blacklist or
344 nsl[1:2] in self.blacklist or
345 nsl[:2] in self.blacklist)
347 def is_whitelisted(self, obj):
348 if self.whitelist_nslc is None:
349 return True
351 nsl = self.get_nsl(obj)
353 if (
354 nsl in self.whitelist or
355 nsl[1:2] in self.whitelist or
356 nsl[:2] in self.whitelist):
358 return True
360 try:
361 nslc = self.get_nslc(obj)
362 if nslc in self.whitelist_nslc:
363 return True
365 except InvalidObject:
366 return nsl in self.whitelist_nsl_xx
368 def has_clipping(self, nsl_or_nslc, tmin, tmax):
369 if nsl_or_nslc not in self.clippings:
370 return False
372 atimes = self.clippings[nsl_or_nslc]
373 return num.any(num.logical_and(tmin < atimes, atimes <= tmax))
    def get_nsl(self, obj):
        '''Extract ``(net, sta, loc)`` codes from a supported object.

        Accepts traces, stations, gf targets and 3- or 4-tuples of codes.

        :raises InvalidObject: if codes cannot be derived from ``obj``.
        '''
        if isinstance(obj, trace.Trace):
            net, sta, loc, _ = obj.nslc_id
        elif isinstance(obj, model.Station):
            net, sta, loc = obj.nsl()
        elif isinstance(obj, gf.Target):
            net, sta, loc, _ = obj.codes
        elif isinstance(obj, tuple) and len(obj) in (3, 4):
            net, sta, loc = obj[:3]
        else:
            raise InvalidObject(
                'Cannot get nsl code from given object of type "%s".'
                % type(obj))

        return net, sta, loc
    def get_nslc(self, obj):
        '''Extract ``(net, sta, loc, cha)`` codes from a supported object.

        :raises InvalidObject: if ``obj`` carries no channel information.
        '''
        if isinstance(obj, trace.Trace):
            return obj.nslc_id
        elif isinstance(obj, gf.Target):
            return obj.codes
        elif isinstance(obj, tuple) and len(obj) == 4:
            return obj
        else:
            raise InvalidObject(
                'Cannot get nslc code from given object "%s"' % type(obj))
    def get_tmin_tmax(self, obj):
        '''Return ``(tmin, tmax)`` of a trace object.

        :raises InvalidObject: for any non-trace object.
        '''
        if isinstance(obj, trace.Trace):
            return obj.tmin, obj.tmax
        else:
            raise InvalidObject(
                'Cannot get tmin and tmax from given object of type "%s"' %
                type(obj))
410 def get_station(self, obj):
411 if self.is_blacklisted(obj):
412 raise NotFound('Station is blacklisted:', self.get_nsl(obj))
414 if not self.is_whitelisted(obj):
415 raise NotFound('Station is not on whitelist:', self.get_nsl(obj))
417 if isinstance(obj, model.Station):
418 return obj
420 net, sta, loc = self.get_nsl(obj)
422 keys = [(net, sta, loc), (net, sta, ''), ('', sta, '')]
423 for k in keys:
424 if k in self.stations:
425 return self.stations[k]
427 raise NotFound('No station information:', keys)
429 def get_stations(self):
430 return [self.stations[k] for k in sorted(self.stations)
431 if not self.is_blacklisted(self.stations[k])
432 and self.is_whitelisted(self.stations[k])]
    def get_kite_scenes(self):
        '''Return the list of all loaded kite InSAR scenes.'''
        return self.kite_scenes
437 def get_kite_scene(self, scene_id=None):
438 if scene_id is None:
439 if len(self.kite_scenes) == 0:
440 raise AttributeError('No kite displacements defined.')
441 return self.kite_scenes[0]
442 else:
443 for scene in self.kite_scenes:
444 if scene.meta.scene_id == scene_id:
445 return scene
446 raise NotFound('No kite scene with id "%s" defined.' % scene_id)
    def get_gnss_campaigns(self):
        '''Return the list of all loaded GNSS campaigns.'''
        return self.gnss_campaigns
451 def get_gnss_campaign(self, name):
452 for camp in self.gnss_campaigns:
453 if camp.name == name:
454 return camp
455 raise NotFound('GNSS campaign %s not found!' % name)
    def get_response(self, obj, quantity='displacement'):
        '''Find the single instrument response matching ``obj``.

        :param obj: trace, target or nslc tuple identifying the channel.
        :param quantity: ``'displacement'``, ``'velocity'`` or
            ``'acceleration'``; for the latter two, differentiation stages
            are appended to the response.
        :raises NotFound: if no responses are available at all, the channel
            is blacklisted or not whitelisted, or if zero or more than one
            candidate response matches.
        '''
        if (self.responses is None or len(self.responses) == 0) \
                and (self.responses_stationxml is None
                     or len(self.responses_stationxml) == 0):

            raise NotFound('No response information available.')

        quantity_to_unit = {
            'displacement': 'M',
            'velocity': 'M/S',
            'acceleration': 'M/S**2'}

        if self.is_blacklisted(obj):
            raise NotFound('Response is blacklisted:', self.get_nslc(obj))

        if not self.is_whitelisted(obj):
            raise NotFound('Response is not on whitelist:', self.get_nslc(obj))

        net, sta, loc, cha = self.get_nslc(obj)
        tmin, tmax = self.get_tmin_tmax(obj)

        # lookup keys, from most to least specific
        keys_x = [
            (net, sta, loc, cha), (net, sta, '', cha), ('', sta, '', cha)]

        keys = []
        for k in keys_x:
            if k not in keys:
                keys.append(k)

        candidates = []
        for k in keys:
            if k in self.responses:
                for x in self.responses[k]:
                    # the response epoch must fully contain (tmin, tmax)
                    if x.tmin < tmin and (x.tmax is None or tmax < x.tmax):
                        if quantity == 'displacement':
                            candidates.append(x.response)
                        elif quantity == 'velocity':
                            candidates.append(trace.MultiplyResponse([
                                x.response,
                                trace.DifferentiationResponse()]))
                        elif quantity == 'acceleration':
                            candidates.append(trace.MultiplyResponse([
                                x.response,
                                trace.DifferentiationResponse(2)]))
                        else:
                            assert False

        for sx in self.responses_stationxml:
            try:
                candidates.append(
                    sx.get_pyrocko_response(
                        (net, sta, loc, cha),
                        timespan=(tmin, tmax),
                        fake_input_units=quantity_to_unit[quantity]))

            except (fs.NoResponseInformation, fs.MultipleResponseInformation):
                pass

        if len(candidates) == 1:
            return candidates[0]

        elif len(candidates) == 0:
            raise NotFound('No response found:', (net, sta, loc, cha))
        else:
            raise NotFound('Multiple responses found:', (net, sta, loc, cha))
    def get_waveform_raw(
            self, obj,
            tmin,
            tmax,
            tpad=0.,
            toffset_noise_extract=0.,
            want_incomplete=False,
            extend_incomplete=False):
        '''Extract raw waveform traces for a channel and time window.

        :param obj: object the nslc codes can be derived from.
        :param tmin: start of the extraction window.
        :param tmax: end of the extraction window.
        :param tpad: padding added on both sides of the window.
        :param toffset_noise_extract: time shift applied while extracting
            (used to pull real noise for synthetic tests); it is removed
            from the traces afterwards.
        :param want_incomplete: accept missing/gappy data without raising.
        :param extend_incomplete: repeat-extend a single incomplete trace
            to cover the full window.
        :raises NotFound: if the channel is blacklisted, not whitelisted,
            clipped, or the data is missing/incomplete.
        '''
        net, sta, loc, cha = self.get_nslc(obj)

        if self.is_blacklisted((net, sta, loc, cha)):
            raise NotFound(
                'Waveform is blacklisted:', (net, sta, loc, cha))

        if not self.is_whitelisted((net, sta, loc, cha)):
            raise NotFound(
                'Waveform is not on whitelist:', (net, sta, loc, cha))

        if self.clip_handling == 'by_nsl':
            if self.has_clipping((net, sta, loc), tmin, tmax):
                raise NotFound(
                    'Waveform clipped:', (net, sta, loc))

        elif self.clip_handling == 'by_nslc':
            if self.has_clipping((net, sta, loc, cha), tmin, tmax):
                raise NotFound(
                    'Waveform clipped:', (net, sta, loc, cha))

        trs = self.pile.all(
            tmin=tmin+toffset_noise_extract,
            tmax=tmax+toffset_noise_extract,
            tpad=tpad,
            trace_selector=lambda tr: tr.nslc_id == (net, sta, loc, cha),
            want_incomplete=want_incomplete or extend_incomplete)

        if self.apply_displaced_sampling_workaround:
            for tr in trs:
                tr.snap()

        # undo the noise-extraction shift so traces line up with the
        # requested window
        if toffset_noise_extract != 0.0:
            for tr in trs:
                tr.shift(-toffset_noise_extract)

        if extend_incomplete and len(trs) == 1:
            trs[0].extend(
                tmin + toffset_noise_extract - tpad,
                tmax + toffset_noise_extract + tpad,
                fillmethod='repeat')

        if not want_incomplete and len(trs) != 1:
            if len(trs) == 0:
                message = 'Waveform missing or incomplete.'
            else:
                message = 'Waveform has gaps.'

            raise NotFound(
                message,
                codes=(net, sta, loc, cha),
                time_range=(
                    tmin + toffset_noise_extract - tpad,
                    tmax + toffset_noise_extract + tpad))

        return trs
    def get_waveform_restituted(
            self,
            obj, quantity='displacement',
            tmin=None, tmax=None, tpad=0.,
            tfade=0., freqlimits=None, deltat=None,
            toffset_noise_extract=0.,
            want_incomplete=False,
            extend_incomplete=False):
        '''Get raw waveforms with the instrument response deconvolved.

        Traces are optionally downsampled to ``deltat`` before restitution.

        :returns: tuple ``(trs_restituted, trs_raw)``.
        :raises NotFound: if data or responses are unavailable or the
            deconvolution is unstable.
        '''
        if deltat is not None:
            # extra padding to absorb downsampling edge effects
            tpad_downsample = deltat * 50.
        else:
            tpad_downsample = 0.

        trs_raw = self.get_waveform_raw(
            obj, tmin=tmin, tmax=tmax, tpad=tpad+tfade+tpad_downsample,
            toffset_noise_extract=toffset_noise_extract,
            want_incomplete=want_incomplete,
            extend_incomplete=extend_incomplete)

        trs_restituted = []
        for tr in trs_raw:
            if deltat is not None:
                tr.downsample_to(deltat, snap=True, allow_upsample_max=5)
                # force the exact target sampling interval
                # (presumably to remove float rounding from downsampling
                # -- TODO confirm)
                tr.deltat = deltat

            resp = self.get_response(tr, quantity=quantity)
            try:
                trs_restituted.append(
                    tr.transfer(
                        tfade=tfade, freqlimits=freqlimits,
                        transfer_function=resp, invert=True, demean=True))

            except trace.InfiniteResponse:
                raise NotFound(
                    'Instrument response deconvolution failed '
                    '(divide by zero)', tr.nslc_id)

        return trs_restituted, trs_raw
    def _get_projections(
            self, station, backazimuth, source, target, tmin, tmax):
        '''Determine matrices to project raw data components to ENZ, and
        to RTZ when a backazimuth is given or derivable from source/target.

        :raises NotFound: if no projection can be determined.
        '''
        # fill in missing channel information (happens when station file
        # does not contain any channel information)
        if not station.get_channels():
            station = copy.deepcopy(station)

            nsl = station.nsl()
            trs = self.pile.all(
                tmin=tmin, tmax=tmax,
                trace_selector=lambda tr: tr.nslc_id[:3] == nsl,
                load_data=False)

            channels = list(set(tr.channel for tr in trs))
            station.set_channels_by_name(*channels)

        projections = []
        projections.extend(station.guess_projections_to_enu(
            out_channels=('E', 'N', 'Z')))

        # explicit source/target geometry overrides a given backazimuth
        if source is not None and target is not None:
            backazimuth = source.azibazi_to(target)[1]

        if backazimuth is not None:
            projections.extend(station.guess_projections_to_rtu(
                out_channels=('R', 'T', 'Z'),
                backazimuth=backazimuth))

        if not projections:
            raise NotFound(
                'Cannot determine projection of data components:',
                station.nsl())

        return projections
664 def _get_waveform(
665 self,
666 obj, quantity='displacement',
667 tmin=None, tmax=None, tpad=0.,
668 tfade=0., freqlimits=None, deltat=None, cache=None,
669 backazimuth=None,
670 source=None,
671 target=None,
672 debug=False):
674 assert not debug or (debug and cache is None)
676 if cache is True:
677 cache = self._cache
679 _, _, _, channel = self.get_nslc(obj)
680 station = self.get_station(self.get_nsl(obj))
682 nslc = station.nsl() + (channel,)
684 if self.is_blacklisted(nslc):
685 raise NotFound(
686 'Waveform is blacklisted:', nslc)
688 if not self.is_whitelisted(nslc):
689 raise NotFound(
690 'Waveform is not on whitelist:', nslc)
692 assert tmin is not None
693 assert tmax is not None
695 tmin = float(tmin)
696 tmax = float(tmax)
698 nslc = tuple(nslc)
700 cache_k = nslc + (
701 tmin, tmax, tuple(freqlimits), tfade, deltat, tpad, quantity)
702 if cache is not None and (nslc + cache_k) in cache:
703 obj = cache[nslc + cache_k]
704 if isinstance(obj, Exception):
705 raise obj
706 elif obj is None:
707 raise NotFound('Waveform not found!', nslc)
708 else:
709 return obj
711 syn_test = self.synthetic_test
712 toffset_noise_extract = 0.0
713 if syn_test:
714 if not syn_test.respect_data_availability:
715 if syn_test.real_noise_scale != 0.0:
716 raise DatasetError(
717 'respect_data_availability=False and '
718 'addition of real noise cannot be combined.')
720 tr = syn_test.get_waveform(
721 nslc, tmin, tmax,
722 tfade=tfade,
723 freqlimits=freqlimits)
725 if cache is not None:
726 cache[tr.nslc_id + cache_k] = tr
728 if debug:
729 return [], [], []
730 else:
731 return tr
733 if syn_test.real_noise_scale != 0.0:
734 toffset_noise_extract = syn_test.real_noise_toffset
736 abs_delays = []
737 for ocha in 'ENZRT':
738 sc = self.station_corrections.get(station.nsl() + (channel,), None)
739 if sc:
740 abs_delays.append(abs(sc.delay))
742 if abs_delays:
743 abs_delay_max = max(abs_delays)
744 else:
745 abs_delay_max = 0.0
747 projections = self._get_projections(
748 station, backazimuth, source, target, tmin, tmax)
750 try:
751 trs_projected = []
752 trs_restituted = []
753 trs_raw = []
754 exceptions = []
755 for matrix, in_channels, out_channels in projections:
756 deps = trace.project_dependencies(
757 matrix, in_channels, out_channels)
759 try:
760 trs_restituted_group = []
761 trs_raw_group = []
762 if channel in deps:
763 for cha in deps[channel]:
764 trs_restituted_this, trs_raw_this = \
765 self.get_waveform_restituted(
766 station.nsl() + (cha,),
767 quantity=quantity,
768 tmin=tmin, tmax=tmax,
769 tpad=tpad+abs_delay_max,
770 toffset_noise_extract=toffset_noise_extract, # noqa
771 tfade=tfade,
772 freqlimits=freqlimits,
773 deltat=deltat,
774 want_incomplete=debug,
775 extend_incomplete=self.extend_incomplete)
777 trs_restituted_group.extend(trs_restituted_this)
778 trs_raw_group.extend(trs_raw_this)
780 trs_projected.extend(
781 trace.project(
782 trs_restituted_group, matrix,
783 in_channels, out_channels))
785 trs_restituted.extend(trs_restituted_group)
786 trs_raw.extend(trs_raw_group)
788 except NotFound as e:
789 exceptions.append((in_channels, out_channels, e))
791 if not trs_projected:
792 err = []
793 for (in_channels, out_channels, e) in exceptions:
794 sin = ', '.join(c.name for c in in_channels)
795 sout = ', '.join(c.name for c in out_channels)
796 err.append('(%s) -> (%s): %s' % (sin, sout, e))
798 raise NotFound('\n'.join(err))
800 for tr in trs_projected:
801 sc = self.station_corrections.get(tr.nslc_id, None)
802 if sc:
803 if self.apply_correction_factors:
804 tr.ydata /= sc.factor
806 if self.apply_correction_delays:
807 tr.shift(-sc.delay)
809 if tmin is not None and tmax is not None:
810 tr.chop(tmin, tmax)
812 if syn_test:
813 trs_projected_synthetic = []
814 for tr in trs_projected:
815 if tr.channel == channel:
816 tr_syn = syn_test.get_waveform(
817 tr.nslc_id, tmin, tmax,
818 tfade=tfade, freqlimits=freqlimits)
820 if tr_syn:
821 if syn_test.real_noise_scale != 0.0:
822 tr_syn = tr_syn.copy()
823 tr_noise = tr.copy()
824 tr_noise.set_ydata(
825 tr_noise.get_ydata()
826 * syn_test.real_noise_scale)
828 tr_syn.add(tr_noise)
830 trs_projected_synthetic.append(tr_syn)
832 trs_projected = trs_projected_synthetic
834 if cache is not None:
835 for tr in trs_projected:
836 cache[tr.nslc_id + cache_k] = tr
838 tr_return = None
839 for tr in trs_projected:
840 if tr.channel == channel:
841 tr_return = tr
843 if debug:
844 return trs_projected, trs_restituted, trs_raw, tr_return
846 elif tr_return:
847 return tr_return
849 else:
850 raise NotFound(
851 'waveform not available', station.nsl() + (channel,))
853 except NotFound:
854 if cache is not None:
855 cache[nslc + cache_k] = None
856 raise
    def get_waveform(self, obj, tinc_cache=None, **kwargs):
        '''Get a processed waveform, optionally via a time-binned cache.

        When ``tinc_cache`` is given, the extraction window is rounded
        outward to multiples of ``tinc_cache`` so that nearby requests
        share cache entries; the result is chopped back to the requested
        span. In debug mode the raw multi-trace result is passed through.
        '''
        tmin = kwargs['tmin']
        tmax = kwargs['tmax']
        if tinc_cache is not None:
            tmin_r = (math.floor(tmin / tinc_cache) - 1.0) * tinc_cache
            tmax_r = (math.ceil(tmax / tinc_cache) + 1.0) * tinc_cache
        else:
            tmin_r = tmin
            tmax_r = tmax

        kwargs['tmin'] = tmin_r
        kwargs['tmax'] = tmax_r

        if kwargs.get('debug', None):
            return self._get_waveform(obj, **kwargs)
        else:
            tr = self._get_waveform(obj, **kwargs)
            return tr.chop(tmin, tmax, inplace=False)
877 def get_events(self, magmin=None, event_names=None):
878 evs = []
879 for ev in self.events:
880 if ((magmin is None or ev.magnitude >= magmin) and
881 (event_names is None or ev.name in event_names)):
882 evs.append(ev)
884 return evs
886 def get_event_by_time(self, t, magmin=None):
887 evs = self.get_events(magmin=magmin)
888 ev_x = None
889 for ev in evs:
890 if ev_x is None or abs(ev.time - t) < abs(ev_x.time - t):
891 ev_x = ev
893 if not ev_x:
894 raise NotFound(
895 'No event information matching criteria (t=%s, magmin=%s).' %
896 (t, magmin))
898 return ev_x
900 def get_event(self):
901 if self._event_name is None:
902 raise NotFound('No main event selected in dataset!')
904 for ev in self.events:
905 if ev.name == self._event_name:
906 return ev
908 raise NotFound('No such event: %s' % self._event_name)
    def get_picks(self):
        '''Build (lazily, once) a lookup table of phase picks.

        Keys are ``(nsl, phasename, eventname)``; values are the matching
        phase markers. Event association uses the markers' event hashes.

        :raises DatasetError: on duplicate event names, duplicate picks,
            or picks not associated with any known event.
        '''
        if self._picks is None:
            hash_to_name = {}
            names = set()
            for marker in self.pick_markers:
                if isinstance(marker, pmarker.EventMarker):
                    name = marker.get_event().name
                    if name in names:
                        raise DatasetError(
                            'Duplicate event name "%s" in picks.' % name)

                    names.add(name)
                    hash_to_name[marker.get_event_hash()] = name

            # events known to the dataset are also valid associations
            for ev in self.events:
                hash_to_name[ev.get_hash()] = ev.name

            picks = {}
            for marker in self.pick_markers:
                if isinstance(marker, pmarker.PhaseMarker):
                    ehash = marker.get_event_hash()

                    nsl = marker.one_nslc()[:3]
                    phasename = marker.get_phasename()

                    if ehash is None or ehash not in hash_to_name:
                        raise DatasetError(
                            'Unassociated pick: %s.%s.%s, %s' %
                            (nsl + (phasename, )))

                    eventname = hash_to_name[ehash]

                    if (nsl, phasename, eventname) in picks:
                        raise DatasetError(
                            'Duplicate pick: %s.%s.%s, %s' %
                            (nsl + (phasename, )))

                    picks[nsl, phasename, eventname] = marker

            self._picks = picks

        return self._picks
    def get_pick(self, eventname, obj, phasename):
        '''Return the pick for given event, station and phase, or ``None``
        if no such pick exists.'''
        nsl = self.get_nsl(obj)
        return self.get_picks().get((nsl, phasename, eventname), None)
class DatasetConfig(HasPaths):
    ''' Configuration for a Grond `Dataset` object. '''

    stations_path = Path.T(
        optional=True,
        help='List of files with station coordinates in Pyrocko format.')

    stations_stationxml_paths = List.T(
        Path.T(),
        optional=True,
        help='List of files with station coordinates in StationXML format.')

    events_path = Path.T(
        optional=True,
        help='File with hypocenter information and possibly'
             ' reference solution')

    waveform_paths = List.T(
        Path.T(),
        optional=True,
        help='List of directories with raw waveform data')

    clippings_path = Path.T(
        optional=True)

    responses_sacpz_path = Path.T(
        optional=True,
        help='List of SACPZ response files for restitution of'
             ' the raw waveform data.')

    responses_stationxml_paths = List.T(
        Path.T(),
        optional=True,
        help='List of StationXML response files for restitution of'
             ' the raw waveform data.')

    station_corrections_path = Path.T(
        optional=True,
        help='File containing station correction informations.')

    apply_correction_factors = Bool.T(
        optional=True,
        default=True,
        help='Apply correction factors from station corrections.')

    apply_correction_delays = Bool.T(
        optional=True,
        default=True,
        help='Apply correction delays from station corrections.')

    apply_displaced_sampling_workaround = Bool.T(
        optional=True,
        default=False,
        help='Work around displaced sampling issues.')

    extend_incomplete = Bool.T(
        default=False,
        help='Extend incomplete seismic traces.')

    picks_paths = List.T(
        Path.T())

    blacklist_paths = List.T(
        Path.T(),
        help='List of text files with blacklisted stations.')

    blacklist = List.T(
        String.T(),
        help='Stations/components to be excluded according to their STA, '
             'NET.STA, NET.STA.LOC, or NET.STA.LOC.CHA codes.')

    whitelist_paths = List.T(
        Path.T(),
        help='List of text files with whitelisted stations.')

    whitelist = List.T(
        String.T(),
        optional=True,
        help='If not None, list of stations/components to include according '
             'to their STA, NET.STA, NET.STA.LOC, or NET.STA.LOC.CHA codes. '
             'Note: ''when whitelisting on channel level, both, the raw and '
             'the processed channel codes have to be listed.')

    synthetic_test = SyntheticTest.T(
        optional=True)

    kite_scene_paths = List.T(
        Path.T(),
        optional=True,
        help='List of directories for the InSAR scenes.')

    gnss_campaign_paths = List.T(
        Path.T(),
        optional=True,
        help='List of directories for the GNSS campaign data.')

    def __init__(self, *args, **kwargs):
        HasPaths.__init__(self, *args, **kwargs)
        # cache of constructed Dataset objects, keyed by event name
        self._ds = {}
1041 def get_event_names(self):
1042 logger.info('Loading events ...')
1044 def extra(path):
1045 return expand_template(path, dict(
1046 event_name='*'))
1048 def fp(path):
1049 return self.expand_path(path, extra=extra)
1051 def check_events(events, fn):
1052 for ev in events:
1053 if not ev.name:
1054 logger.warning('Event in %s has no name!', fn)
1055 return
1056 if not ev.lat or not ev.lon:
1057 logger.warning('Event %s has inconsistent coordinates!',
1058 ev.name)
1059 if not ev.depth:
1060 logger.warning('Event %s has no depth!', ev.name)
1061 if not ev.time:
1062 logger.warning('Event %s has no time!', ev.name)
1064 events = []
1065 events_path = fp(self.events_path)
1066 fns = glob.glob(events_path)
1067 if not fns:
1068 raise DatasetError('No event files matching "%s".' % events_path)
1070 for fn in fns:
1071 logger.debug('Loading from file %s' % fn)
1072 ev = model.load_events(filename=fn)
1073 check_events(ev, fn)
1075 events.extend(ev)
1077 event_names = [ev_.name for ev_ in events]
1078 event_names.sort()
1079 return event_names
    def get_dataset(self, event_name):
        '''Build (or fetch from cache) the configured Dataset for an event.

        All configured data sources are expanded with the event name and
        loaded into a fresh :py:class:`Dataset`; the result is memoized per
        event name.

        :raises DatasetError: if any configured file cannot be loaded.
        '''
        if event_name not in self._ds:
            def extra(path):
                return expand_template(path, dict(
                    event_name=event_name))

            def fp(path):
                # expand a configured path (or list of paths) and warn
                # about non-existing entries
                p = self.expand_path(path, extra=extra)
                if p is None:
                    return None

                if isinstance(p, list):
                    for path in p:
                        if not op.exists(path):
                            logger.warning('Path %s does not exist.' % path)
                else:
                    if not op.exists(p):
                        logger.warning('Path %s does not exist.' % p)

                return p

            ds = Dataset(event_name)
            try:
                ds.add_events(filename=fp(self.events_path))

                ds.add_stations(
                    pyrocko_stations_filename=fp(self.stations_path),
                    stationxml_filenames=fp(self.stations_stationxml_paths))

                if self.waveform_paths:
                    ds.add_waveforms(paths=fp(self.waveform_paths))

                if self.kite_scene_paths:
                    ds.add_kite_scenes(paths=fp(self.kite_scene_paths))

                if self.gnss_campaign_paths:
                    ds.add_gnss_campaigns(paths=fp(self.gnss_campaign_paths))

                if self.clippings_path:
                    ds.add_clippings(markers_filename=fp(self.clippings_path))

                if self.responses_sacpz_path:
                    ds.add_responses(
                        sacpz_dirname=fp(self.responses_sacpz_path))

                if self.responses_stationxml_paths:
                    ds.add_responses(
                        stationxml_filenames=fp(
                            self.responses_stationxml_paths))

                if self.station_corrections_path:
                    ds.add_station_corrections(
                        filename=fp(self.station_corrections_path))

                ds.apply_correction_factors = self.apply_correction_factors
                ds.apply_correction_delays = self.apply_correction_delays
                ds.apply_displaced_sampling_workaround = \
                    self.apply_displaced_sampling_workaround
                ds.extend_incomplete = self.extend_incomplete

                for picks_path in self.picks_paths:
                    ds.add_picks(
                        filename=fp(picks_path))

                ds.add_blacklist(self.blacklist)
                ds.add_blacklist(filenames=fp(self.blacklist_paths))
                if self.whitelist:
                    ds.add_whitelist(self.whitelist)
                if self.whitelist_paths:
                    ds.add_whitelist(filenames=fp(self.whitelist_paths))

                ds.set_synthetic_test(copy.deepcopy(self.synthetic_test))
                self._ds[event_name] = ds
            except (FileLoadError, OSError) as e:
                raise DatasetError(str(e))

        return self._ds[event_name]
# Public API of this module.
__all__ = [
    'Dataset',
    'DatasetConfig',
    'DatasetError',
    'InvalidObject',
    'NotFound',
    'StationCorrection',
    'load_station_corrections',
    'dump_station_corrections',
]