Coverage for /usr/local/lib/python3.11/dist-packages/grond/dataset.py: 72%
697 statements
« prev ^ index » next coverage.py v6.5.0, created at 2025-04-03 09:31 +0000
« prev ^ index » next coverage.py v6.5.0, created at 2025-04-03 09:31 +0000
1# https://pyrocko.org/grond - GPLv3
2#
3# The Grond Developers, 21st Century
5import glob
6import copy
7import os.path as op
8import logging
9import math
10import numpy as num
12from collections import defaultdict
13from pyrocko import util, model, trace, marker as pmarker
14from pyrocko.io.io_common import FileLoadError
15from pyrocko.fdsn import enhanced_sacpz, station as fs
16from pyrocko.guts import (Object, Tuple, String, Float, List, Bool, dump_all,
17 load_all)
18from pyrocko import squirrel
20from pyrocko import gf
22from .meta import Path, HasPaths, expand_template, GrondError
24from .synthetic_tests import SyntheticTest
26guts_prefix = 'grond'
27logger = logging.getLogger('grond.dataset')
def quote_paths(paths):
    '''Format a sequence of paths as a comma-separated, double-quoted list.'''
    return ', '.join(f'"{path}"' for path in paths)
class InvalidObject(Exception):
    '''Raised when an object's NSL/NSLC codes cannot be determined.'''
    pass
class NotFound(Exception):
    '''
    Raised when a requested dataset item (station, response, waveform,
    scene, campaign, event) is not available.

    :param reason: human-readable description of what is missing
    :param codes: optional NSL/NSLC codes of the missing item
    :param time_range: optional ``(tmin, tmax)`` of the missing item
    '''

    def __init__(self, reason, codes=None, time_range=None):
        self.reason = reason
        self.time_range = time_range
        self.codes = codes

    def __str__(self):
        parts = [self.reason]
        if self.codes:
            parts.append(' (%s)' % '.'.join(self.codes))

        if self.time_range:
            parts.append(' (%s - %s)' % (
                util.time_to_str(self.time_range[0]),
                util.time_to_str(self.time_range[1])))

        return ''.join(parts)
class DatasetError(GrondError):
    '''Raised on inconsistent or unloadable dataset configuration/content.'''
    pass
class StationCorrection(Object):
    '''Per-channel static correction applied to observed waveforms.'''

    # NSLC codes of the channel this correction applies to.
    codes = Tuple.T(4, String.T())
    # Time shift [s]; applied with inverted sign in Dataset._get_waveform.
    delay = Float.T()
    # Amplitude factor; waveform is divided by it when applied.
    factor = Float.T()
def load_station_corrections(filename):
    '''
    Load a list of :py:class:`StationCorrection` objects from a YAML file.

    :param filename: path to the corrections file
    :returns: list of :py:class:`StationCorrection` objects
    '''
    corrections = load_all(filename=filename)
    for correction in corrections:
        assert isinstance(correction, StationCorrection)

    return corrections
def dump_station_corrections(station_corrections, filename):
    '''Write a list of :py:class:`StationCorrection` objects to a YAML file.'''
    return dump_all(station_corrections, filename=filename)
class Dataset(object):
    '''
    Container for all observed data needed to process one event:
    stations, waveforms (via Squirrel), responses, InSAR scenes, GNSS
    campaigns, picks, clippings and station corrections.
    '''

    def __init__(self, event_name=None):
        self.events = []                     # list of pyrocko events
        self.squirrel = squirrel.Squirrel()  # waveform database handle
        self.stations = {}                   # nsl tuple -> Station
        self.responses = defaultdict(list)   # codes -> SACPZ entries
        self.responses_stationxml = []       # loaded FDSNStationXML objects
        self.clippings = {}                  # nsl/nslc -> array of clip times
        self.exclude = set()                 # codes to reject
        self.include_nslc = None             # None means "include everything"
        self.include_nsl_xx = None           # nsl part of channel-level includes
        self.include = None                  # station-level includes
        self.station_corrections = {}        # codes -> StationCorrection
        self.station_factors = {}
        self.pick_markers = []
        self.apply_correction_delays = True
        self.apply_correction_factors = True
        self.apply_displaced_sampling_workaround = False
        self.extend_incomplete = False
        self.clip_handling = 'by_nsl'        # or 'by_nslc', see get_waveform_raw
        self.kite_scenes = []
        self.gnss_campaigns = []
        self.synthetic_test = None
        self._picks = None                   # lazy cache built by get_picks()
        self._cache = {}                     # processed waveform cache
        self._event_name = event_name        # name of the main event
    def close(self):
        '''Drop the waveform database handle; dataset is unusable afterwards.'''
        # Delete first to release the Squirrel object, then re-create the
        # attribute as None so later attribute access does not raise.
        del self.squirrel
        self.squirrel = None
111 def empty_cache(self):
112 self._cache = {}
114 def set_synthetic_test(self, synthetic_test):
115 self.synthetic_test = synthetic_test
    def add_stations(
            self,
            stations=None,
            pyrocko_stations_filename=None,
            stationxml_filenames=None):
        '''
        Add station metadata from objects, a Pyrocko station file and/or
        StationXML files. Later additions overwrite earlier ones with the
        same NSL codes.

        :param stations: iterable of :py:class:`pyrocko.model.Station`
        :param pyrocko_stations_filename: Pyrocko-format station file
        :param stationxml_filenames: list of StationXML file paths;
            non-existing paths are silently skipped
        '''

        if stations is not None:
            for station in stations:
                self.stations[station.nsl()] = station

        if pyrocko_stations_filename is not None:
            logger.debug(
                'Loading stations from file "%s"...' %
                pyrocko_stations_filename)

            for station in model.load_stations(pyrocko_stations_filename):
                self.stations[station.nsl()] = station

        if stationxml_filenames is not None and len(stationxml_filenames) > 0:

            for stationxml_filename in stationxml_filenames:
                if not op.exists(stationxml_filename):
                    continue

                logger.debug(
                    'Loading stations from StationXML file "%s"...' %
                    stationxml_filename)

                sx = fs.load_xml(filename=stationxml_filename)
                # Stations are extracted for the epoch of the main event.
                ev = self.get_event()
                try:
                    stations = sx.get_pyrocko_stations(
                        time=ev.time, sloppy=True)
                except TypeError:
                    # Older Pyrocko versions lack the `sloppy` argument;
                    # fall back to the strict extraction.
                    logger.warning(
                        'The installed version of Pyrocko does not support '
                        'the keyword argument `sloppy` in method '
                        '`FDSNStationXML.get_pyrocko_stations`. Please '
                        'upgrade Pyrocko.')

                    stations = sx.get_pyrocko_stations(time=ev.time)

                if len(stations) == 0:
                    logger.warning(
                        'No stations found for time %s in file "%s".' % (
                            util.time_to_str(ev.time), stationxml_filename))

                for station in stations:
                    logger.debug('Adding station: %s.%s.%s' % station.nsl())
                    channels = station.get_channels()
                    # Vertical-only stations get mocked horizontal channels
                    # so later channel projections have something to work on.
                    if len(channels) == 1 and channels[0].name.endswith('Z'):
                        logger.warning(
                            'Station "%s" has vertical component'
                            ' information only, adding mocked channels.'
                            % station.nsl_string())
                        station.add_channel(
                            model.Channel(channels[0].name[:-1] + 'N'))
                        station.add_channel(
                            model.Channel(channels[0].name[:-1] + 'E'))

                    self.stations[station.nsl()] = station
179 def add_events(self, events=None, filename=None):
180 if events is not None:
181 self.events.extend(events)
183 if filename is not None:
184 logger.debug('Loading events from file "%s"...' % filename)
185 try:
186 events = model.load_events(filename)
187 self.events.extend(events)
188 logger.info(
189 'Loading events from %s: %i events found.' %
190 (filename, len(events)))
191 except Exception as e:
192 logger.warning('Could not load events from %s!', filename)
193 raise e
    def add_waveforms(self, paths, regex=None, fileformat='detect',
                      show_progress=False):
        '''
        Register waveform data sources with the Squirrel backend.

        :param paths: files or directories to add

        NOTE(review): ``regex``, ``fileformat`` and ``show_progress`` are
        accepted for backward compatibility but currently ignored — only
        ``paths`` is forwarded to the Squirrel.
        '''
        self.squirrel.add(paths)
    def add_responses(self, sacpz_dirname=None, stationxml_filenames=None):
        '''
        Add instrument response information.

        :param sacpz_dirname: directory with enhanced SAC pole-zero files
        :param stationxml_filenames: list of StationXML file paths;
            non-existing paths are silently skipped
        '''
        if sacpz_dirname:
            logger.debug(
                'Loading SAC PZ responses from "%s"...' % sacpz_dirname)
            for x in enhanced_sacpz.iload_dirname(sacpz_dirname):
                self.responses[x.codes].append(x)

        if stationxml_filenames:
            for stationxml_filename in stationxml_filenames:
                if not op.exists(stationxml_filename):
                    continue

                logger.debug(
                    'Loading StationXML responses from "%s"...' %
                    stationxml_filename)

                self.responses_stationxml.append(
                    fs.load_xml(filename=stationxml_filename))
219 def add_clippings(self, markers_filename):
220 markers = pmarker.load_markers(markers_filename)
221 clippings = {}
222 for marker in markers:
223 nslc = marker.one_nslc()
224 nsl = nslc[:3]
225 if nsl not in clippings:
226 clippings[nsl] = []
228 if nslc not in clippings:
229 clippings[nslc] = []
231 clippings[nsl].append(marker.tmin)
232 clippings[nslc].append(marker.tmin)
234 for k, times in clippings.items():
235 atimes = num.array(times, dtype=float)
236 if k not in self.clippings:
237 self.clippings[k] = atimes
238 else:
239 self.clippings[k] = num.concatenate(self.clippings, atimes)
241 def add_exclude(self, exclude=[], filenames=None):
242 logger.debug('Loading exclude definitions...')
243 if filenames:
244 exclude = list(exclude)
245 for filename in filenames:
246 if op.exists(filename):
247 with open(filename, 'r') as f:
248 exclude.extend(
249 s.strip() for s in f.read().splitlines())
250 else:
251 logger.warning('No such exclude file: %s' % filename)
253 for x in exclude:
254 if isinstance(x, str):
255 x = tuple(x.split('.'))
256 self.exclude.add(x)
258 def add_include(self, include=[], filenames=None):
259 logger.debug('Loading included stations...')
260 if filenames:
261 include = list(include)
262 for filename in filenames:
263 with open(filename, 'r') as f:
264 include.extend(s.strip() for s in f.read().splitlines())
266 if self.include_nslc is None:
267 self.include_nslc = set()
268 self.include = set()
269 self.include_nsl_xx = set()
271 for x in include:
272 if isinstance(x, str):
273 x = tuple(x.split('.'))
274 if len(x) == 4:
275 self.include_nslc.add(x)
276 self.include_nsl_xx.add(x[:3])
277 else:
278 self.include.add(x)
280 def add_station_corrections(self, filename):
281 self.station_corrections.update(
282 (sc.codes, sc) for sc in load_station_corrections(filename))
284 def add_picks(self, filename):
285 self.pick_markers.extend(
286 pmarker.load_markers(filename))
288 self._picks = None
290 def add_gnss_campaigns(self, paths):
291 paths = util.select_files(
292 paths,
293 include=r'\.yml|\.yaml',
294 show_progress=False)
296 for path in paths:
297 self.add_gnss_campaign(filename=path)
299 def add_gnss_campaign(self, filename):
300 from pyrocko.model import gnss # noqa
302 logger.debug('Loading GNSS campaign from "%s"...' % filename)
304 campaign = load_all(filename=filename)
305 self.gnss_campaigns.append(campaign[0])
307 def add_kite_scenes(self, paths):
308 logger.info('Loading kite InSAR scenes...')
309 paths = util.select_files(
310 paths,
311 include=r'\.npz',
312 show_progress=False)
314 for path in paths:
315 self.add_kite_scene(filename=path)
317 if not self.kite_scenes:
318 logger.warning('Could not find any kite scenes at %s.' %
319 quote_paths(self.kite_scene_paths))
321 def add_kite_scene(self, filename):
322 try:
323 from kite import Scene
324 except ImportError:
325 raise ImportError('Module kite could not be imported,'
326 ' please install from https://pyrocko.org.')
327 logger.debug('Loading kite scene from "%s"...' % filename)
329 scene = Scene.load(filename)
330 scene._log.setLevel(logger.level)
332 try:
333 self.get_kite_scene(scene.meta.scene_id)
334 except NotFound:
335 self.kite_scenes.append(scene)
336 else:
337 raise AttributeError('Kite scene_id not unique for "%s".'
338 % filename)
340 def is_excluded(self, obj):
341 try:
342 nslc = self.get_nslc(obj)
343 if nslc in self.exclude:
344 return True
346 except InvalidObject:
347 pass
349 nsl = self.get_nsl(obj)
350 return (
351 nsl in self.exclude or
352 nsl[1:2] in self.exclude or
353 nsl[:2] in self.exclude)
    def is_included(self, obj):
        '''
        Check whether *obj* matches the include sets. Returns ``True`` when
        no include list is configured at all.

        NOTE(review): when an NSLC can be derived but is not in
        ``include_nslc``, the function falls through and implicitly returns
        ``None`` (falsy) rather than ``False`` — callers using boolean
        context are unaffected.
        '''
        if self.include_nslc is None:
            # No include restriction configured: everything is included.
            return True

        nsl = self.get_nsl(obj)

        # Station-level match on NSL, STA or NET.STA codes.
        if (
                nsl in self.include or
                nsl[1:2] in self.include or
                nsl[:2] in self.include):

            return True

        try:
            nslc = self.get_nslc(obj)
            if nslc in self.include_nslc:
                return True

        except InvalidObject:
            # Objects without channel information (e.g. stations) pass if
            # any channel of theirs is included.
            return nsl in self.include_nsl_xx
376 def has_clipping(self, nsl_or_nslc, tmin, tmax):
377 if nsl_or_nslc not in self.clippings:
378 return False
380 atimes = self.clippings[nsl_or_nslc]
381 return num.any(num.logical_and(tmin < atimes, atimes <= tmax))
383 def get_nsl(self, obj):
384 if isinstance(obj, trace.Trace):
385 net, sta, loc, _ = obj.nslc_id
386 elif isinstance(obj, model.Station):
387 net, sta, loc = obj.nsl()
388 elif isinstance(obj, gf.Target):
389 net, sta, loc, _ = obj.codes
390 elif isinstance(obj, tuple) and len(obj) in (3, 4):
391 net, sta, loc = obj[:3]
392 else:
393 raise InvalidObject(
394 'Cannot get nsl code from given object of type "%s".'
395 % type(obj))
397 return net, sta, loc
399 def get_nslc(self, obj):
400 if isinstance(obj, trace.Trace):
401 return obj.nslc_id
402 elif isinstance(obj, gf.Target):
403 return obj.codes
404 elif isinstance(obj, tuple) and len(obj) == 4:
405 return obj
406 else:
407 raise InvalidObject(
408 'Cannot get nslc code from given object "%s"' % type(obj))
410 def get_tmin_tmax(self, obj):
411 if isinstance(obj, trace.Trace):
412 return obj.tmin, obj.tmax
413 else:
414 raise InvalidObject(
415 'Cannot get tmin and tmax from given object of type "%s"' %
416 type(obj))
418 def get_station(self, obj):
419 if self.is_excluded(obj):
420 raise NotFound('Station is in exclude list:', self.get_nsl(obj))
422 if not self.is_included(obj):
423 raise NotFound(
424 'Station is not on include list:', self.get_nsl(obj))
426 if isinstance(obj, model.Station):
427 return obj
429 net, sta, loc = self.get_nsl(obj)
431 keys = [(net, sta, loc), (net, sta, ''), ('', sta, '')]
432 for k in keys:
433 if k in self.stations:
434 return self.stations[k]
436 raise NotFound('No station information:', keys)
438 def get_stations(self):
439 return [self.stations[k] for k in sorted(self.stations)
440 if not self.is_excluded(self.stations[k])
441 and self.is_included(self.stations[k])]
    def get_kite_scenes(self):
        '''Return the list of loaded kite scenes (the live list, not a copy).'''
        return self.kite_scenes
446 def get_kite_scene(self, scene_id=None):
447 if scene_id is None:
448 if len(self.kite_scenes) == 0:
449 raise AttributeError('No kite displacements defined.')
450 return self.kite_scenes[0]
451 else:
452 for scene in self.kite_scenes:
453 if scene.meta.scene_id == scene_id:
454 return scene
455 raise NotFound('No kite scene with id "%s" defined.' % scene_id)
    def get_gnss_campaigns(self):
        '''Return the list of loaded GNSS campaigns (the live list, not a copy).'''
        return self.gnss_campaigns
460 def get_gnss_campaign(self, name):
461 for camp in self.gnss_campaigns:
462 if camp.name == name:
463 return camp
464 raise NotFound('GNSS campaign %s not found!' % name)
    def get_response(self, obj, quantity='displacement'):
        '''
        Find exactly one instrument response for *obj*, searching SACPZ
        entries first, then StationXML files. For velocity/acceleration
        quantities, SACPZ displacement responses are chained with
        differentiation.

        :param quantity: one of ``displacement``, ``velocity``,
            ``acceleration`` and their ``rotation_*`` variants
        :raises NotFound: if no response, multiple responses, or *obj* is
            excluded / not included
        '''
        if (self.responses is None or len(self.responses) == 0) \
                and (self.responses_stationxml is None
                     or len(self.responses_stationxml) == 0):

            raise NotFound('No response information available.')

        # Units used as fake input units when querying StationXML.
        quantity_to_unit = {
            'displacement': 'M',
            'velocity': 'M/S',
            'acceleration': 'M/S**2',
            'rotation_displacement': 'RAD',
            'rotation_velocity': 'RAD/S',
            'rotation_acceleration': 'RAD/S**2'}

        if self.is_excluded(obj):
            raise NotFound(
                'Response is in exclude list:', self.get_nslc(obj))

        if not self.is_included(obj):
            raise NotFound(
                'Response is not on include list:', self.get_nslc(obj))

        net, sta, loc, cha = self.get_nslc(obj)
        tmin, tmax = self.get_tmin_tmax(obj)

        # Lookup keys from most to least specific; duplicates removed while
        # preserving order.
        keys_x = [
            (net, sta, loc, cha), (net, sta, '', cha), ('', sta, '', cha)]

        keys = []
        for k in keys_x:
            if k not in keys:
                keys.append(k)

        candidates = []
        for k in keys:
            if k in self.responses:
                for x in self.responses[k]:
                    # Response epoch must fully cover the requested span.
                    if x.tmin < tmin and (x.tmax is None or tmax < x.tmax):
                        if quantity in (
                                'displacement', 'rotation_displacement'):
                            candidates.append(x.response)
                        elif quantity in (
                                'velocity', 'rotation_velocity'):
                            candidates.append(trace.MultiplyResponse([
                                x.response,
                                trace.DifferentiationResponse()]))
                        elif quantity in (
                                'acceleration', 'rotation_acceleration'):
                            candidates.append(trace.MultiplyResponse([
                                x.response,
                                trace.DifferentiationResponse(2)]))
                        else:
                            assert False

        for sx in self.responses_stationxml:
            try:
                candidates.append(
                    sx.get_pyrocko_response(
                        (net, sta, loc, cha),
                        timespan=(tmin, tmax),
                        fake_input_units=quantity_to_unit[quantity]))

            except (fs.NoResponseInformation, fs.MultipleResponseInformation):
                pass

        # Exactly one candidate is required; ambiguity is treated as
        # "not found" so the target is skipped rather than misprocessed.
        if len(candidates) == 1:
            return candidates[0]

        elif len(candidates) == 0:
            raise NotFound('No response found:', (net, sta, loc, cha))
        else:
            raise NotFound('Multiple responses found:', (net, sta, loc, cha))
    def get_waveform_raw(
            self, obj,
            tmin,
            tmax,
            tpad=0.,
            toffset_noise_extract=0.,
            want_incomplete=False,
            extend_incomplete=False):
        '''
        Fetch raw waveform traces for one channel from the Squirrel.

        :param tpad: extra padding [s] added on both sides
        :param toffset_noise_extract: shift [s] of the extraction window,
            used for real-noise extraction in synthetic tests; traces are
            shifted back by this amount before returning
        :param want_incomplete: accept gappy/missing data
        :param extend_incomplete: repair a single incomplete trace by
            repeat-extension to the full window
        :returns: list of :py:class:`pyrocko.trace.Trace`
        :raises NotFound: if excluded/not included, clipped, or incomplete
            while ``want_incomplete`` is false
        '''
        net, sta, loc, cha = self.get_nslc(obj)

        if self.is_excluded((net, sta, loc, cha)):
            raise NotFound(
                'Waveform is in exclude list:', (net, sta, loc, cha))

        if not self.is_included((net, sta, loc, cha)):
            raise NotFound(
                'Waveform is not on include list:', (net, sta, loc, cha))

        # Clipping rejection granularity is configurable on the dataset.
        if self.clip_handling == 'by_nsl':
            if self.has_clipping((net, sta, loc), tmin, tmax):
                raise NotFound(
                    'Waveform clipped:', (net, sta, loc))

        elif self.clip_handling == 'by_nslc':
            if self.has_clipping((net, sta, loc, cha), tmin, tmax):
                raise NotFound(
                    'Waveform clipped:', (net, sta, loc, cha))

        trs = self.squirrel.get_waveforms(
            tmin=tmin+toffset_noise_extract-tpad,
            tmax=tmax+toffset_noise_extract+tpad,
            codes=(net, sta, loc, cha),
            want_incomplete=want_incomplete or extend_incomplete)

        if self.apply_displaced_sampling_workaround:
            for tr in trs:
                tr.snap()

        # Undo the noise-extraction offset so returned traces are aligned
        # with the requested window.
        if toffset_noise_extract != 0.0:
            for tr in trs:
                tr.shift(-toffset_noise_extract)

        if extend_incomplete and len(trs) == 1:
            trs[0].extend(
                tmin + toffset_noise_extract - tpad,
                tmax + toffset_noise_extract + tpad,
                fillmethod='repeat')

        if not want_incomplete and len(trs) != 1:
            if len(trs) == 0:
                message = 'Waveform missing or incomplete.'
            else:
                message = 'Waveform has gaps.'

            raise NotFound(
                message,
                codes=(net, sta, loc, cha),
                time_range=(
                    tmin + toffset_noise_extract - tpad,
                    tmax + toffset_noise_extract + tpad))

        return trs
604 def get_waveform_restituted(
605 self,
606 obj, quantity='displacement',
607 tmin=None, tmax=None, tpad=0.,
608 tfade=0., freqlimits=None, deltat=None,
609 toffset_noise_extract=0.,
610 want_incomplete=False,
611 extend_incomplete=False):
613 if deltat is not None:
614 tpad_downsample = deltat * 50.
615 else:
616 tpad_downsample = 0.
618 trs_raw = self.get_waveform_raw(
619 obj, tmin=tmin, tmax=tmax, tpad=tpad+tfade+tpad_downsample,
620 toffset_noise_extract=toffset_noise_extract,
621 want_incomplete=want_incomplete,
622 extend_incomplete=extend_incomplete)
624 trs_restituted = []
625 for tr in trs_raw:
626 if deltat is not None:
627 try:
628 tr.downsample_to(deltat, snap=True, allow_upsample_max=5)
630 except util.UnavailableDecimation:
631 logger.warn('using resample instead of decimation')
632 tr.resample(deltat)
634 tr.deltat = deltat
636 resp = self.get_response(tr, quantity=quantity)
637 try:
638 trs_restituted.append(
639 tr.transfer(
640 tfade=tfade, freqlimits=freqlimits,
641 transfer_function=resp, invert=True, demean=True))
643 except trace.InfiniteResponse:
644 raise NotFound(
645 'Instrument response deconvolution failed '
646 '(divide by zero)', tr.nslc_id)
648 return trs_restituted, trs_raw
    def _get_projections(
            self, station, backazimuth, source, target, tmin, tmax):
        '''
        Determine matrix projections from the station's raw channels to
        ENZ and (if a backazimuth is available) RTZ component systems.

        :returns: list of ``(matrix, in_channels, out_channels)`` tuples
        :raises NotFound: if no projection can be determined
        '''
        # fill in missing channel information (happens when station file
        # does not contain any channel information)
        if not station.get_channels():
            station = copy.deepcopy(station)

            nsl = station.nsl()
            # NOTE(review): relies on the Squirrel's pile emulation
            # (`squirrel.pile`) — confirm availability on the installed
            # Pyrocko version.
            trs = self.squirrel.pile.all(
                tmin=tmin, tmax=tmax,
                trace_selector=lambda tr: tr.nslc_id[:3] == nsl,
                load_data=False)

            channels = list(set(tr.channel for tr in trs))
            station.set_channels_by_name(*channels)

        projections = []
        projections.extend(station.guess_projections_to_enu(
            out_channels=('E', 'N', 'Z')))

        # An explicit source/target pair overrides a given backazimuth.
        if source is not None and target is not None:
            backazimuth = source.azibazi_to(target)[1]

        if backazimuth is not None:
            projections.extend(station.guess_projections_to_rtu(
                out_channels=('R', 'T', 'Z'),
                backazimuth=backazimuth))

        if not projections:
            raise NotFound(
                'Cannot determine projection of data components:',
                station.nsl())

        return projections
    def _get_waveform(
            self,
            obj, quantity='displacement',
            tmin=None, tmax=None, tpad=0.,
            tfade=0., freqlimits=None, deltat=None, cache=None,
            backazimuth=None,
            source=None,
            target=None,
            debug=False):
        '''
        Produce one fully processed (restituted, projected, corrected,
        optionally synthetic-substituted) waveform for the channel of
        *obj*.

        :param cache: ``True`` to use the dataset's internal cache, or a
            dict-like cache object; cached entries may be traces, ``None``
            (known-missing) or exceptions
        :param debug: return the tuple
            ``(trs_projected, trs_restituted, trs_raw, tr_return)``
            instead of a single trace; incompatible with caching
        :raises NotFound: if the waveform cannot be produced
        '''
        assert not debug or (debug and cache is None)

        if cache is True:
            cache = self._cache

        _, _, _, channel = self.get_nslc(obj)
        station = self.get_station(self.get_nsl(obj))

        nslc = station.nsl() + (channel,)

        if self.is_excluded(nslc):
            raise NotFound(
                'Waveform is excluded:', nslc)

        if not self.is_included(nslc):
            raise NotFound(
                'Waveform is not on include list:', nslc)

        assert tmin is not None
        assert tmax is not None

        tmin = float(tmin)
        tmax = float(tmax)

        nslc = tuple(nslc)

        # NOTE(review): cache_k already starts with nslc, and lookups/stores
        # prepend the trace's nslc again — the doubled codes are redundant
        # but used consistently for both store and lookup.
        # NOTE(review): `tuple(freqlimits)` raises TypeError when
        # freqlimits is None — callers apparently always pass a sequence;
        # confirm.
        cache_k = nslc + (
            tmin, tmax, tuple(freqlimits), tfade, deltat, tpad, quantity)
        if cache is not None and (nslc + cache_k) in cache:
            obj = cache[nslc + cache_k]
            if isinstance(obj, Exception):
                raise obj
            elif obj is None:
                raise NotFound('Waveform not found!', nslc)
            else:
                return obj

        syn_test = self.synthetic_test
        toffset_noise_extract = 0.0
        if syn_test:
            if not syn_test.respect_data_availability:
                if syn_test.real_noise_scale != 0.0:
                    raise DatasetError(
                        'respect_data_availability=False and '
                        'addition of real noise cannot be combined.')

                # Pure synthetics: bypass all real-data processing.
                tr = syn_test.get_waveform(
                    nslc, tmin, tmax,
                    tfade=tfade,
                    freqlimits=freqlimits)

                if cache is not None:
                    cache[tr.nslc_id + cache_k] = tr

                if debug:
                    return [], [], [], []
                else:
                    return tr

            if syn_test.real_noise_scale != 0.0:
                toffset_noise_extract = syn_test.real_noise_toffset

        # Collect correction delays so padding can absorb later shifts.
        # NOTE(review): the loop variable `ocha` is unused — the same
        # station-corrections key is looked up five times; looks like the
        # key was meant to use `ocha` instead of `channel`. Behavior kept.
        abs_delays = []
        for ocha in 'ENZRT':
            sc = self.station_corrections.get(station.nsl() + (channel,), None)
            if sc:
                abs_delays.append(abs(sc.delay))

        if abs_delays:
            abs_delay_max = max(abs_delays)
        else:
            abs_delay_max = 0.0

        projections = self._get_projections(
            station, backazimuth, source, target, tmin, tmax)

        try:
            trs_projected = []
            trs_restituted = []
            trs_raw = []
            exceptions = []
            for matrix, in_channels, out_channels in projections:
                deps = trace.project_dependencies(
                    matrix, in_channels, out_channels)

                try:
                    trs_restituted_group = []
                    trs_raw_group = []
                    if channel in deps:
                        # Restitute every raw channel the requested output
                        # channel depends on, then project.
                        for cha in deps[channel]:
                            trs_restituted_this, trs_raw_this = \
                                self.get_waveform_restituted(
                                    station.nsl() + (cha,),
                                    quantity=quantity,
                                    tmin=tmin, tmax=tmax,
                                    tpad=tpad+abs_delay_max,
                                    toffset_noise_extract=toffset_noise_extract,  # noqa
                                    tfade=tfade,
                                    freqlimits=freqlimits,
                                    deltat=deltat,
                                    want_incomplete=debug,
                                    extend_incomplete=self.extend_incomplete)

                            trs_restituted_group.extend(trs_restituted_this)
                            trs_raw_group.extend(trs_raw_this)

                        trs_projected.extend(
                            trace.project(
                                trs_restituted_group, matrix,
                                in_channels, out_channels))

                        trs_restituted.extend(trs_restituted_group)
                        trs_raw.extend(trs_raw_group)

                except NotFound as e:
                    exceptions.append((in_channels, out_channels, e))

            if not trs_projected:
                # All projections failed: report every failure in one
                # aggregated NotFound.
                err = []
                for (in_channels, out_channels, e) in exceptions:
                    sin = ', '.join(c.name for c in in_channels)
                    sout = ', '.join(c.name for c in out_channels)
                    err.append('(%s) -> (%s): %s' % (sin, sout, e))

                raise NotFound('\n'.join(err))

            # Apply per-channel static corrections and trim to the window.
            for tr in trs_projected:
                sc = self.station_corrections.get(tr.nslc_id, None)
                if sc:
                    if self.apply_correction_factors:
                        tr.ydata /= sc.factor

                    if self.apply_correction_delays:
                        tr.shift(-sc.delay)

                if tmin is not None and tmax is not None:
                    tr.chop(tmin, tmax)

            if syn_test:
                # Replace matching projected traces by synthetics, mixing
                # in scaled real noise if configured.
                trs_projected_synthetic = []
                for tr in trs_projected:
                    if tr.channel == channel:
                        tr_syn = syn_test.get_waveform(
                            tr.nslc_id, tmin, tmax,
                            tfade=tfade, freqlimits=freqlimits)

                        if tr_syn:
                            if syn_test.real_noise_scale != 0.0:
                                tr_syn = tr_syn.copy()
                                tr_noise = tr.copy()
                                tr_noise.set_ydata(
                                    tr_noise.get_ydata()
                                    * syn_test.real_noise_scale)

                                tr_syn.add(tr_noise)

                            trs_projected_synthetic.append(tr_syn)

                trs_projected = trs_projected_synthetic

            if cache is not None:
                for tr in trs_projected:
                    cache[tr.nslc_id + cache_k] = tr

            tr_return = None
            for tr in trs_projected:
                if tr.channel == channel:
                    tr_return = tr

            if debug:
                return trs_projected, trs_restituted, trs_raw, tr_return

            elif tr_return:
                return tr_return

            else:
                raise NotFound(
                    'waveform not available', station.nsl() + (channel,))

        except NotFound:
            # Remember the failure so repeated requests short-circuit.
            if cache is not None:
                cache[nslc + cache_k] = None
            raise
880 def get_waveform(self, obj, tinc_cache=None, **kwargs):
881 tmin = kwargs['tmin']
882 tmax = kwargs['tmax']
883 if tinc_cache is not None:
884 tmin_r = (math.floor(tmin / tinc_cache) - 1.0) * tinc_cache
885 tmax_r = (math.ceil(tmax / tinc_cache) + 1.0) * tinc_cache
886 else:
887 tmin_r = tmin
888 tmax_r = tmax
890 kwargs['tmin'] = tmin_r
891 kwargs['tmax'] = tmax_r
893 if kwargs.get('debug', None):
894 return self._get_waveform(obj, **kwargs)
895 else:
896 tr = self._get_waveform(obj, **kwargs)
897 return tr.chop(tmin, tmax, inplace=False)
899 def get_events(self, magmin=None, event_names=None):
900 evs = []
901 for ev in self.events:
902 if ((magmin is None or ev.magnitude >= magmin) and
903 (event_names is None or ev.name in event_names)):
904 evs.append(ev)
906 return evs
908 def get_event_by_time(self, t, magmin=None):
909 evs = self.get_events(magmin=magmin)
910 ev_x = None
911 for ev in evs:
912 if ev_x is None or abs(ev.time - t) < abs(ev_x.time - t):
913 ev_x = ev
915 if not ev_x:
916 raise NotFound(
917 'No event information matching criteria (t=%s, magmin=%s).' %
918 (t, magmin))
920 return ev_x
922 def get_event(self):
923 if self._event_name is None:
924 raise NotFound('No main event selected in dataset!')
926 for ev in self.events:
927 if ev.name == self._event_name:
928 return ev
930 raise NotFound('No such event: %s' % self._event_name)
    def get_picks(self):
        '''
        Build (lazily, cached) a lookup of phase picks keyed by
        ``(nsl, phasename, eventname)``.

        Event association works through event hashes: hashes from event
        markers in the pick file and from the dataset's own events are
        mapped to event names first.

        :raises DatasetError: on duplicate event names, duplicate picks,
            or picks that cannot be associated to a known event
        '''
        if self._picks is None:
            hash_to_name = {}
            names = set()
            for marker in self.pick_markers:
                if isinstance(marker, pmarker.EventMarker):
                    name = marker.get_event().name
                    if name in names:
                        raise DatasetError(
                            'Duplicate event name "%s" in picks.' % name)

                    names.add(name)
                    hash_to_name[marker.get_event_hash()] = name

            # Dataset events may also be referenced by their hash.
            for ev in self.events:
                hash_to_name[ev.get_hash()] = ev.name

            picks = {}
            for marker in self.pick_markers:
                if isinstance(marker, pmarker.PhaseMarker):
                    ehash = marker.get_event_hash()

                    nsl = marker.one_nslc()[:3]
                    phasename = marker.get_phasename()

                    if ehash is None or ehash not in hash_to_name:
                        raise DatasetError(
                            'Unassociated pick: %s.%s.%s, %s' %
                            (nsl + (phasename, )))

                    eventname = hash_to_name[ehash]

                    if (nsl, phasename, eventname) in picks:
                        raise DatasetError(
                            'Duplicate pick: %s.%s.%s, %s' %
                            (nsl + (phasename, )))

                    picks[nsl, phasename, eventname] = marker

            self._picks = picks

        return self._picks
975 def get_pick(self, eventname, obj, phasename):
976 nsl = self.get_nsl(obj)
977 return self.get_picks().get((nsl, phasename, eventname), None)
class DatasetConfig(HasPaths):
    ''' Configuration for a Grond `Dataset` object. '''

    stations_path = Path.T(
        optional=True,
        help='List of files with station coordinates in Pyrocko format.')
    stations_stationxml_paths = List.T(
        Path.T(),
        optional=True,
        help='List of files with station coordinates in StationXML format.')
    events_path = Path.T(
        optional=True,
        help='File with hypocenter information and possibly'
             ' reference solution')
    waveform_paths = List.T(
        Path.T(),
        optional=True,
        help='List of directories with raw waveform data')
    clippings_path = Path.T(
        optional=True)
    responses_sacpz_path = Path.T(
        optional=True,
        help='List of SACPZ response files for restitution of'
             ' the raw waveform data.')
    responses_stationxml_paths = List.T(
        Path.T(),
        optional=True,
        help='List of StationXML response files for restitution of'
             ' the raw waveform data.')
    station_corrections_path = Path.T(
        optional=True,
        help='File containing station correction informations.')
    apply_correction_factors = Bool.T(
        optional=True,
        default=True,
        help='Apply correction factors from station corrections.')
    apply_correction_delays = Bool.T(
        optional=True,
        default=True,
        help='Apply correction delays from station corrections.')
    apply_displaced_sampling_workaround = Bool.T(
        optional=True,
        default=False,
        help='Work around displaced sampling issues.')
    extend_incomplete = Bool.T(
        default=False,
        help='Extend incomplete seismic traces.')
    picks_paths = List.T(
        Path.T())
    exclude_paths = List.T(
        Path.T(),
        help='List of text files with excluded stations.')
    exclude = List.T(
        String.T(),
        help='Stations/components to be excluded according to their STA, '
             'NET.STA, NET.STA.LOC, or NET.STA.LOC.CHA codes.')
    include_paths = List.T(
        Path.T(),
        help='List of text files with included stations.')
    include = List.T(
        String.T(),
        optional=True,
        help='If not None, list of stations/components to include according '
             'to their STA, NET.STA, NET.STA.LOC, or NET.STA.LOC.CHA codes. '
             'Note: ''when including on channel level, both, the raw and '
             'the processed channel codes have to be listed.')
    synthetic_test = SyntheticTest.T(
        optional=True)

    kite_scene_paths = List.T(
        Path.T(),
        optional=True,
        help='List of directories for the InSAR scenes.')

    gnss_campaign_paths = List.T(
        Path.T(),
        optional=True,
        help='List of directories for the GNSS campaign data.')

    # for backward compatibility
    blacklist = List.T(String.T(), optional=True)
    whitelist = List.T(String.T(), optional=True)
    blacklist_paths = List.T(Path.T(), optional=True)
    whitelist_paths = List.T(Path.T(), optional=True)
1066 def __init__(self, *args, **kwargs):
1067 HasPaths.__init__(self, *args, **kwargs)
1069 # for backward compatibility
1070 if self.blacklist:
1071 self.exclude.extend(self.blacklist)
1072 self.blacklist = None
1073 if self.whitelist:
1074 self.include.extend(self.whitelist)
1075 self.whitelist = None
1076 if self.blacklist_paths:
1077 self.exclude_paths.extend(self.blacklist_paths)
1078 self.blacklist_paths = None
1079 if self.whitelist_paths:
1080 self.include_paths.extend(self.whitelist_paths)
1081 self.whitelist_paths = None
1083 self._ds = {}
1085 def get_event_names(self):
1086 logger.info('Loading events ...')
1088 def extra(path):
1089 return expand_template(path, dict(
1090 event_name='*'))
1092 def fp(path):
1093 return self.expand_path(path, extra=extra)
1095 def check_events(events, fn):
1096 for ev in events:
1097 if not ev.name:
1098 logger.warning('Event in %s has no name!', fn)
1099 return
1100 if not ev.lat or not ev.lon:
1101 logger.warning('Event %s has inconsistent coordinates!',
1102 ev.name)
1103 if not ev.depth:
1104 logger.warning('Event %s has no depth!', ev.name)
1105 if not ev.time:
1106 logger.warning('Event %s has no time!', ev.name)
1108 events = []
1109 events_path = fp(self.events_path)
1110 fns = glob.glob(events_path)
1111 if not fns:
1112 raise DatasetError('No event files matching "%s".' % events_path)
1114 for fn in fns:
1115 logger.debug('Loading from file %s' % fn)
1116 ev = model.load_events(filename=fn)
1117 check_events(ev, fn)
1119 events.extend(ev)
1121 event_names = [ev_.name for ev_ in events]
1122 event_names.sort()
1123 return event_names
    def get_dataset(self, event_name):
        '''
        Build (or return the cached) :py:class:`Dataset` for one event,
        expanding all configured paths with the event name and loading all
        configured data sources.

        :raises DatasetError: if any configured file cannot be loaded
        '''
        if event_name not in self._ds:
            def extra(path):
                return expand_template(path, dict(
                    event_name=event_name))

            def fp(path):
                # Expand a configured path (or list of paths) and warn
                # about entries that do not exist.
                p = self.expand_path(path, extra=extra)
                if p is None:
                    return None

                if isinstance(p, list):
                    for path in p:
                        if not op.exists(path):
                            logger.warning('Path %s does not exist.' % path)
                else:
                    if not op.exists(p):
                        logger.warning('Path %s does not exist.' % p)

                return p

            ds = Dataset(event_name)
            try:
                ds.add_events(filename=fp(self.events_path))

                ds.add_stations(
                    pyrocko_stations_filename=fp(self.stations_path),
                    stationxml_filenames=fp(self.stations_stationxml_paths))

                if self.waveform_paths:
                    ds.add_waveforms(paths=fp(self.waveform_paths))

                if self.kite_scene_paths:
                    ds.add_kite_scenes(paths=fp(self.kite_scene_paths))

                if self.gnss_campaign_paths:
                    ds.add_gnss_campaigns(paths=fp(self.gnss_campaign_paths))

                if self.clippings_path:
                    ds.add_clippings(markers_filename=fp(self.clippings_path))

                if self.responses_sacpz_path:
                    ds.add_responses(
                        sacpz_dirname=fp(self.responses_sacpz_path))

                if self.responses_stationxml_paths:
                    ds.add_responses(
                        stationxml_filenames=fp(
                            self.responses_stationxml_paths))

                if self.station_corrections_path:
                    ds.add_station_corrections(
                        filename=fp(self.station_corrections_path))

                # Forward the correction/workaround switches to the dataset.
                ds.apply_correction_factors = self.apply_correction_factors
                ds.apply_correction_delays = self.apply_correction_delays
                ds.apply_displaced_sampling_workaround = \
                    self.apply_displaced_sampling_workaround
                ds.extend_incomplete = self.extend_incomplete

                for picks_path in self.picks_paths:
                    ds.add_picks(
                        filename=fp(picks_path))

                ds.add_exclude(self.exclude)
                ds.add_exclude(filenames=fp(self.exclude_paths))
                if self.include:
                    ds.add_include(self.include)
                if self.include_paths:
                    ds.add_include(filenames=fp(self.include_paths))

                # Deep copy so per-event state in the synthetic test does
                # not leak between datasets.
                ds.set_synthetic_test(copy.deepcopy(self.synthetic_test))
                self._ds[event_name] = ds
            except (FileLoadError, OSError) as e:
                raise DatasetError(str(e))

        return self._ds[event_name]
# Explicit public API of this module.
__all__ = '''
    Dataset
    DatasetConfig
    DatasetError
    InvalidObject
    NotFound
    StationCorrection
    load_station_corrections
    dump_station_corrections
'''.split()