Coverage for /usr/local/lib/python3.11/dist-packages/grond/dataset.py: 72%
681 statements
« prev ^ index » next coverage.py v6.5.0, created at 2023-11-01 12:39 +0000
« prev ^ index » next coverage.py v6.5.0, created at 2023-11-01 12:39 +0000
1import glob
2import copy
3import os.path as op
4import logging
5import math
6import numpy as num
8from collections import defaultdict
9from pyrocko import util, pile, model, config, trace, \
10 marker as pmarker
11from pyrocko.io.io_common import FileLoadError
12from pyrocko.fdsn import enhanced_sacpz, station as fs
13from pyrocko.guts import (Object, Tuple, String, Float, List, Bool, dump_all,
14 load_all)
16from pyrocko import gf
18from .meta import Path, HasPaths, expand_template, GrondError
20from .synthetic_tests import SyntheticTest
22guts_prefix = 'grond'
23logger = logging.getLogger('grond.dataset')
def quote_paths(paths):
    '''Return the given paths as one comma-separated, double-quoted string.'''
    quoted = ['"%s"' % path for path in paths]
    return ', '.join(quoted)
class InvalidObject(Exception):
    '''
    Raised when an object cannot be interpreted as station/trace/target
    codes (see ``Dataset.get_nsl`` / ``Dataset.get_nslc``).
    '''
    pass
class NotFound(Exception):
    '''
    Raised when a requested dataset item (station, response, waveform, ...)
    is unavailable.

    :param reason: human-readable explanation
    :param codes: optional station/channel codes involved
    :param time_range: optional ``(tmin, tmax)`` of the failed request
    '''

    def __init__(self, reason, codes=None, time_range=None):
        self.reason = reason
        self.time_range = time_range
        self.codes = codes

    def __str__(self):
        parts = [self.reason]
        if self.codes:
            parts.append(' (%s)' % '.'.join(self.codes))

        if self.time_range:
            parts.append(' (%s - %s)' % (
                util.time_to_str(self.time_range[0]),
                util.time_to_str(self.time_range[1])))

        return ''.join(parts)
class DatasetError(GrondError):
    '''Raised on inconsistent or broken dataset configuration/content.'''
    pass
class StationCorrection(Object):
    '''
    Static per-channel correction applied to observed waveforms.
    '''
    # (network, station, location, channel) codes this correction applies to
    codes = Tuple.T(4, String.T())
    # time shift; applied with negative sign in Dataset._get_waveform
    delay = Float.T()
    # amplitude factor; waveform is divided by this in Dataset._get_waveform
    factor = Float.T()
def load_station_corrections(filename):
    '''
    Load a list of :py:class:`StationCorrection` objects from a YAML file.
    '''
    corrections = load_all(filename=filename)
    for correction in corrections:
        assert isinstance(correction, StationCorrection)

    return corrections
def dump_station_corrections(station_corrections, filename):
    '''Write station corrections to a YAML file (delegates to guts).'''
    return dump_all(station_corrections, filename=filename)
75class Dataset(object):
    def __init__(self, event_name=None):
        '''
        Data access object bundling all observed and auxiliary data
        (waveforms, stations, responses, InSAR, GNSS, picks) for one event.

        :param event_name: name of the main event this dataset refers to,
            or ``None``.
        '''
        self.events = []                      # list of pyrocko event objects
        self.pile = pile.Pile()               # waveform archive accessor
        self.stations = {}                    # nsl tuple -> station object
        self.responses = defaultdict(list)    # codes -> SAC PZ responses
        self.responses_stationxml = []        # loaded StationXML documents
        self.clippings = {}                   # nsl/nslc -> array of clip times
        self.blacklist = set()                # excluded code tuples
        self.whitelist_nslc = None            # set once add_whitelist is used
        self.whitelist_nsl_xx = None
        self.whitelist = None
        self.station_corrections = {}         # codes -> StationCorrection
        self.station_factors = {}
        self.pick_markers = []
        self.apply_correction_delays = True
        self.apply_correction_factors = True
        self.apply_displaced_sampling_workaround = False
        self.extend_incomplete = False
        self.clip_handling = 'by_nsl'         # or 'by_nslc'
        self.kite_scenes = []
        self.gnss_campaigns = []
        self.synthetic_test = None
        self._picks = None                    # lazy cache for get_picks()
        self._cache = {}                      # processed-waveform cache
        self._event_name = event_name
103 def empty_cache(self):
104 self._cache = {}
    def set_synthetic_test(self, synthetic_test):
        '''Attach a synthetic test scenario (or ``None`` to disable).'''
        self.synthetic_test = synthetic_test
    def add_stations(
            self,
            stations=None,
            pyrocko_stations_filename=None,
            stationxml_filenames=None):
        '''
        Register station meta-data from objects and/or files.

        :param stations: iterable of pyrocko station objects
        :param pyrocko_stations_filename: stations file in Pyrocko format
        :param stationxml_filenames: list of StationXML files; stations are
            extracted at the main event's origin time
        '''

        if stations is not None:
            for station in stations:
                self.stations[station.nsl()] = station

        if pyrocko_stations_filename is not None:
            logger.debug(
                'Loading stations from file "%s"...' %
                pyrocko_stations_filename)

            for station in model.load_stations(pyrocko_stations_filename):
                self.stations[station.nsl()] = station

        if stationxml_filenames is not None and len(stationxml_filenames) > 0:

            for stationxml_filename in stationxml_filenames:
                # missing files are silently skipped here; get_dataset()
                # warns about non-existing paths beforehand
                if not op.exists(stationxml_filename):
                    continue

                logger.debug(
                    'Loading stations from StationXML file "%s"...' %
                    stationxml_filename)

                sx = fs.load_xml(filename=stationxml_filename)
                ev = self.get_event()
                try:
                    stations = sx.get_pyrocko_stations(
                        time=ev.time, sloppy=True)
                except TypeError:
                    # older Pyrocko without the `sloppy` keyword
                    logger.warning(
                        'The installed version of Pyrocko does not support '
                        'the keyword argument `sloppy` in method '
                        '`FDSNStationXML.get_pyrocko_stations`. Please '
                        'upgrade Pyrocko.')

                    stations = sx.get_pyrocko_stations(time=ev.time)

                if len(stations) == 0:
                    logger.warning(
                        'No stations found for time %s in file "%s".' % (
                            util.time_to_str(ev.time), stationxml_filename))

                for station in stations:
                    logger.debug('Adding station: %s.%s.%s' % station.nsl())
                    channels = station.get_channels()
                    # vertical-only stations get mocked horizontal channels
                    # so component projections can be set up later
                    if len(channels) == 1 and channels[0].name.endswith('Z'):
                        logger.warning(
                            'Station "%s" has vertical component'
                            ' information only, adding mocked channels.'
                            % station.nsl_string())
                        station.add_channel(
                            model.Channel(channels[0].name[:-1] + 'N'))
                        station.add_channel(
                            model.Channel(channels[0].name[:-1] + 'E'))

                    self.stations[station.nsl()] = station
    def add_events(self, events=None, filename=None):
        '''
        Add events from a list of event objects and/or from a file.

        :param events: iterable of pyrocko event objects
        :param filename: events file in Pyrocko format
        :raises Exception: re-raises any error from loading the file after
            logging a warning
        '''
        if events is not None:
            self.events.extend(events)

        if filename is not None:
            logger.debug('Loading events from file "%s"...' % filename)
            try:
                events = model.load_events(filename)
                self.events.extend(events)
                logger.info(
                    'Loading events from %s: %i events found.' %
                    (filename, len(events)))
            except Exception as e:
                logger.warning('Could not load events from %s!', filename)
                raise e
    def add_waveforms(self, paths, regex=None, fileformat='detect',
                      show_progress=False):
        '''
        Scan ``paths`` for waveform files and index them into the pile.

        :param regex: optional filename include-pattern
        :param fileformat: waveform file format, ``'detect'`` to guess
        '''
        # pile scan results are cached on disk across runs
        cachedirname = config.config().cache_dir

        logger.debug('Selecting waveform files %s...' % quote_paths(paths))
        fns = util.select_files(paths, include=regex,
                                show_progress=show_progress)
        cache = pile.get_cache(cachedirname)
        logger.debug('Scanning waveform files %s...' % quote_paths(paths))
        self.pile.load_files(sorted(fns), cache=cache,
                             fileformat=fileformat,
                             show_progress=show_progress)
    def add_responses(self, sacpz_dirname=None, stationxml_filenames=None):
        '''
        Load instrument responses from SAC PZ files and/or StationXML.

        :param sacpz_dirname: directory containing SAC PZ response files
        :param stationxml_filenames: list of StationXML files; non-existing
            paths are skipped
        '''
        if sacpz_dirname:
            logger.debug(
                'Loading SAC PZ responses from "%s"...' % sacpz_dirname)
            for x in enhanced_sacpz.iload_dirname(sacpz_dirname):
                self.responses[x.codes].append(x)

        if stationxml_filenames:
            for stationxml_filename in stationxml_filenames:
                if not op.exists(stationxml_filename):
                    continue

                logger.debug(
                    'Loading StationXML responses from "%s"...' %
                    stationxml_filename)

                self.responses_stationxml.append(
                    fs.load_xml(filename=stationxml_filename))
219 def add_clippings(self, markers_filename):
220 markers = pmarker.load_markers(markers_filename)
221 clippings = {}
222 for marker in markers:
223 nslc = marker.one_nslc()
224 nsl = nslc[:3]
225 if nsl not in clippings:
226 clippings[nsl] = []
228 if nslc not in clippings:
229 clippings[nslc] = []
231 clippings[nsl].append(marker.tmin)
232 clippings[nslc].append(marker.tmin)
234 for k, times in clippings.items():
235 atimes = num.array(times, dtype=float)
236 if k not in self.clippings:
237 self.clippings[k] = atimes
238 else:
239 self.clippings[k] = num.concatenate(self.clippings, atimes)
241 def add_blacklist(self, blacklist=[], filenames=None):
242 logger.debug('Loading blacklisted stations...')
243 if filenames:
244 blacklist = list(blacklist)
245 for filename in filenames:
246 if op.exists(filename):
247 with open(filename, 'r') as f:
248 blacklist.extend(
249 s.strip() for s in f.read().splitlines())
250 else:
251 logger.warning('No such blacklist file: %s' % filename)
253 for x in blacklist:
254 if isinstance(x, str):
255 x = tuple(x.split('.'))
256 self.blacklist.add(x)
258 def add_whitelist(self, whitelist=[], filenames=None):
259 logger.debug('Loading whitelisted stations...')
260 if filenames:
261 whitelist = list(whitelist)
262 for filename in filenames:
263 with open(filename, 'r') as f:
264 whitelist.extend(s.strip() for s in f.read().splitlines())
266 if self.whitelist_nslc is None:
267 self.whitelist_nslc = set()
268 self.whitelist = set()
269 self.whitelist_nsl_xx = set()
271 for x in whitelist:
272 if isinstance(x, str):
273 x = tuple(x.split('.'))
274 if len(x) == 4:
275 self.whitelist_nslc.add(x)
276 self.whitelist_nsl_xx.add(x[:3])
277 else:
278 self.whitelist.add(x)
280 def add_station_corrections(self, filename):
281 self.station_corrections.update(
282 (sc.codes, sc) for sc in load_station_corrections(filename))
284 def add_picks(self, filename):
285 self.pick_markers.extend(
286 pmarker.load_markers(filename))
288 self._picks = None
290 def add_gnss_campaigns(self, paths):
291 paths = util.select_files(
292 paths,
293 include=r'\.yml|\.yaml',
294 show_progress=False)
296 for path in paths:
297 self.add_gnss_campaign(filename=path)
    def add_gnss_campaign(self, filename):
        '''
        Load a single GNSS campaign from a YAML file.

        :raises ImportError: if the installed Pyrocko lacks
            :py:mod:`pyrocko.model.gnss`
        '''
        try:
            from pyrocko.model import gnss  # noqa
        except ImportError:
            raise ImportError(
                'Module pyrocko.model.gnss not found. Please upgrade Pyrocko.')

        logger.debug('Loading GNSS campaign from "%s"...' % filename)

        # file is expected to contain exactly one campaign object;
        # only the first document is used
        campaign = load_all(filename=filename)
        self.gnss_campaigns.append(campaign[0])
311 def add_kite_scenes(self, paths):
312 logger.info('Loading kite InSAR scenes...')
313 paths = util.select_files(
314 paths,
315 include=r'\.npz',
316 show_progress=False)
318 for path in paths:
319 self.add_kite_scene(filename=path)
321 if not self.kite_scenes:
322 logger.warning('Could not find any kite scenes at %s.' %
323 quote_paths(self.kite_scene_paths))
    def add_kite_scene(self, filename):
        '''
        Load one kite InSAR scene; scene ids must be unique.

        :raises ImportError: if the `kite` package is not installed
        :raises AttributeError: if a scene with the same id is already loaded
        '''
        try:
            from kite import Scene
        except ImportError:
            raise ImportError('Module kite could not be imported,'
                              ' please install from https://pyrocko.org.')
        logger.debug('Loading kite scene from "%s"...' % filename)

        scene = Scene.load(filename)
        scene._log.setLevel(logger.level)

        # uniqueness check: get_kite_scene raises NotFound when the id is
        # still free, which is the "good" case here
        try:
            self.get_kite_scene(scene.meta.scene_id)
        except NotFound:
            self.kite_scenes.append(scene)
        else:
            raise AttributeError('Kite scene_id not unique for "%s".'
                                 % filename)
344 def is_blacklisted(self, obj):
345 try:
346 nslc = self.get_nslc(obj)
347 if nslc in self.blacklist:
348 return True
350 except InvalidObject:
351 pass
353 nsl = self.get_nsl(obj)
354 return (
355 nsl in self.blacklist or
356 nsl[1:2] in self.blacklist or
357 nsl[:2] in self.blacklist)
    def is_whitelisted(self, obj):
        '''
        Check whether the station/channel given by ``obj`` is whitelisted.

        Returns ``True`` for everything while no whitelist has been set.
        '''
        if self.whitelist_nslc is None:
            return True

        nsl = self.get_nsl(obj)

        if (
                nsl in self.whitelist or
                nsl[1:2] in self.whitelist or
                nsl[:2] in self.whitelist):

            return True

        try:
            nslc = self.get_nslc(obj)
            if nslc in self.whitelist_nslc:
                return True

            # NOTE(review): when nslc is derivable but not whitelisted, the
            # function falls through and implicitly returns None (falsy) —
            # presumably intended as False; confirm before tightening.
        except InvalidObject:
            return nsl in self.whitelist_nsl_xx
380 def has_clipping(self, nsl_or_nslc, tmin, tmax):
381 if nsl_or_nslc not in self.clippings:
382 return False
384 atimes = self.clippings[nsl_or_nslc]
385 return num.any(num.logical_and(tmin < atimes, atimes <= tmax))
387 def get_nsl(self, obj):
388 if isinstance(obj, trace.Trace):
389 net, sta, loc, _ = obj.nslc_id
390 elif isinstance(obj, model.Station):
391 net, sta, loc = obj.nsl()
392 elif isinstance(obj, gf.Target):
393 net, sta, loc, _ = obj.codes
394 elif isinstance(obj, tuple) and len(obj) in (3, 4):
395 net, sta, loc = obj[:3]
396 else:
397 raise InvalidObject(
398 'Cannot get nsl code from given object of type "%s".'
399 % type(obj))
401 return net, sta, loc
403 def get_nslc(self, obj):
404 if isinstance(obj, trace.Trace):
405 return obj.nslc_id
406 elif isinstance(obj, gf.Target):
407 return obj.codes
408 elif isinstance(obj, tuple) and len(obj) == 4:
409 return obj
410 else:
411 raise InvalidObject(
412 'Cannot get nslc code from given object "%s"' % type(obj))
414 def get_tmin_tmax(self, obj):
415 if isinstance(obj, trace.Trace):
416 return obj.tmin, obj.tmax
417 else:
418 raise InvalidObject(
419 'Cannot get tmin and tmax from given object of type "%s"' %
420 type(obj))
422 def get_station(self, obj):
423 if self.is_blacklisted(obj):
424 raise NotFound('Station is blacklisted:', self.get_nsl(obj))
426 if not self.is_whitelisted(obj):
427 raise NotFound('Station is not on whitelist:', self.get_nsl(obj))
429 if isinstance(obj, model.Station):
430 return obj
432 net, sta, loc = self.get_nsl(obj)
434 keys = [(net, sta, loc), (net, sta, ''), ('', sta, '')]
435 for k in keys:
436 if k in self.stations:
437 return self.stations[k]
439 raise NotFound('No station information:', keys)
441 def get_stations(self):
442 return [self.stations[k] for k in sorted(self.stations)
443 if not self.is_blacklisted(self.stations[k])
444 and self.is_whitelisted(self.stations[k])]
    def get_kite_scenes(self):
        '''Return the list of loaded kite InSAR scenes.'''
        return self.kite_scenes
449 def get_kite_scene(self, scene_id=None):
450 if scene_id is None:
451 if len(self.kite_scenes) == 0:
452 raise AttributeError('No kite displacements defined.')
453 return self.kite_scenes[0]
454 else:
455 for scene in self.kite_scenes:
456 if scene.meta.scene_id == scene_id:
457 return scene
458 raise NotFound('No kite scene with id "%s" defined.' % scene_id)
    def get_gnss_campaigns(self):
        '''Return the list of loaded GNSS campaigns.'''
        return self.gnss_campaigns
463 def get_gnss_campaign(self, name):
464 for camp in self.gnss_campaigns:
465 if camp.name == name:
466 return camp
467 raise NotFound('GNSS campaign %s not found!' % name)
    def get_response(self, obj, quantity='displacement'):
        '''
        Get the instrument response for the channel given by ``obj``.

        :param obj: object with nslc codes and a time span (e.g. a trace)
        :param quantity: ``'displacement'``, ``'velocity'`` or
            ``'acceleration'`` — what the restituted data should represent
        :raises NotFound: if no response information is available, the
            channel is blacklisted/not whitelisted, no response matches, or
            multiple candidates match ambiguously
        '''
        if (self.responses is None or len(self.responses) == 0) \
                and (self.responses_stationxml is None
                     or len(self.responses_stationxml) == 0):

            raise NotFound('No response information available.')

        # units requested from StationXML so the returned response converts
        # raw counts to the desired quantity
        quantity_to_unit = {
            'displacement': 'M',
            'velocity': 'M/S',
            'acceleration': 'M/S**2'}

        if self.is_blacklisted(obj):
            raise NotFound('Response is blacklisted:', self.get_nslc(obj))

        if not self.is_whitelisted(obj):
            raise NotFound('Response is not on whitelist:', self.get_nslc(obj))

        net, sta, loc, cha = self.get_nslc(obj)
        tmin, tmax = self.get_tmin_tmax(obj)

        # try increasingly unspecific code combinations, preserving order
        # and dropping duplicates
        keys_x = [
            (net, sta, loc, cha), (net, sta, '', cha), ('', sta, '', cha)]

        keys = []
        for k in keys_x:
            if k not in keys:
                keys.append(k)

        candidates = []
        for k in keys:
            if k in self.responses:
                for x in self.responses[k]:
                    # response validity interval must cover the request
                    if x.tmin < tmin and (x.tmax is None or tmax < x.tmax):
                        if quantity == 'displacement':
                            candidates.append(x.response)
                        elif quantity == 'velocity':
                            # chain one differentiation onto the
                            # displacement response
                            candidates.append(trace.MultiplyResponse([
                                x.response,
                                trace.DifferentiationResponse()]))
                        elif quantity == 'acceleration':
                            candidates.append(trace.MultiplyResponse([
                                x.response,
                                trace.DifferentiationResponse(2)]))
                        else:
                            assert False

        for sx in self.responses_stationxml:
            try:
                candidates.append(
                    sx.get_pyrocko_response(
                        (net, sta, loc, cha),
                        timespan=(tmin, tmax),
                        fake_input_units=quantity_to_unit[quantity]))

            except (fs.NoResponseInformation, fs.MultipleResponseInformation):
                pass

        if len(candidates) == 1:
            return candidates[0]

        elif len(candidates) == 0:
            raise NotFound('No response found:', (net, sta, loc, cha))
        else:
            raise NotFound('Multiple responses found:', (net, sta, loc, cha))
    def get_waveform_raw(
            self, obj,
            tmin,
            tmax,
            tpad=0.,
            toffset_noise_extract=0.,
            want_incomplete=False,
            extend_incomplete=False):
        '''
        Get raw waveform traces for the channel given by ``obj``.

        :param tpad: padding [s] added on both sides of ``(tmin, tmax)``
        :param toffset_noise_extract: time shift [s] used to extract real
            noise for synthetic tests; traces are shifted back afterwards
        :param want_incomplete: accept gappy/missing data
        :param extend_incomplete: fill a single incomplete trace up to the
            padded window by repeating edge samples
        :raises NotFound: blacklisted/non-whitelisted channel, clipped data,
            or (unless ``want_incomplete``) missing/gappy data
        '''
        net, sta, loc, cha = self.get_nslc(obj)

        if self.is_blacklisted((net, sta, loc, cha)):
            raise NotFound(
                'Waveform is blacklisted:', (net, sta, loc, cha))

        if not self.is_whitelisted((net, sta, loc, cha)):
            raise NotFound(
                'Waveform is not on whitelist:', (net, sta, loc, cha))

        # clip check granularity is configurable: station or channel level
        if self.clip_handling == 'by_nsl':
            if self.has_clipping((net, sta, loc), tmin, tmax):
                raise NotFound(
                    'Waveform clipped:', (net, sta, loc))

        elif self.clip_handling == 'by_nslc':
            if self.has_clipping((net, sta, loc, cha), tmin, tmax):
                raise NotFound(
                    'Waveform clipped:', (net, sta, loc, cha))

        trs = self.pile.all(
            tmin=tmin+toffset_noise_extract,
            tmax=tmax+toffset_noise_extract,
            tpad=tpad,
            trace_selector=lambda tr: tr.nslc_id == (net, sta, loc, cha),
            want_incomplete=want_incomplete or extend_incomplete)

        if self.apply_displaced_sampling_workaround:
            # snap sample times to a regular grid to work around
            # displaced-sampling issues in some datasets
            for tr in trs:
                tr.snap()

        # undo the noise-extraction shift so traces align with the request
        if toffset_noise_extract != 0.0:
            for tr in trs:
                tr.shift(-toffset_noise_extract)

        if extend_incomplete and len(trs) == 1:
            trs[0].extend(
                tmin + toffset_noise_extract - tpad,
                tmax + toffset_noise_extract + tpad,
                fillmethod='repeat')

        if not want_incomplete and len(trs) != 1:
            if len(trs) == 0:
                message = 'Waveform missing or incomplete.'
            else:
                message = 'Waveform has gaps.'

            raise NotFound(
                message,
                codes=(net, sta, loc, cha),
                time_range=(
                    tmin + toffset_noise_extract - tpad,
                    tmax + toffset_noise_extract + tpad))

        return trs
    def get_waveform_restituted(
            self,
            obj, quantity='displacement',
            tmin=None, tmax=None, tpad=0.,
            tfade=0., freqlimits=None, deltat=None,
            toffset_noise_extract=0.,
            want_incomplete=False,
            extend_incomplete=False):
        '''
        Get instrument-corrected traces for the channel given by ``obj``.

        :param deltat: target sampling interval [s]; traces are downsampled
            before restitution when given
        :param tfade: taper length [s] for the spectral-domain transfer
        :param freqlimits: corner frequencies for restitution
        :returns: tuple ``(trs_restituted, trs_raw)``
        :raises NotFound: if the response deconvolution divides by zero
        '''

        if deltat is not None:
            # extra padding so downsampling edge effects fall outside the
            # requested window
            tpad_downsample = deltat * 50.
        else:
            tpad_downsample = 0.

        trs_raw = self.get_waveform_raw(
            obj, tmin=tmin, tmax=tmax, tpad=tpad+tfade+tpad_downsample,
            toffset_noise_extract=toffset_noise_extract,
            want_incomplete=want_incomplete,
            extend_incomplete=extend_incomplete)

        trs_restituted = []
        for tr in trs_raw:
            if deltat is not None:
                tr.downsample_to(deltat, snap=True, allow_upsample_max=5)
                # force exact target deltat to avoid float drift
                tr.deltat = deltat

            resp = self.get_response(tr, quantity=quantity)
            try:
                trs_restituted.append(
                    tr.transfer(
                        tfade=tfade, freqlimits=freqlimits,
                        transfer_function=resp, invert=True, demean=True))

            except trace.InfiniteResponse:
                raise NotFound(
                    'Instrument response deconvolution failed '
                    '(divide by zero)', tr.nslc_id)

        return trs_restituted, trs_raw
    def _get_projections(
            self, station, backazimuth, source, target, tmin, tmax):
        '''
        Build component projection matrices for ``station``: always ENZ,
        plus RTZ when a backazimuth is known or derivable from
        source/target geometry.

        :returns: list of ``(matrix, in_channels, out_channels)`` tuples
        :raises NotFound: if no projection can be determined
        '''
        # fill in missing channel information (happens when station file
        # does not contain any channel information)
        if not station.get_channels():
            # work on a copy so the registered station stays untouched
            station = copy.deepcopy(station)

            nsl = station.nsl()
            trs = self.pile.all(
                tmin=tmin, tmax=tmax,
                trace_selector=lambda tr: tr.nslc_id[:3] == nsl,
                load_data=False)

            channels = list(set(tr.channel for tr in trs))
            station.set_channels_by_name(*channels)

        projections = []
        projections.extend(station.guess_projections_to_enu(
            out_channels=('E', 'N', 'Z')))

        # explicit source/target geometry overrides a given backazimuth
        if source is not None and target is not None:
            backazimuth = source.azibazi_to(target)[1]

        if backazimuth is not None:
            projections.extend(station.guess_projections_to_rtu(
                out_channels=('R', 'T', 'Z'),
                backazimuth=backazimuth))

        if not projections:
            raise NotFound(
                'Cannot determine projection of data components:',
                station.nsl())

        return projections
    def _get_waveform(
            self,
            obj, quantity='displacement',
            tmin=None, tmax=None, tpad=0.,
            tfade=0., freqlimits=None, deltat=None, cache=None,
            backazimuth=None,
            source=None,
            target=None,
            debug=False):
        '''
        Core waveform assembly: restitute raw data, project to ENZ/RTZ,
        apply station corrections and optionally substitute synthetics.

        :param cache: dict to memoize results in, or ``True`` to use the
            dataset-internal cache; exceptions and misses are cached too
        :param debug: return intermediate products instead of a single trace
        :raises NotFound: if the requested waveform cannot be produced
        '''

        assert not debug or (debug and cache is None)

        if cache is True:
            cache = self._cache

        _, _, _, channel = self.get_nslc(obj)
        station = self.get_station(self.get_nsl(obj))

        nslc = station.nsl() + (channel,)

        if self.is_blacklisted(nslc):
            raise NotFound(
                'Waveform is blacklisted:', nslc)

        if not self.is_whitelisted(nslc):
            raise NotFound(
                'Waveform is not on whitelist:', nslc)

        assert tmin is not None
        assert tmax is not None

        tmin = float(tmin)
        tmax = float(tmax)

        nslc = tuple(nslc)

        cache_k = nslc + (
            tmin, tmax, tuple(freqlimits), tfade, deltat, tpad, quantity)
        # NOTE(review): cache_k already starts with nslc, so lookup keys are
        # effectively `nslc + nslc + (...)`. This is consistent with the
        # stores below, but the duplication looks accidental — confirm
        # before changing.
        if cache is not None and (nslc + cache_k) in cache:
            obj = cache[nslc + cache_k]
            if isinstance(obj, Exception):
                raise obj
            elif obj is None:
                raise NotFound('Waveform not found!', nslc)
            else:
                return obj

        syn_test = self.synthetic_test
        toffset_noise_extract = 0.0
        if syn_test:
            if not syn_test.respect_data_availability:
                if syn_test.real_noise_scale != 0.0:
                    raise DatasetError(
                        'respect_data_availability=False and '
                        'addition of real noise cannot be combined.')

                # pure synthetics: skip real-data processing entirely
                tr = syn_test.get_waveform(
                    nslc, tmin, tmax,
                    tfade=tfade,
                    freqlimits=freqlimits)

                if cache is not None:
                    cache[tr.nslc_id + cache_k] = tr

                if debug:
                    # NOTE(review): this debug return has 3 elements while
                    # the one at the end of the method has 4 — confirm
                    # callers handle both shapes.
                    return [], [], []
                else:
                    return tr

            if syn_test.real_noise_scale != 0.0:
                toffset_noise_extract = syn_test.real_noise_toffset

        abs_delays = []
        for ocha in 'ENZRT':
            sc = self.station_corrections.get(station.nsl() + (channel,), None)
            if sc:
                abs_delays.append(abs(sc.delay))
        # NOTE(review): the loop variable `ocha` is unused — the lookup uses
        # `channel` each pass, so the same delay is appended five times.
        # Presumably `ocha` was intended so delays of all output components
        # feed into the max below; verify against upstream before fixing.

        if abs_delays:
            abs_delay_max = max(abs_delays)
        else:
            abs_delay_max = 0.0

        projections = self._get_projections(
            station, backazimuth, source, target, tmin, tmax)

        try:
            trs_projected = []
            trs_restituted = []
            trs_raw = []
            exceptions = []
            for matrix, in_channels, out_channels in projections:
                # which input channels are needed to produce `channel`
                deps = trace.project_dependencies(
                    matrix, in_channels, out_channels)

                try:
                    trs_restituted_group = []
                    trs_raw_group = []
                    if channel in deps:
                        for cha in deps[channel]:
                            trs_restituted_this, trs_raw_this = \
                                self.get_waveform_restituted(
                                    station.nsl() + (cha,),
                                    quantity=quantity,
                                    tmin=tmin, tmax=tmax,
                                    tpad=tpad+abs_delay_max,
                                    toffset_noise_extract=toffset_noise_extract,  # noqa
                                    tfade=tfade,
                                    freqlimits=freqlimits,
                                    deltat=deltat,
                                    want_incomplete=debug,
                                    extend_incomplete=self.extend_incomplete)

                            trs_restituted_group.extend(trs_restituted_this)
                            trs_raw_group.extend(trs_raw_this)

                        trs_projected.extend(
                            trace.project(
                                trs_restituted_group, matrix,
                                in_channels, out_channels))

                    trs_restituted.extend(trs_restituted_group)
                    trs_raw.extend(trs_raw_group)

                except NotFound as e:
                    # remember the failure; another projection may still work
                    exceptions.append((in_channels, out_channels, e))

            if not trs_projected:
                # all projections failed: combine their errors into one
                err = []
                for (in_channels, out_channels, e) in exceptions:
                    sin = ', '.join(c.name for c in in_channels)
                    sout = ', '.join(c.name for c in out_channels)
                    err.append('(%s) -> (%s): %s' % (sin, sout, e))

                raise NotFound('\n'.join(err))

            for tr in trs_projected:
                sc = self.station_corrections.get(tr.nslc_id, None)
                if sc:
                    if self.apply_correction_factors:
                        tr.ydata /= sc.factor

                    if self.apply_correction_delays:
                        tr.shift(-sc.delay)

                if tmin is not None and tmax is not None:
                    tr.chop(tmin, tmax)

            if syn_test:
                # replace projected data with synthetics, optionally adding
                # scaled real noise
                trs_projected_synthetic = []
                for tr in trs_projected:
                    if tr.channel == channel:
                        tr_syn = syn_test.get_waveform(
                            tr.nslc_id, tmin, tmax,
                            tfade=tfade, freqlimits=freqlimits)

                        if tr_syn:
                            if syn_test.real_noise_scale != 0.0:
                                tr_syn = tr_syn.copy()
                                tr_noise = tr.copy()
                                tr_noise.set_ydata(
                                    tr_noise.get_ydata()
                                    * syn_test.real_noise_scale)

                                tr_syn.add(tr_noise)

                            trs_projected_synthetic.append(tr_syn)

                trs_projected = trs_projected_synthetic

            if cache is not None:
                for tr in trs_projected:
                    cache[tr.nslc_id + cache_k] = tr

            tr_return = None
            for tr in trs_projected:
                if tr.channel == channel:
                    tr_return = tr

            if debug:
                return trs_projected, trs_restituted, trs_raw, tr_return

            elif tr_return:
                return tr_return

            else:
                raise NotFound(
                    'waveform not available', station.nsl() + (channel,))

        except NotFound:
            # cache the miss so repeated requests fail fast
            if cache is not None:
                cache[nslc + cache_k] = None
            raise
870 def get_waveform(self, obj, tinc_cache=None, **kwargs):
871 tmin = kwargs['tmin']
872 tmax = kwargs['tmax']
873 if tinc_cache is not None:
874 tmin_r = (math.floor(tmin / tinc_cache) - 1.0) * tinc_cache
875 tmax_r = (math.ceil(tmax / tinc_cache) + 1.0) * tinc_cache
876 else:
877 tmin_r = tmin
878 tmax_r = tmax
880 kwargs['tmin'] = tmin_r
881 kwargs['tmax'] = tmax_r
883 if kwargs.get('debug', None):
884 return self._get_waveform(obj, **kwargs)
885 else:
886 tr = self._get_waveform(obj, **kwargs)
887 return tr.chop(tmin, tmax, inplace=False)
889 def get_events(self, magmin=None, event_names=None):
890 evs = []
891 for ev in self.events:
892 if ((magmin is None or ev.magnitude >= magmin) and
893 (event_names is None or ev.name in event_names)):
894 evs.append(ev)
896 return evs
898 def get_event_by_time(self, t, magmin=None):
899 evs = self.get_events(magmin=magmin)
900 ev_x = None
901 for ev in evs:
902 if ev_x is None or abs(ev.time - t) < abs(ev_x.time - t):
903 ev_x = ev
905 if not ev_x:
906 raise NotFound(
907 'No event information matching criteria (t=%s, magmin=%s).' %
908 (t, magmin))
910 return ev_x
912 def get_event(self):
913 if self._event_name is None:
914 raise NotFound('No main event selected in dataset!')
916 for ev in self.events:
917 if ev.name == self._event_name:
918 return ev
920 raise NotFound('No such event: %s' % self._event_name)
    def get_picks(self):
        '''
        Assemble (and cache) the phase-pick lookup table.

        :returns: dict mapping ``(nsl, phasename, eventname)`` to the
            corresponding phase marker
        :raises DatasetError: on duplicate event names in the picks, on
            picks not associated with any known event, or on duplicate picks
        '''
        if self._picks is None:
            # map event hashes (from markers and loaded events) to names so
            # phase markers can be associated by hash
            hash_to_name = {}
            names = set()
            for marker in self.pick_markers:
                if isinstance(marker, pmarker.EventMarker):
                    name = marker.get_event().name
                    if name in names:
                        raise DatasetError(
                            'Duplicate event name "%s" in picks.' % name)

                    names.add(name)
                    hash_to_name[marker.get_event_hash()] = name

            for ev in self.events:
                hash_to_name[ev.get_hash()] = ev.name

            picks = {}
            for marker in self.pick_markers:
                if isinstance(marker, pmarker.PhaseMarker):
                    ehash = marker.get_event_hash()

                    nsl = marker.one_nslc()[:3]
                    phasename = marker.get_phasename()

                    if ehash is None or ehash not in hash_to_name:
                        raise DatasetError(
                            'Unassociated pick: %s.%s.%s, %s' %
                            (nsl + (phasename, )))

                    eventname = hash_to_name[ehash]

                    if (nsl, phasename, eventname) in picks:
                        raise DatasetError(
                            'Duplicate pick: %s.%s.%s, %s' %
                            (nsl + (phasename, )))

                    picks[nsl, phasename, eventname] = marker

            self._picks = picks

        return self._picks
965 def get_pick(self, eventname, obj, phasename):
966 nsl = self.get_nsl(obj)
967 return self.get_picks().get((nsl, phasename, eventname), None)
class DatasetConfig(HasPaths):
    ''' Configuration for a Grond `Dataset` object. '''

    # All *_path(s) attributes may contain an '${event_name}' template
    # placeholder, expanded per event in get_dataset()/get_event_names().

    stations_path = Path.T(
        optional=True,
        help='List of files with station coordinates in Pyrocko format.')
    stations_stationxml_paths = List.T(
        Path.T(),
        optional=True,
        help='List of files with station coordinates in StationXML format.')
    events_path = Path.T(
        optional=True,
        help='File with hypocenter information and possibly'
             ' reference solution')
    waveform_paths = List.T(
        Path.T(),
        optional=True,
        help='List of directories with raw waveform data')
    clippings_path = Path.T(
        optional=True)
    responses_sacpz_path = Path.T(
        optional=True,
        help='List of SACPZ response files for restitution of'
             ' the raw waveform data.')
    responses_stationxml_paths = List.T(
        Path.T(),
        optional=True,
        help='List of StationXML response files for restitution of'
             ' the raw waveform data.')
    station_corrections_path = Path.T(
        optional=True,
        help='File containing station correction informations.')
    apply_correction_factors = Bool.T(
        optional=True,
        default=True,
        help='Apply correction factors from station corrections.')
    apply_correction_delays = Bool.T(
        optional=True,
        default=True,
        help='Apply correction delays from station corrections.')
    apply_displaced_sampling_workaround = Bool.T(
        optional=True,
        default=False,
        help='Work around displaced sampling issues.')
    extend_incomplete = Bool.T(
        default=False,
        help='Extend incomplete seismic traces.')
    picks_paths = List.T(
        Path.T())
    blacklist_paths = List.T(
        Path.T(),
        help='List of text files with blacklisted stations.')
    blacklist = List.T(
        String.T(),
        help='Stations/components to be excluded according to their STA, '
             'NET.STA, NET.STA.LOC, or NET.STA.LOC.CHA codes.')
    whitelist_paths = List.T(
        Path.T(),
        help='List of text files with whitelisted stations.')
    whitelist = List.T(
        String.T(),
        optional=True,
        help='If not None, list of stations/components to include according '
             'to their STA, NET.STA, NET.STA.LOC, or NET.STA.LOC.CHA codes. '
             'Note: ''when whitelisting on channel level, both, the raw and '
             'the processed channel codes have to be listed.')
    synthetic_test = SyntheticTest.T(
        optional=True)

    kite_scene_paths = List.T(
        Path.T(),
        optional=True,
        help='List of directories for the InSAR scenes.')

    gnss_campaign_paths = List.T(
        Path.T(),
        optional=True,
        help='List of directories for the GNSS campaign data.')

    def __init__(self, *args, **kwargs):
        HasPaths.__init__(self, *args, **kwargs)
        # per-event Dataset cache: event_name -> Dataset
        self._ds = {}
    def get_event_names(self):
        '''
        Get the sorted names of all events found in the configured events
        files.

        :raises DatasetError: if no file matches ``events_path``
        '''
        logger.info('Loading events ...')

        def extra(path):
            # expand '${event_name}' placeholders with a glob wildcard
            return expand_template(path, dict(
                event_name='*'))

        def fp(path):
            return self.expand_path(path, extra=extra)

        def check_events(events, fn):
            # sanity warnings only — loading continues regardless
            for ev in events:
                if not ev.name:
                    logger.warning('Event in %s has no name!', fn)
                    return
                if not ev.lat or not ev.lon:
                    logger.warning('Event %s has inconsistent coordinates!',
                                   ev.name)
                if not ev.depth:
                    logger.warning('Event %s has no depth!', ev.name)
                if not ev.time:
                    logger.warning('Event %s has no time!', ev.name)

        events = []
        events_path = fp(self.events_path)
        fns = glob.glob(events_path)
        if not fns:
            raise DatasetError('No event files matching "%s".' % events_path)

        for fn in fns:
            logger.debug('Loading from file %s' % fn)
            ev = model.load_events(filename=fn)
            check_events(ev, fn)

            events.extend(ev)

        event_names = [ev_.name for ev_ in events]
        event_names.sort()
        return event_names
    def get_dataset(self, event_name):
        '''
        Build (and cache) the :py:class:`Dataset` for one event, loading
        all configured data files with '${event_name}' templates expanded.

        :raises DatasetError: if any configured file fails to load
        '''
        if event_name not in self._ds:
            def extra(path):
                return expand_template(path, dict(
                    event_name=event_name))

            def fp(path):
                # expand path template(s); warn about missing files but
                # keep going — loaders skip non-existing paths
                p = self.expand_path(path, extra=extra)
                if p is None:
                    return None

                if isinstance(p, list):
                    for path in p:
                        if not op.exists(path):
                            logger.warning('Path %s does not exist.' % path)
                else:
                    if not op.exists(p):
                        logger.warning('Path %s does not exist.' % p)

                return p

            ds = Dataset(event_name)
            try:
                ds.add_events(filename=fp(self.events_path))

                ds.add_stations(
                    pyrocko_stations_filename=fp(self.stations_path),
                    stationxml_filenames=fp(self.stations_stationxml_paths))

                if self.waveform_paths:
                    ds.add_waveforms(paths=fp(self.waveform_paths))

                if self.kite_scene_paths:
                    ds.add_kite_scenes(paths=fp(self.kite_scene_paths))

                if self.gnss_campaign_paths:
                    ds.add_gnss_campaigns(paths=fp(self.gnss_campaign_paths))

                if self.clippings_path:
                    ds.add_clippings(markers_filename=fp(self.clippings_path))

                if self.responses_sacpz_path:
                    ds.add_responses(
                        sacpz_dirname=fp(self.responses_sacpz_path))

                if self.responses_stationxml_paths:
                    ds.add_responses(
                        stationxml_filenames=fp(
                            self.responses_stationxml_paths))

                if self.station_corrections_path:
                    ds.add_station_corrections(
                        filename=fp(self.station_corrections_path))

                ds.apply_correction_factors = self.apply_correction_factors
                ds.apply_correction_delays = self.apply_correction_delays
                ds.apply_displaced_sampling_workaround = \
                    self.apply_displaced_sampling_workaround
                ds.extend_incomplete = self.extend_incomplete

                for picks_path in self.picks_paths:
                    ds.add_picks(
                        filename=fp(picks_path))

                ds.add_blacklist(self.blacklist)
                ds.add_blacklist(filenames=fp(self.blacklist_paths))
                if self.whitelist:
                    ds.add_whitelist(self.whitelist)
                if self.whitelist_paths:
                    ds.add_whitelist(filenames=fp(self.whitelist_paths))

                # deepcopy so each dataset owns its synthetic test state
                ds.set_synthetic_test(copy.deepcopy(self.synthetic_test))
                self._ds[event_name] = ds
            except (FileLoadError, OSError) as e:
                raise DatasetError(str(e))

        return self._ds[event_name]
# Public API of this module.
__all__ = '''
    Dataset
    DatasetConfig
    DatasetError
    InvalidObject
    NotFound
    StationCorrection
    load_station_corrections
    dump_station_corrections
'''.split()