import glob
import copy
import os.path as op
import logging
import math
import numpy as num
from collections import defaultdict
from pyrocko import util, model, trace, marker as pmarker
from pyrocko.io.io_common import FileLoadError
from pyrocko.fdsn import enhanced_sacpz, station as fs
from pyrocko.guts import (Object, Tuple, String, Float, List, Bool, dump_all,
load_all)
from pyrocko import squirrel
from pyrocko import gf
from .meta import Path, HasPaths, expand_template, GrondError
from .synthetic_tests import SyntheticTest
guts_prefix = 'grond'
logger = logging.getLogger('grond.dataset')
def quote_paths(paths):
return ', '.join('"%s"' % path for path in paths)
[docs]
class InvalidObject(Exception):
pass
[docs]
class NotFound(Exception):
def __init__(self, reason, codes=None, time_range=None):
self.reason = reason
self.time_range = time_range
self.codes = codes
def __str__(self):
s = self.reason
if self.codes:
s += ' (%s)' % '.'.join(self.codes)
if self.time_range:
s += ' (%s - %s)' % (
util.time_to_str(self.time_range[0]),
util.time_to_str(self.time_range[1]))
return s
[docs]
class DatasetError(GrondError):
pass
[docs]
class StationCorrection(Object):
codes = Tuple.T(4, String.T())
delay = Float.T()
factor = Float.T()
def load_station_corrections(filename):
scs = load_all(filename=filename)
for sc in scs:
assert isinstance(sc, StationCorrection)
return scs
def dump_station_corrections(station_corrections, filename):
return dump_all(station_corrections, filename=filename)
class Dataset(object):
def __init__(self, event_name=None):
self.events = []
self.squirrel = squirrel.Squirrel()
self.stations = {}
self.responses = defaultdict(list)
self.responses_stationxml = []
self.clippings = {}
self.blacklist = set()
self.whitelist_nslc = None
self.whitelist_nsl_xx = None
self.whitelist = None
self.station_corrections = {}
self.station_factors = {}
self.pick_markers = []
self.apply_correction_delays = True
self.apply_correction_factors = True
self.apply_displaced_sampling_workaround = False
self.extend_incomplete = False
self.clip_handling = 'by_nsl'
self.kite_scenes = []
self.gnss_campaigns = []
self.synthetic_test = None
self._picks = None
self._cache = {}
self._event_name = event_name
def empty_cache(self):
self._cache = {}
def set_synthetic_test(self, synthetic_test):
self.synthetic_test = synthetic_test
def add_stations(
self,
stations=None,
pyrocko_stations_filename=None,
stationxml_filenames=None):
if stations is not None:
for station in stations:
self.stations[station.nsl()] = station
if pyrocko_stations_filename is not None:
logger.debug(
'Loading stations from file "%s"...' %
pyrocko_stations_filename)
for station in model.load_stations(pyrocko_stations_filename):
self.stations[station.nsl()] = station
if stationxml_filenames is not None and len(stationxml_filenames) > 0:
for stationxml_filename in stationxml_filenames:
if not op.exists(stationxml_filename):
continue
logger.debug(
'Loading stations from StationXML file "%s"...' %
stationxml_filename)
sx = fs.load_xml(filename=stationxml_filename)
ev = self.get_event()
try:
stations = sx.get_pyrocko_stations(
time=ev.time, sloppy=True)
except TypeError:
logger.warning(
'The installed version of Pyrocko does not support '
'the keyword argument `sloppy` in method '
'`FDSNStationXML.get_pyrocko_stations`. Please '
'upgrade Pyrocko.')
stations = sx.get_pyrocko_stations(time=ev.time)
if len(stations) == 0:
logger.warning(
'No stations found for time %s in file "%s".' % (
util.time_to_str(ev.time), stationxml_filename))
for station in stations:
logger.debug('Adding station: %s.%s.%s' % station.nsl())
channels = station.get_channels()
if len(channels) == 1 and channels[0].name.endswith('Z'):
logger.warning(
'Station "%s" has vertical component'
' information only, adding mocked channels.'
% station.nsl_string())
station.add_channel(
model.Channel(channels[0].name[:-1] + 'N'))
station.add_channel(
model.Channel(channels[0].name[:-1] + 'E'))
self.stations[station.nsl()] = station
def add_events(self, events=None, filename=None):
if events is not None:
self.events.extend(events)
if filename is not None:
logger.debug('Loading events from file "%s"...' % filename)
try:
events = model.load_events(filename)
self.events.extend(events)
logger.info(
'Loading events from %s: %i events found.' %
(filename, len(events)))
except Exception as e:
logger.warning('Could not load events from %s!', filename)
raise e
def add_waveforms(self, paths, regex=None, fileformat='detect',
show_progress=False):
self.squirrel.add(paths)
def add_responses(self, sacpz_dirname=None, stationxml_filenames=None):
if sacpz_dirname:
logger.debug(
'Loading SAC PZ responses from "%s"...' % sacpz_dirname)
for x in enhanced_sacpz.iload_dirname(sacpz_dirname):
self.responses[x.codes].append(x)
if stationxml_filenames:
for stationxml_filename in stationxml_filenames:
if not op.exists(stationxml_filename):
continue
logger.debug(
'Loading StationXML responses from "%s"...' %
stationxml_filename)
self.responses_stationxml.append(
fs.load_xml(filename=stationxml_filename))
def add_clippings(self, markers_filename):
markers = pmarker.load_markers(markers_filename)
clippings = {}
for marker in markers:
nslc = marker.one_nslc()
nsl = nslc[:3]
if nsl not in clippings:
clippings[nsl] = []
if nslc not in clippings:
clippings[nslc] = []
clippings[nsl].append(marker.tmin)
clippings[nslc].append(marker.tmin)
for k, times in clippings.items():
atimes = num.array(times, dtype=float)
if k not in self.clippings:
self.clippings[k] = atimes
else:
self.clippings[k] = num.concatenate(self.clippings, atimes)
def add_blacklist(self, blacklist=[], filenames=None):
logger.debug('Loading blacklisted stations...')
if filenames:
blacklist = list(blacklist)
for filename in filenames:
if op.exists(filename):
with open(filename, 'r') as f:
blacklist.extend(
s.strip() for s in f.read().splitlines())
else:
logger.warning('No such blacklist file: %s' % filename)
for x in blacklist:
if isinstance(x, str):
x = tuple(x.split('.'))
self.blacklist.add(x)
def add_whitelist(self, whitelist=[], filenames=None):
logger.debug('Loading whitelisted stations...')
if filenames:
whitelist = list(whitelist)
for filename in filenames:
with open(filename, 'r') as f:
whitelist.extend(s.strip() for s in f.read().splitlines())
if self.whitelist_nslc is None:
self.whitelist_nslc = set()
self.whitelist = set()
self.whitelist_nsl_xx = set()
for x in whitelist:
if isinstance(x, str):
x = tuple(x.split('.'))
if len(x) == 4:
self.whitelist_nslc.add(x)
self.whitelist_nsl_xx.add(x[:3])
else:
self.whitelist.add(x)
def add_station_corrections(self, filename):
self.station_corrections.update(
(sc.codes, sc) for sc in load_station_corrections(filename))
def add_picks(self, filename):
self.pick_markers.extend(
pmarker.load_markers(filename))
self._picks = None
def add_gnss_campaigns(self, paths):
paths = util.select_files(
paths,
include=r'\.yml|\.yaml',
show_progress=False)
for path in paths:
self.add_gnss_campaign(filename=path)
def add_gnss_campaign(self, filename):
try:
from pyrocko.model import gnss # noqa
except ImportError:
raise ImportError(
'Module pyrocko.model.gnss not found. Please upgrade Pyrocko.')
logger.debug('Loading GNSS campaign from "%s"...' % filename)
campaign = load_all(filename=filename)
self.gnss_campaigns.append(campaign[0])
def add_kite_scenes(self, paths):
logger.info('Loading kite InSAR scenes...')
paths = util.select_files(
paths,
include=r'\.npz',
show_progress=False)
for path in paths:
self.add_kite_scene(filename=path)
if not self.kite_scenes:
logger.warning('Could not find any kite scenes at %s.' %
quote_paths(self.kite_scene_paths))
def add_kite_scene(self, filename):
try:
from kite import Scene
except ImportError:
raise ImportError('Module kite could not be imported,'
' please install from https://pyrocko.org.')
logger.debug('Loading kite scene from "%s"...' % filename)
scene = Scene.load(filename)
scene._log.setLevel(logger.level)
try:
self.get_kite_scene(scene.meta.scene_id)
except NotFound:
self.kite_scenes.append(scene)
else:
raise AttributeError('Kite scene_id not unique for "%s".'
% filename)
def is_blacklisted(self, obj):
try:
nslc = self.get_nslc(obj)
if nslc in self.blacklist:
return True
except InvalidObject:
pass
nsl = self.get_nsl(obj)
return (
nsl in self.blacklist or
nsl[1:2] in self.blacklist or
nsl[:2] in self.blacklist)
def is_whitelisted(self, obj):
if self.whitelist_nslc is None:
return True
nsl = self.get_nsl(obj)
if (
nsl in self.whitelist or
nsl[1:2] in self.whitelist or
nsl[:2] in self.whitelist):
return True
try:
nslc = self.get_nslc(obj)
if nslc in self.whitelist_nslc:
return True
except InvalidObject:
return nsl in self.whitelist_nsl_xx
def has_clipping(self, nsl_or_nslc, tmin, tmax):
if nsl_or_nslc not in self.clippings:
return False
atimes = self.clippings[nsl_or_nslc]
return num.any(num.logical_and(tmin < atimes, atimes <= tmax))
def get_nsl(self, obj):
if isinstance(obj, trace.Trace):
net, sta, loc, _ = obj.nslc_id
elif isinstance(obj, model.Station):
net, sta, loc = obj.nsl()
elif isinstance(obj, gf.Target):
net, sta, loc, _ = obj.codes
elif isinstance(obj, tuple) and len(obj) in (3, 4):
net, sta, loc = obj[:3]
else:
raise InvalidObject(
'Cannot get nsl code from given object of type "%s".'
% type(obj))
return net, sta, loc
def get_nslc(self, obj):
if isinstance(obj, trace.Trace):
return obj.nslc_id
elif isinstance(obj, gf.Target):
return obj.codes
elif isinstance(obj, tuple) and len(obj) == 4:
return obj
else:
raise InvalidObject(
'Cannot get nslc code from given object "%s"' % type(obj))
def get_tmin_tmax(self, obj):
if isinstance(obj, trace.Trace):
return obj.tmin, obj.tmax
else:
raise InvalidObject(
'Cannot get tmin and tmax from given object of type "%s"' %
type(obj))
def get_station(self, obj):
if self.is_blacklisted(obj):
raise NotFound('Station is blacklisted:', self.get_nsl(obj))
if not self.is_whitelisted(obj):
raise NotFound('Station is not on whitelist:', self.get_nsl(obj))
if isinstance(obj, model.Station):
return obj
net, sta, loc = self.get_nsl(obj)
keys = [(net, sta, loc), (net, sta, ''), ('', sta, '')]
for k in keys:
if k in self.stations:
return self.stations[k]
raise NotFound('No station information:', keys)
def get_stations(self):
return [self.stations[k] for k in sorted(self.stations)
if not self.is_blacklisted(self.stations[k])
and self.is_whitelisted(self.stations[k])]
def get_kite_scenes(self):
return self.kite_scenes
def get_kite_scene(self, scene_id=None):
if scene_id is None:
if len(self.kite_scenes) == 0:
raise AttributeError('No kite displacements defined.')
return self.kite_scenes[0]
else:
for scene in self.kite_scenes:
if scene.meta.scene_id == scene_id:
return scene
raise NotFound('No kite scene with id "%s" defined.' % scene_id)
def get_gnss_campaigns(self):
return self.gnss_campaigns
def get_gnss_campaign(self, name):
for camp in self.gnss_campaigns:
if camp.name == name:
return camp
raise NotFound('GNSS campaign %s not found!' % name)
def get_response(self, obj, quantity='displacement'):
if (self.responses is None or len(self.responses) == 0) \
and (self.responses_stationxml is None
or len(self.responses_stationxml) == 0):
raise NotFound('No response information available.')
quantity_to_unit = {
'displacement': 'M',
'velocity': 'M/S',
'acceleration': 'M/S**2',
'rotation_displacement': 'RAD',
'rotation_velocity': 'RAD/S',
'rotation_acceleration': 'RAD/S**2'}
if self.is_blacklisted(obj):
raise NotFound('Response is blacklisted:', self.get_nslc(obj))
if not self.is_whitelisted(obj):
raise NotFound('Response is not on whitelist:', self.get_nslc(obj))
net, sta, loc, cha = self.get_nslc(obj)
tmin, tmax = self.get_tmin_tmax(obj)
keys_x = [
(net, sta, loc, cha), (net, sta, '', cha), ('', sta, '', cha)]
keys = []
for k in keys_x:
if k not in keys:
keys.append(k)
candidates = []
for k in keys:
if k in self.responses:
for x in self.responses[k]:
if x.tmin < tmin and (x.tmax is None or tmax < x.tmax):
if quantity in (
'displacement', 'rotation_displacement'):
candidates.append(x.response)
elif quantity in (
'velocity', 'rotation_velocity'):
candidates.append(trace.MultiplyResponse([
x.response,
trace.DifferentiationResponse()]))
elif quantity in (
'acceleration', 'rotation_acceleration'):
candidates.append(trace.MultiplyResponse([
x.response,
trace.DifferentiationResponse(2)]))
else:
assert False
for sx in self.responses_stationxml:
try:
candidates.append(
sx.get_pyrocko_response(
(net, sta, loc, cha),
timespan=(tmin, tmax),
fake_input_units=quantity_to_unit[quantity]))
except (fs.NoResponseInformation, fs.MultipleResponseInformation):
pass
if len(candidates) == 1:
return candidates[0]
elif len(candidates) == 0:
raise NotFound('No response found:', (net, sta, loc, cha))
else:
raise NotFound('Multiple responses found:', (net, sta, loc, cha))
def get_waveform_raw(
self, obj,
tmin,
tmax,
tpad=0.,
toffset_noise_extract=0.,
want_incomplete=False,
extend_incomplete=False):
net, sta, loc, cha = self.get_nslc(obj)
if self.is_blacklisted((net, sta, loc, cha)):
raise NotFound(
'Waveform is blacklisted:', (net, sta, loc, cha))
if not self.is_whitelisted((net, sta, loc, cha)):
raise NotFound(
'Waveform is not on whitelist:', (net, sta, loc, cha))
if self.clip_handling == 'by_nsl':
if self.has_clipping((net, sta, loc), tmin, tmax):
raise NotFound(
'Waveform clipped:', (net, sta, loc))
elif self.clip_handling == 'by_nslc':
if self.has_clipping((net, sta, loc, cha), tmin, tmax):
raise NotFound(
'Waveform clipped:', (net, sta, loc, cha))
trs = self.squirrel.get_waveforms(
tmin=tmin+toffset_noise_extract-tpad,
tmax=tmax+toffset_noise_extract+tpad,
codes=(net, sta, loc, cha),
want_incomplete=want_incomplete or extend_incomplete)
if self.apply_displaced_sampling_workaround:
for tr in trs:
tr.snap()
if toffset_noise_extract != 0.0:
for tr in trs:
tr.shift(-toffset_noise_extract)
if extend_incomplete and len(trs) == 1:
trs[0].extend(
tmin + toffset_noise_extract - tpad,
tmax + toffset_noise_extract + tpad,
fillmethod='repeat')
if not want_incomplete and len(trs) != 1:
if len(trs) == 0:
message = 'Waveform missing or incomplete.'
else:
message = 'Waveform has gaps.'
raise NotFound(
message,
codes=(net, sta, loc, cha),
time_range=(
tmin + toffset_noise_extract - tpad,
tmax + toffset_noise_extract + tpad))
return trs
def get_waveform_restituted(
self,
obj, quantity='displacement',
tmin=None, tmax=None, tpad=0.,
tfade=0., freqlimits=None, deltat=None,
toffset_noise_extract=0.,
want_incomplete=False,
extend_incomplete=False):
if deltat is not None:
tpad_downsample = deltat * 50.
else:
tpad_downsample = 0.
trs_raw = self.get_waveform_raw(
obj, tmin=tmin, tmax=tmax, tpad=tpad+tfade+tpad_downsample,
toffset_noise_extract=toffset_noise_extract,
want_incomplete=want_incomplete,
extend_incomplete=extend_incomplete)
trs_restituted = []
for tr in trs_raw:
if deltat is not None:
try:
tr.downsample_to(deltat, snap=True, allow_upsample_max=5)
except util.UnavailableDecimation:
logger.warn('using resample instead of decimation')
tr.resample(deltat)
tr.deltat = deltat
resp = self.get_response(tr, quantity=quantity)
try:
trs_restituted.append(
tr.transfer(
tfade=tfade, freqlimits=freqlimits,
transfer_function=resp, invert=True, demean=True))
except trace.InfiniteResponse:
raise NotFound(
'Instrument response deconvolution failed '
'(divide by zero)', tr.nslc_id)
return trs_restituted, trs_raw
def _get_projections(
self, station, backazimuth, source, target, tmin, tmax):
# fill in missing channel information (happens when station file
# does not contain any channel information)
if not station.get_channels():
station = copy.deepcopy(station)
nsl = station.nsl()
trs = self.squirrel.pile.all(
tmin=tmin, tmax=tmax,
trace_selector=lambda tr: tr.nslc_id[:3] == nsl,
load_data=False)
channels = list(set(tr.channel for tr in trs))
station.set_channels_by_name(*channels)
projections = []
projections.extend(station.guess_projections_to_enu(
out_channels=('E', 'N', 'Z')))
if source is not None and target is not None:
backazimuth = source.azibazi_to(target)[1]
if backazimuth is not None:
projections.extend(station.guess_projections_to_rtu(
out_channels=('R', 'T', 'Z'),
backazimuth=backazimuth))
if not projections:
raise NotFound(
'Cannot determine projection of data components:',
station.nsl())
return projections
def _get_waveform(
self,
obj, quantity='displacement',
tmin=None, tmax=None, tpad=0.,
tfade=0., freqlimits=None, deltat=None, cache=None,
backazimuth=None,
source=None,
target=None,
debug=False):
assert not debug or (debug and cache is None)
if cache is True:
cache = self._cache
_, _, _, channel = self.get_nslc(obj)
station = self.get_station(self.get_nsl(obj))
nslc = station.nsl() + (channel,)
if self.is_blacklisted(nslc):
raise NotFound(
'Waveform is blacklisted:', nslc)
if not self.is_whitelisted(nslc):
raise NotFound(
'Waveform is not on whitelist:', nslc)
assert tmin is not None
assert tmax is not None
tmin = float(tmin)
tmax = float(tmax)
nslc = tuple(nslc)
cache_k = nslc + (
tmin, tmax, tuple(freqlimits), tfade, deltat, tpad, quantity)
if cache is not None and (nslc + cache_k) in cache:
obj = cache[nslc + cache_k]
if isinstance(obj, Exception):
raise obj
elif obj is None:
raise NotFound('Waveform not found!', nslc)
else:
return obj
syn_test = self.synthetic_test
toffset_noise_extract = 0.0
if syn_test:
if not syn_test.respect_data_availability:
if syn_test.real_noise_scale != 0.0:
raise DatasetError(
'respect_data_availability=False and '
'addition of real noise cannot be combined.')
tr = syn_test.get_waveform(
nslc, tmin, tmax,
tfade=tfade,
freqlimits=freqlimits)
if cache is not None:
cache[tr.nslc_id + cache_k] = tr
if debug:
return [], [], [], []
else:
return tr
if syn_test.real_noise_scale != 0.0:
toffset_noise_extract = syn_test.real_noise_toffset
abs_delays = []
for ocha in 'ENZRT':
sc = self.station_corrections.get(station.nsl() + (channel,), None)
if sc:
abs_delays.append(abs(sc.delay))
if abs_delays:
abs_delay_max = max(abs_delays)
else:
abs_delay_max = 0.0
projections = self._get_projections(
station, backazimuth, source, target, tmin, tmax)
try:
trs_projected = []
trs_restituted = []
trs_raw = []
exceptions = []
for matrix, in_channels, out_channels in projections:
deps = trace.project_dependencies(
matrix, in_channels, out_channels)
try:
trs_restituted_group = []
trs_raw_group = []
if channel in deps:
for cha in deps[channel]:
trs_restituted_this, trs_raw_this = \
self.get_waveform_restituted(
station.nsl() + (cha,),
quantity=quantity,
tmin=tmin, tmax=tmax,
tpad=tpad+abs_delay_max,
toffset_noise_extract=toffset_noise_extract, # noqa
tfade=tfade,
freqlimits=freqlimits,
deltat=deltat,
want_incomplete=debug,
extend_incomplete=self.extend_incomplete)
trs_restituted_group.extend(trs_restituted_this)
trs_raw_group.extend(trs_raw_this)
trs_projected.extend(
trace.project(
trs_restituted_group, matrix,
in_channels, out_channels))
trs_restituted.extend(trs_restituted_group)
trs_raw.extend(trs_raw_group)
except NotFound as e:
exceptions.append((in_channels, out_channels, e))
if not trs_projected:
err = []
for (in_channels, out_channels, e) in exceptions:
sin = ', '.join(c.name for c in in_channels)
sout = ', '.join(c.name for c in out_channels)
err.append('(%s) -> (%s): %s' % (sin, sout, e))
raise NotFound('\n'.join(err))
for tr in trs_projected:
sc = self.station_corrections.get(tr.nslc_id, None)
if sc:
if self.apply_correction_factors:
tr.ydata /= sc.factor
if self.apply_correction_delays:
tr.shift(-sc.delay)
if tmin is not None and tmax is not None:
tr.chop(tmin, tmax)
if syn_test:
trs_projected_synthetic = []
for tr in trs_projected:
if tr.channel == channel:
tr_syn = syn_test.get_waveform(
tr.nslc_id, tmin, tmax,
tfade=tfade, freqlimits=freqlimits)
if tr_syn:
if syn_test.real_noise_scale != 0.0:
tr_syn = tr_syn.copy()
tr_noise = tr.copy()
tr_noise.set_ydata(
tr_noise.get_ydata()
* syn_test.real_noise_scale)
tr_syn.add(tr_noise)
trs_projected_synthetic.append(tr_syn)
trs_projected = trs_projected_synthetic
if cache is not None:
for tr in trs_projected:
cache[tr.nslc_id + cache_k] = tr
tr_return = None
for tr in trs_projected:
if tr.channel == channel:
tr_return = tr
if debug:
return trs_projected, trs_restituted, trs_raw, tr_return
elif tr_return:
return tr_return
else:
raise NotFound(
'waveform not available', station.nsl() + (channel,))
except NotFound:
if cache is not None:
cache[nslc + cache_k] = None
raise
def get_waveform(self, obj, tinc_cache=None, **kwargs):
tmin = kwargs['tmin']
tmax = kwargs['tmax']
if tinc_cache is not None:
tmin_r = (math.floor(tmin / tinc_cache) - 1.0) * tinc_cache
tmax_r = (math.ceil(tmax / tinc_cache) + 1.0) * tinc_cache
else:
tmin_r = tmin
tmax_r = tmax
kwargs['tmin'] = tmin_r
kwargs['tmax'] = tmax_r
if kwargs.get('debug', None):
return self._get_waveform(obj, **kwargs)
else:
tr = self._get_waveform(obj, **kwargs)
return tr.chop(tmin, tmax, inplace=False)
def get_events(self, magmin=None, event_names=None):
evs = []
for ev in self.events:
if ((magmin is None or ev.magnitude >= magmin) and
(event_names is None or ev.name in event_names)):
evs.append(ev)
return evs
def get_event_by_time(self, t, magmin=None):
evs = self.get_events(magmin=magmin)
ev_x = None
for ev in evs:
if ev_x is None or abs(ev.time - t) < abs(ev_x.time - t):
ev_x = ev
if not ev_x:
raise NotFound(
'No event information matching criteria (t=%s, magmin=%s).' %
(t, magmin))
return ev_x
def get_event(self):
if self._event_name is None:
raise NotFound('No main event selected in dataset!')
for ev in self.events:
if ev.name == self._event_name:
return ev
raise NotFound('No such event: %s' % self._event_name)
def get_picks(self):
if self._picks is None:
hash_to_name = {}
names = set()
for marker in self.pick_markers:
if isinstance(marker, pmarker.EventMarker):
name = marker.get_event().name
if name in names:
raise DatasetError(
'Duplicate event name "%s" in picks.' % name)
names.add(name)
hash_to_name[marker.get_event_hash()] = name
for ev in self.events:
hash_to_name[ev.get_hash()] = ev.name
picks = {}
for marker in self.pick_markers:
if isinstance(marker, pmarker.PhaseMarker):
ehash = marker.get_event_hash()
nsl = marker.one_nslc()[:3]
phasename = marker.get_phasename()
if ehash is None or ehash not in hash_to_name:
raise DatasetError(
'Unassociated pick: %s.%s.%s, %s' %
(nsl + (phasename, )))
eventname = hash_to_name[ehash]
if (nsl, phasename, eventname) in picks:
raise DatasetError(
'Duplicate pick: %s.%s.%s, %s' %
(nsl + (phasename, )))
picks[nsl, phasename, eventname] = marker
self._picks = picks
return self._picks
def get_pick(self, eventname, obj, phasename):
nsl = self.get_nsl(obj)
return self.get_picks().get((nsl, phasename, eventname), None)
[docs]
class DatasetConfig(HasPaths):
''' Configuration for a Grond `Dataset` object. '''
stations_path = Path.T(
optional=True,
help='List of files with station coordinates in Pyrocko format.')
stations_stationxml_paths = List.T(
Path.T(),
optional=True,
help='List of files with station coordinates in StationXML format.')
events_path = Path.T(
optional=True,
help='File with hypocenter information and possibly'
' reference solution')
waveform_paths = List.T(
Path.T(),
optional=True,
help='List of directories with raw waveform data')
clippings_path = Path.T(
optional=True)
responses_sacpz_path = Path.T(
optional=True,
help='List of SACPZ response files for restitution of'
' the raw waveform data.')
responses_stationxml_paths = List.T(
Path.T(),
optional=True,
help='List of StationXML response files for restitution of'
' the raw waveform data.')
station_corrections_path = Path.T(
optional=True,
help='File containing station correction informations.')
apply_correction_factors = Bool.T(
optional=True,
default=True,
help='Apply correction factors from station corrections.')
apply_correction_delays = Bool.T(
optional=True,
default=True,
help='Apply correction delays from station corrections.')
apply_displaced_sampling_workaround = Bool.T(
optional=True,
default=False,
help='Work around displaced sampling issues.')
extend_incomplete = Bool.T(
default=False,
help='Extend incomplete seismic traces.')
picks_paths = List.T(
Path.T())
blacklist_paths = List.T(
Path.T(),
help='List of text files with blacklisted stations.')
blacklist = List.T(
String.T(),
help='Stations/components to be excluded according to their STA, '
'NET.STA, NET.STA.LOC, or NET.STA.LOC.CHA codes.')
whitelist_paths = List.T(
Path.T(),
help='List of text files with whitelisted stations.')
whitelist = List.T(
String.T(),
optional=True,
help='If not None, list of stations/components to include according '
'to their STA, NET.STA, NET.STA.LOC, or NET.STA.LOC.CHA codes. '
'Note: ''when whitelisting on channel level, both, the raw and '
'the processed channel codes have to be listed.')
synthetic_test = SyntheticTest.T(
optional=True)
kite_scene_paths = List.T(
Path.T(),
optional=True,
help='List of directories for the InSAR scenes.')
gnss_campaign_paths = List.T(
Path.T(),
optional=True,
help='List of directories for the GNSS campaign data.')
def __init__(self, *args, **kwargs):
HasPaths.__init__(self, *args, **kwargs)
self._ds = {}
def get_event_names(self):
logger.info('Loading events ...')
def extra(path):
return expand_template(path, dict(
event_name='*'))
def fp(path):
return self.expand_path(path, extra=extra)
def check_events(events, fn):
for ev in events:
if not ev.name:
logger.warning('Event in %s has no name!', fn)
return
if not ev.lat or not ev.lon:
logger.warning('Event %s has inconsistent coordinates!',
ev.name)
if not ev.depth:
logger.warning('Event %s has no depth!', ev.name)
if not ev.time:
logger.warning('Event %s has no time!', ev.name)
events = []
events_path = fp(self.events_path)
fns = glob.glob(events_path)
if not fns:
raise DatasetError('No event files matching "%s".' % events_path)
for fn in fns:
logger.debug('Loading from file %s' % fn)
ev = model.load_events(filename=fn)
check_events(ev, fn)
events.extend(ev)
event_names = [ev_.name for ev_ in events]
event_names.sort()
return event_names
def get_dataset(self, event_name):
if event_name not in self._ds:
def extra(path):
return expand_template(path, dict(
event_name=event_name))
def fp(path):
p = self.expand_path(path, extra=extra)
if p is None:
return None
if isinstance(p, list):
for path in p:
if not op.exists(path):
logger.warning('Path %s does not exist.' % path)
else:
if not op.exists(p):
logger.warning('Path %s does not exist.' % p)
return p
ds = Dataset(event_name)
try:
ds.add_events(filename=fp(self.events_path))
ds.add_stations(
pyrocko_stations_filename=fp(self.stations_path),
stationxml_filenames=fp(self.stations_stationxml_paths))
if self.waveform_paths:
ds.add_waveforms(paths=fp(self.waveform_paths))
if self.kite_scene_paths:
ds.add_kite_scenes(paths=fp(self.kite_scene_paths))
if self.gnss_campaign_paths:
ds.add_gnss_campaigns(paths=fp(self.gnss_campaign_paths))
if self.clippings_path:
ds.add_clippings(markers_filename=fp(self.clippings_path))
if self.responses_sacpz_path:
ds.add_responses(
sacpz_dirname=fp(self.responses_sacpz_path))
if self.responses_stationxml_paths:
ds.add_responses(
stationxml_filenames=fp(
self.responses_stationxml_paths))
if self.station_corrections_path:
ds.add_station_corrections(
filename=fp(self.station_corrections_path))
ds.apply_correction_factors = self.apply_correction_factors
ds.apply_correction_delays = self.apply_correction_delays
ds.apply_displaced_sampling_workaround = \
self.apply_displaced_sampling_workaround
ds.extend_incomplete = self.extend_incomplete
for picks_path in self.picks_paths:
ds.add_picks(
filename=fp(picks_path))
ds.add_blacklist(self.blacklist)
ds.add_blacklist(filenames=fp(self.blacklist_paths))
if self.whitelist:
ds.add_whitelist(self.whitelist)
if self.whitelist_paths:
ds.add_whitelist(filenames=fp(self.whitelist_paths))
ds.set_synthetic_test(copy.deepcopy(self.synthetic_test))
self._ds[event_name] = ds
except (FileLoadError, OSError) as e:
raise DatasetError(str(e))
return self._ds[event_name]
__all__ = '''
Dataset
DatasetConfig
DatasetError
InvalidObject
NotFound
StationCorrection
load_station_corrections
dump_station_corrections
'''.split()