# http://pyrocko.org - GPLv3
#
# The Pyrocko Developers, 21st Century
# ---|P------/S----------~Lg----------
'''
Low-level input and output of seismic waveforms, metadata and earthquake
catalogs.
Input and output (IO) for various different file formats is implemented in the
submodules of :py:mod:`pyrocko.io`. :py:mod:`pyrocko.io` itself provides a
simple unified interface to load and save seismic waveforms to a few different
file formats. For a higher-level approach to accessing seismic data see
:doc:`/topics/squirrel`.
.. rubric:: Seismic waveform IO
The data model used for the :py:class:`~pyrocko.trace.Trace` objects in Pyrocko
is most closely matched by the Mini-SEED file format. However, a difference is,
that Mini-SEED limits the length of the network, station, location, and channel
codes to 2, 5, 2, and 3 characters, respectively.
============ =========================== ========= ======== ======
format format identifier load save note
============ =========================== ========= ======== ======
Mini-SEED mseed yes yes
SAC sac yes yes [#f1]_
SEG Y rev1 segy some
SEISAN seisan, seisan.l, seisan.b yes [#f2]_
KAN kan yes [#f3]_
YAFF yaff yes yes [#f4]_
ASCII Table text yes [#f5]_
GSE1 gse1 some
GSE2 gse2 some
DATACUBE datacube yes
SUDS suds some
CSS css yes
TDMS iDAS tdms_idas yes
HDF5 iDAS hdf5_idas yes
============ =========================== ========= ======== ======
.. rubric:: Notes
.. [#f1] For SAC files, the endianness is guessed. Additional header
information is stored in the `Trace`'s ``meta`` attribute.
.. [#f2] Seisan waveform files can be in little (``seisan.l``) or big endian
(``seisan.b``) format. ``seisan`` currently is an alias for ``seisan.l``.
.. [#f3] The KAN file format has only been seen once by the author, and support
for it may be removed again.
.. [#f4] YAFF is an in-house, experimental file format, which should not be
released into the wild.
.. [#f5] ASCII tables with two columns (time and amplitude) are output - meta
information will be lost.
'''
import os
import logging
from pyrocko import util, trace
from . import (mseed, sac, kan, segy, yaff, seisan_waveform, gse1, gcf,
datacube, suds, css, gse2, tdms_idas, hdf5_idas)
from .io_common import FileLoadError, FileSaveError
import numpy as num
logger = logging.getLogger('pyrocko.io')
def allowed_formats(operation, use=None, default=None):
if operation == 'load':
lst = ['detect', 'from_extension', 'mseed', 'sac', 'segy', 'seisan',
'seisan.l', 'seisan.b', 'kan', 'yaff', 'gse1', 'gse2', 'gcf',
'datacube', 'suds', 'css', 'tdms_idas', 'hdf5_idas']
elif operation == 'save':
lst = ['mseed', 'sac', 'text', 'yaff', 'gse2']
if use == 'doc':
return ', '.join("``'%s'``" % fmt for fmt in lst)
elif use == 'cli_help':
return ', '.join(fmt + ['', ' [default]'][fmt == default]
for fmt in lst)
else:
return lst
[docs]def load(filename, format='mseed', getdata=True, substitutions=None):
'''
Load traces from file.
:param format: format of the file (%s)
:param getdata: if ``True`` (the default), read data, otherwise only read
traces metadata
:param substitutions: dict with substitutions to be applied to the traces
metadata
:returns: list of loaded traces
When *format* is set to ``'detect'``, the file type is guessed from the
first 512 bytes of the file. Only Mini-SEED, SAC, GSE1, and YAFF format are
detected. When *format* is set to ``'from_extension'``, the filename
extension is used to decide what format should be assumed. The filename
extensions considered are (matching is case insensitive): ``'.sac'``,
``'.kan'``, ``'.sgy'``, ``'.segy'``, ``'.yaff'``, everything else is
assumed to be in Mini-SEED format.
This function calls :py:func:`iload` and aggregates the loaded traces in a
list.
'''
return list(iload(
filename, format=format, getdata=getdata, substitutions=substitutions))
load.__doc__ %= allowed_formats('load', 'doc')
def detect_format(filename):
try:
f = open(filename, 'rb')
data = f.read(512)
except OSError as e:
raise FileLoadError(e)
finally:
f.close()
formats = [
(yaff, 'yaff'),
(mseed, 'mseed'),
(sac, 'sac'),
(gse1, 'gse1'),
(gse2, 'gse2'),
(datacube, 'datacube'),
(suds, 'suds'),
(tdms_idas, 'tdms_idas'),
(hdf5_idas, 'hdf5_idas')]
for mod, fmt in formats:
if mod.detect(data):
return fmt
raise FileLoadError(UnknownFormat(filename))
[docs]def iload(filename, format='mseed', getdata=True, substitutions=None):
'''
Load traces from file (iterator version).
This function works like :py:func:`load`, but returns an iterator which
yields the loaded traces.
'''
load_data = getdata
toks = format.split('.', 1)
if len(toks) == 2:
format, subformat = toks
else:
subformat = None
try:
mtime = os.stat(filename)[8]
except OSError as e:
raise FileLoadError(e)
def subs(tr):
make_substitutions(tr, substitutions)
tr.set_mtime(mtime)
return tr
extension_to_format = {
'.yaff': 'yaff',
'.sac': 'sac',
'.kan': 'kan',
'.segy': 'segy',
'.sgy': 'segy',
'.gse': 'gse2',
'.wfdisc': 'css',
'.tdms': 'tdms_idas',
'.h5': 'hdf5_idas'
}
if format == 'from_extension':
format = 'mseed'
extension = os.path.splitext(filename)[1]
format = extension_to_format.get(extension.lower(), 'mseed')
if format == 'detect':
format = detect_format(filename)
format_to_module = {
'kan': kan,
'segy': segy,
'yaff': yaff,
'sac': sac,
'mseed': mseed,
'seisan': seisan_waveform,
'gse1': gse1,
'gse2': gse2,
'gcf': gcf,
'datacube': datacube,
'suds': suds,
'css': css,
'tdms_idas': tdms_idas,
'hdf5_idas': hdf5_idas
}
add_args = {
'seisan': {'subformat': subformat},
}
if format not in format_to_module:
raise UnsupportedFormat(format)
mod = format_to_module[format]
for tr in mod.iload(
filename, load_data=load_data, **add_args.get(format, {})):
yield subs(tr)
[docs]def save(traces, filename_template, format='mseed', additional={},
stations=None, overwrite=True, **kwargs):
'''
Save traces to file(s).
:param traces: a trace or an iterable of traces to store
:param filename_template: filename template with placeholders for trace
metadata. Uses normal python '%%(placeholder)s' string templates.
The following placeholders are considered: ``network``,
``station``, ``location``, ``channel``, ``tmin``
(time of first sample), ``tmax`` (time of last sample),
``tmin_ms``, ``tmax_ms``, ``tmin_us``, ``tmax_us``. The versions
with '_ms' include milliseconds, the versions with '_us' include
microseconds.
:param format: %s
:param additional: dict with custom template placeholder fillins.
:param overwrite': if ``False``, raise an exception if file exists
:returns: list of generated filenames
.. note::
Network, station, location, and channel codes may be silently truncated
to file format specific maximum lengthes.
'''
if isinstance(traces, trace.Trace):
traces = [traces]
if format == 'from_extension':
format = os.path.splitext(filename_template)[1][1:]
if format == 'mseed':
return mseed.save(traces, filename_template, additional,
overwrite=overwrite, **kwargs)
elif format == 'gse2':
return gse2.save(traces, filename_template, additional,
overwrite=overwrite, **kwargs)
elif format == 'sac':
fns = []
for tr in traces:
fn = tr.fill_template(filename_template, **additional)
if not overwrite and os.path.exists(fn):
raise FileSaveError('file exists: %s' % fn)
if fn in fns:
raise FileSaveError('file just created would be overwritten: '
'%s (multiple traces map to same filename)'
% fn)
util.ensuredirs(fn)
f = sac.SacFile(from_trace=tr)
if stations:
s = stations[tr.network, tr.station, tr.location]
f.stla = s.lat
f.stlo = s.lon
f.stel = s.elevation
f.stdp = s.depth
f.cmpinc = s.get_channel(tr.channel).dip + 90.
f.cmpaz = s.get_channel(tr.channel).azimuth
f.write(fn)
fns.append(fn)
return fns
elif format == 'text':
fns = []
for tr in traces:
fn = tr.fill_template(filename_template, **additional)
if not overwrite and os.path.exists(fn):
raise FileSaveError('file exists: %s' % fn)
if fn in fns:
raise FileSaveError('file just created would be overwritten: '
'%s (multiple traces map to same filename)'
% fn)
util.ensuredirs(fn)
x, y = tr.get_xdata(), tr.get_ydata()
num.savetxt(fn, num.transpose((x, y)))
fns.append(fn)
return fns
elif format == 'yaff':
return yaff.save(traces, filename_template, additional,
overwrite=overwrite, **kwargs)
else:
raise UnsupportedFormat(format)
save.__doc__ %= allowed_formats('save', 'doc')
class UnknownFormat(Exception):
def __init__(self, filename):
Exception.__init__(self, 'Unknown file format: %s' % filename)
class UnsupportedFormat(Exception):
def __init__(self, format):
Exception.__init__(self, 'Unsupported file format: %s' % format)
def make_substitutions(tr, substitutions):
if substitutions:
tr.set_codes(**substitutions)