Source code for pyrocko.io

# http://pyrocko.org - GPLv3
#
# The Pyrocko Developers, 21st Century
# ---|P------/S----------~Lg----------

'''
Low-level input and output of seismic waveforms, metadata and earthquake
catalogs.

Input and output (IO) for various different file formats is implemented in the
submodules of :py:mod:`pyrocko.io`. :py:mod:`pyrocko.io` itself provides a
simple unified interface to load and save seismic waveforms to a few different
file formats. For a higher-level approach to accessing seismic data see
:doc:`/topics/squirrel`.

.. rubric:: Seismic waveform IO

The data model used for the :py:class:`~pyrocko.trace.Trace` objects in Pyrocko
is most closely matched by the Mini-SEED file format. One difference, however,
is that Mini-SEED limits the length of the network, station, location, and
channel codes to 2, 5, 2, and 3 characters, respectively.

============ =========================== ========= ======== ======
format       format identifier           load      save     note
============ =========================== ========= ======== ======
Mini-SEED    mseed                       yes       yes
SAC          sac                         yes       yes      [#f1]_
SEG Y rev1   segy                        some
SEISAN       seisan, seisan.l, seisan.b  yes                [#f2]_
KAN          kan                         yes                [#f3]_
YAFF         yaff                        yes       yes      [#f4]_
ASCII Table  text                                  yes      [#f5]_
GSE1         gse1                        some
GSE2         gse2                        some      yes
DATACUBE     datacube                    yes
SUDS         suds                        some
CSS          css                         yes
TDMS iDAS    tdms_idas                   yes
HDF5 iDAS    hdf5_idas                   yes
============ =========================== ========= ======== ======

.. rubric:: Notes

.. [#f1] For SAC files, the endianness is guessed. Additional header
    information is stored in the `Trace`'s ``meta`` attribute.
.. [#f2] Seisan waveform files can be in little (``seisan.l``) or big endian
    (``seisan.b``) format. ``seisan`` currently is an alias for ``seisan.l``.
.. [#f3] The KAN file format has only been seen once by the author, and support
    for it may be removed again.
.. [#f4] YAFF is an in-house, experimental file format, which should not be
    released into the wild.
.. [#f5] ASCII tables with two columns (time and amplitude) are output - meta
    information will be lost.

'''

import os
import logging
from pyrocko import util, trace

from . import (mseed, sac, kan, segy, yaff, seisan_waveform, gse1, gcf,
               datacube, suds, css, gse2, tdms_idas, hdf5_idas)
from .io_common import FileLoadError, FileSaveError

import numpy as num


# Module-level logger, registered under the name 'pyrocko.io'.
logger = logging.getLogger('pyrocko.io')


def allowed_formats(operation, use=None, default=None):
    '''
    Get the format identifiers available for loading or saving.

    :param operation: ``'load'`` or ``'save'``
    :param use: selects the shape of the result: ``None`` (the default)
        gives a list of format identifiers, ``'doc'`` gives a
        comma-separated string with every identifier quoted as an inline
        literal (for use in docstrings), ``'cli_help'`` gives a plain
        comma-separated string in which the entry equal to *default* is
        tagged with ``' [default]'``
    :param default: format identifier to be tagged as default, only used
        with ``use='cli_help'``

    :returns: list of format identifiers or a string, depending on *use*
    :raises ValueError: if *operation* is neither ``'load'`` nor ``'save'``
    '''
    if operation == 'load':
        lst = ['detect', 'from_extension', 'mseed', 'sac', 'segy', 'seisan',
               'seisan.l', 'seisan.b', 'kan', 'yaff', 'gse1', 'gse2', 'gcf',
               'datacube', 'suds', 'css', 'tdms_idas', 'hdf5_idas']

    elif operation == 'save':
        lst = ['mseed', 'sac', 'text', 'yaff', 'gse2']

    else:
        # Without this branch, `lst` would be unbound below and the caller
        # would get an obscure UnboundLocalError.
        raise ValueError(
            "operation must be 'load' or 'save', got %r" % (operation,))

    if use == 'doc':
        return ', '.join("``'%s'``" % fmt for fmt in lst)

    elif use == 'cli_help':
        return ', '.join(
            fmt + (' [default]' if fmt == default else '')
            for fmt in lst)

    else:
        return lst


# Formats for which `save` accepts `append=True`; for any other format,
# `save` raises FileSaveError when appending is requested.
g_formats_supporting_append = ['mseed']


def load(filename, format='mseed', getdata=True, substitutions=None):
    '''
    Load traces from file.

    :param filename: path of the file to read
    :param format: format of the file (%s)
    :param getdata: if ``True`` (the default), read data, otherwise only read
        traces metadata
    :param substitutions:  dict with substitutions to be applied to the traces
        metadata

    :returns: list of loaded traces

    When *format* is set to ``'detect'``, the file type is guessed from the
    first 512 bytes of the file. Only Mini-SEED, SAC, GSE1, GSE2, YAFF,
    DATACUBE, SUDS, TDMS iDAS, and HDF5 iDAS format are detected. When
    *format* is set to ``'from_extension'``, the filename extension is used to
    decide what format should be assumed. The filename extensions considered
    are (matching is case insensitive): ``'.sac'``, ``'.kan'``, ``'.sgy'``,
    ``'.segy'``, ``'.yaff'``, ``'.gse'``, ``'.wfdisc'``, ``'.tdms'``,
    ``'.h5'``, everything else is assumed to be in Mini-SEED format.

    This function calls :py:func:`iload` and aggregates the loaded traces in a
    list.
    '''

    # NOTE: this docstring is completed with the %-operator after the function
    # definition, so it must contain exactly one format placeholder.
    return list(iload(
        filename, format=format, getdata=getdata,
        substitutions=substitutions))


# Fill the list of supported formats into the docstring of `load`. The guard
# keeps the module importable when docstrings are stripped (``python -OO``),
# in which case `load.__doc__` is None.
if load.__doc__:
    load.__doc__ %= allowed_formats('load', 'doc')


def detect_format(filename):
    '''
    Guess the format of a waveform file from its first 512 bytes.

    The leading bytes of the file are handed to the ``detect`` function of
    each candidate format module in a fixed order; the first match wins.

    :param filename: path of the file to inspect

    :returns: format identifier of the first module recognising the data
    :raises FileLoadError: if the file cannot be opened or read, or if none
        of the candidate modules recognises the data (in the latter case the
        error wraps an :py:exc:`UnknownFormat` instance)
    '''
    try:
        # The context manager closes the file on every path. Closing it in a
        # `finally` clause would fail with an UnboundLocalError when `open`
        # itself raises, hiding the FileLoadError from the caller.
        with open(filename, 'rb') as f:
            data = f.read(512)

    except OSError as e:
        raise FileLoadError(e) from e

    formats = [
        (yaff, 'yaff'),
        (mseed, 'mseed'),
        (sac, 'sac'),
        (gse1, 'gse1'),
        (gse2, 'gse2'),
        (datacube, 'datacube'),
        (suds, 'suds'),
        (tdms_idas, 'tdms_idas'),
        (hdf5_idas, 'hdf5_idas')]

    for mod, fmt in formats:
        if mod.detect(data):
            return fmt

    raise FileLoadError(UnknownFormat(filename))


def iload(filename, format='mseed', getdata=True, substitutions=None):
    '''
    Load traces from file (iterator version).

    This function works like :py:func:`load`, but returns an iterator which
    yields the loaded traces.
    '''

    # The format identifier may carry a subformat after the first dot, e.g.
    # 'seisan.b'. Without a dot there is no subformat.
    format, dot, subformat = format.partition('.')
    if not dot:
        subformat = None

    try:
        # Element 8 of the stat result is the modification time.
        mtime = os.stat(filename)[8]
    except OSError as e:
        raise FileLoadError(e)

    if format == 'from_extension':
        by_extension = {
            '.yaff': 'yaff',
            '.sac': 'sac',
            '.kan': 'kan',
            '.segy': 'segy',
            '.sgy': 'segy',
            '.gse': 'gse2',
            '.wfdisc': 'css',
            '.tdms': 'tdms_idas',
            '.h5': 'hdf5_idas'}

        # Unknown extensions fall back to Mini-SEED.
        suffix = os.path.splitext(filename)[1].lower()
        format = by_extension.get(suffix, 'mseed')

    if format == 'detect':
        format = detect_format(filename)

    readers = {
        'kan': kan,
        'segy': segy,
        'yaff': yaff,
        'sac': sac,
        'mseed': mseed,
        'seisan': seisan_waveform,
        'gse1': gse1,
        'gse2': gse2,
        'gcf': gcf,
        'datacube': datacube,
        'suds': suds,
        'css': css,
        'tdms_idas': tdms_idas,
        'hdf5_idas': hdf5_idas}

    reader = readers.get(format)
    if reader is None:
        raise UnsupportedFormat(format)

    # Only the seisan reader takes an extra argument (the subformat).
    if format == 'seisan':
        extra = {'subformat': subformat}
    else:
        extra = {}

    for tr in reader.iload(filename, load_data=getdata, **extra):
        make_substitutions(tr, substitutions)
        tr.set_mtime(mtime)
        yield tr


def _prepare_output_filename(
        tr, filename_template, additional, overwrite, fns):
    '''
    Fill the filename template for a trace and check that it may be written.

    :param tr: trace for which the filename is generated
    :param filename_template: filename template, see :py:func:`save`
    :param additional: dict with custom template placeholder fillins
    :param overwrite: if ``False``, refuse to use the name of an existing file
    :param fns: filenames already written during the current save call

    :returns: the generated filename; its parent directories are created
    :raises FileSaveError: if the file exists and *overwrite* is ``False``,
        or if the filename is already contained in *fns*
    '''
    fn = tr.fill_template(filename_template, **additional)
    if not overwrite and os.path.exists(fn):
        raise FileSaveError('file exists: %s' % fn)

    if fn in fns:
        raise FileSaveError('file just created would be overwritten: '
                            '%s (multiple traces map to same filename)' % fn)

    util.ensuredirs(fn)
    return fn


def save(traces, filename_template, format='mseed', additional=None,
         stations=None, overwrite=True, append=False, check_append=False,
         check_append_hook=None, **kwargs):
    '''
    Save traces to file(s).

    :param traces: a trace or an iterable of traces to store
    :param filename_template: filename template with placeholders for trace
            metadata. Uses normal python '%%(placeholder)s' string templates.
            The following placeholders are considered: ``network``,
            ``station``, ``location``, ``channel``, ``tmin``
            (time of first sample), ``tmax`` (time of last sample),
            ``tmin_ms``, ``tmax_ms``, ``tmin_us``, ``tmax_us``. The versions
            with '_ms' include milliseconds, the versions with '_us' include
            microseconds.
    :param format: %s. With ``'from_extension'``, the extension of
            *filename_template* (without the leading dot) is used as format
            identifier.
    :param additional: dict with custom template placeholder fillins.
    :param stations: optional mapping from ``(network, station, location)``
            code tuples to station objects. Only used with the ``'sac'``
            format, where it provides station coordinates and channel
            orientations for the SAC header.
    :param overwrite: if ``False``, raise an exception if file exists
    :param append: append traces to the file if the file exists
    :param check_append: ensure that appended traces do not overlap with
        traces already present in the file
    :param check_append_hook: callback queried for permission to append to an
        existing file (for example to prevent overwriting files which existed
        prior to the application start but to allow appending to files created
        in the current run). The callback takes a single argument, the current
        filename. If it returns ``False`` the save will either fail (if
        overwrite is ``False``) or truncate the file (if overwrite is True). If
        the hook returns ``True`` or if no hook is installed, appending is
        allowed.
    :param kwargs: passed on to the save function of the format module for
        the ``'mseed'``, ``'gse2'``, and ``'yaff'`` formats

    :returns: list of generated filenames
    :raises FileSaveError: if *append* is requested for a format which does
        not support it, if a file exists and *overwrite* is ``False``, or if
        several traces map to the same filename (``'sac'`` and ``'text'``
        formats)
    :raises UnsupportedFormat: if *format* is not a known save format

    .. note::
        Network, station, location, and channel codes may be silently truncated
        to file format specific maximum lengths.
    '''

    # NOTE: this docstring is completed with the %-operator after the function
    # definition; literal percent signs in it must be doubled.

    if additional is None:
        # Fresh dict per call instead of a shared mutable default argument.
        additional = {}

    if isinstance(traces, trace.Trace):
        traces = [traces]

    if format == 'from_extension':
        format = os.path.splitext(filename_template)[1][1:]

    if append and format not in g_formats_supporting_append:
        raise FileSaveError(
            '`pyrocko.io.save` has been called with `append=True` but the '
            'file format `%s` does not support appending.' % format)

    if format == 'mseed':
        return mseed.save(
            traces, filename_template, additional,
            overwrite=overwrite,
            append=append,
            check_append=check_append,
            check_append_hook=check_append_hook,
            **kwargs)

    elif format == 'gse2':
        return gse2.save(traces, filename_template, additional,
                         overwrite=overwrite, **kwargs)

    elif format == 'sac':
        fns = []
        for tr in traces:
            fn = _prepare_output_filename(
                tr, filename_template, additional, overwrite, fns)

            f = sac.SacFile(from_trace=tr)
            if stations:
                s = stations[tr.network, tr.station, tr.location]
                f.stla = s.lat
                f.stlo = s.lon
                f.stel = s.elevation
                f.stdp = s.depth
                f.cmpinc = s.get_channel(tr.channel).dip + 90.
                f.cmpaz = s.get_channel(tr.channel).azimuth

            f.write(fn)
            fns.append(fn)

        return fns

    elif format == 'text':
        fns = []
        for tr in traces:
            fn = _prepare_output_filename(
                tr, filename_template, additional, overwrite, fns)

            # Two-column table: time and amplitude.
            x, y = tr.get_xdata(), tr.get_ydata()
            num.savetxt(fn, num.transpose((x, y)))
            fns.append(fn)

        return fns

    elif format == 'yaff':
        return yaff.save(traces, filename_template, additional,
                         overwrite=overwrite, **kwargs)

    else:
        raise UnsupportedFormat(format)


# Fill the list of supported formats into the docstring of `save`. The guard
# keeps the module importable when docstrings are stripped (``python -OO``),
# in which case `save.__doc__` is None.
if save.__doc__:
    save.__doc__ %= allowed_formats('save', 'doc')


class UnknownFormat(Exception):
    '''
    Signals that the format of a file could not be detected.

    :param filename: name of the offending file, included in the message
    '''

    def __init__(self, filename):
        Exception.__init__(self, 'Unknown file format: %s' % filename)


class UnsupportedFormat(Exception):
    '''
    Signals that a requested file format identifier is not supported.

    :param format: the offending format identifier, included in the message
    '''

    def __init__(self, format):
        Exception.__init__(self, 'Unsupported file format: %s' % format)


def make_substitutions(tr, substitutions):
    '''
    Apply substitutions to the codes of a trace, in place.

    :param tr: trace to be modified
    :param substitutions: dict passed as keyword arguments to the trace's
        ``set_codes`` method; nothing is done if it is empty or ``None``
    '''
    if substitutions:
        tr.set_codes(**substitutions)