Coverage for /usr/local/lib/python3.11/dist-packages/pyrocko/io/__init__.py: 91%
108 statements
« prev ^ index » next coverage.py v6.5.0, created at 2023-10-06 06:59 +0000
« prev ^ index » next coverage.py v6.5.0, created at 2023-10-06 06:59 +0000
1# http://pyrocko.org - GPLv3
2#
3# The Pyrocko Developers, 21st Century
4# ---|P------/S----------~Lg----------
6'''
7Low-level input and output of seismic waveforms, metadata and earthquake
8catalogs.
10Input and output (IO) for various different file formats is implemented in the
11submodules of :py:mod:`pyrocko.io`. :py:mod:`pyrocko.io` itself provides a
12simple unified interface to load and save seismic waveforms to a few different
13file formats. For a higher-level approach to accessing seismic data see
14:doc:`/topics/squirrel`.
16.. rubric:: Seismic waveform IO
18The data model used for the :py:class:`~pyrocko.trace.Trace` objects in Pyrocko
19is most closely matched by the Mini-SEED file format. One difference, however,
20is that Mini-SEED limits the length of the network, station, location, and
21channel codes to 2, 5, 2, and 3 characters, respectively.
23============ =========================== ========= ======== ======
24format format identifier load save note
25============ =========================== ========= ======== ======
26Mini-SEED mseed yes yes
27SAC sac yes yes [#f1]_
28SEG Y rev1 segy some
29SEISAN seisan, seisan.l, seisan.b yes [#f2]_
30KAN kan yes [#f3]_
31YAFF yaff yes yes [#f4]_
32ASCII Table text yes [#f5]_
33GSE1 gse1 some
34GSE2 gse2 some
35DATACUBE datacube yes
36SUDS suds some
37CSS css yes
38TDMS iDAS tdms_idas yes
39HDF5 iDAS hdf5_idas yes
40============ =========================== ========= ======== ======
42.. rubric:: Notes
44.. [#f1] For SAC files, the endianness is guessed. Additional header
45 information is stored in the `Trace`'s ``meta`` attribute.
46.. [#f2] Seisan waveform files can be in little (``seisan.l``) or big endian
47 (``seisan.b``) format. ``seisan`` currently is an alias for ``seisan.l``.
48.. [#f3] The KAN file format has only been seen once by the author, and support
49 for it may be removed again.
50.. [#f4] YAFF is an in-house, experimental file format, which should not be
51 released into the wild.
52.. [#f5] ASCII tables with two columns (time and amplitude) are output - meta
53 information will be lost.
55'''
57import os
58import logging
59from pyrocko import util, trace
61from . import (mseed, sac, kan, segy, yaff, seisan_waveform, gse1, gcf,
62 datacube, suds, css, gse2, tdms_idas, hdf5_idas)
63from .io_common import FileLoadError, FileSaveError
65import numpy as num
68logger = logging.getLogger('pyrocko.io')
def allowed_formats(operation, use=None, default=None):
    '''
    Get the format identifiers available for loading or saving.

    :param operation: ``'load'`` or ``'save'``
    :param use: ``'doc'`` to get the identifiers as a comma separated string
        with docstring markup, ``'cli_help'`` to get them as a plain comma
        separated string, anything else to get them as a list
    :param default: only used with ``use='cli_help'``: the identifier equal
        to this value gets ``' [default]'`` appended

    :returns: list of format identifiers, or a formatted string, depending
        on *use*
    :raises: :py:exc:`ValueError` if *operation* is neither ``'load'`` nor
        ``'save'``
    '''
    if operation == 'load':
        lst = ['detect', 'from_extension', 'mseed', 'sac', 'segy', 'seisan',
               'seisan.l', 'seisan.b', 'kan', 'yaff', 'gse1', 'gse2', 'gcf',
               'datacube', 'suds', 'css', 'tdms_idas', 'hdf5_idas']

    elif operation == 'save':
        lst = ['mseed', 'sac', 'text', 'yaff', 'gse2']

    else:
        # Fail with a clear message instead of an UnboundLocalError further
        # down when `lst` would never have been assigned.
        raise ValueError(
            "Invalid operation: %r (expected 'load' or 'save')"
            % (operation,))

    if use == 'doc':
        return ', '.join("``'%s'``" % fmt for fmt in lst)

    elif use == 'cli_help':
        return ', '.join(
            fmt + ' [default]' if fmt == default else fmt
            for fmt in lst)

    else:
        return lst
def load(filename, format='mseed', getdata=True, substitutions=None):
    '''
    Load traces from file.

    :param filename: path of the file to read
    :param format: format of the file (%s)
    :param getdata: if ``True`` (the default), read data, otherwise only read
        traces metadata
    :param substitutions: dict with substitutions to be applied to the traces
        metadata

    :returns: list of loaded traces

    When *format* is set to ``'detect'``, the file type is guessed from the
    first 512 bytes of the file. The formats tried are YAFF, Mini-SEED, SAC,
    GSE1, GSE2, DATACUBE, SUDS, TDMS iDAS, and HDF5 iDAS. When *format* is set
    to ``'from_extension'``, the filename extension is used to decide what
    format should be assumed. The filename extensions considered are
    (matching is case insensitive): ``'.yaff'``, ``'.sac'``, ``'.kan'``,
    ``'.segy'``, ``'.sgy'``, ``'.gse'``, ``'.wfdisc'``, ``'.tdms'``,
    ``'.h5'``, everything else is assumed to be in Mini-SEED format.

    This function calls :py:func:`iload` and aggregates the loaded traces in a
    list.
    '''

    # Exhaust the generator right away so that callers get a plain list.
    traces = iload(
        filename, format=format, getdata=getdata, substitutions=substitutions)

    return list(traces)


# Fill the list of valid format identifiers into the ``%s`` placeholder of the
# docstring above.
load.__doc__ %= allowed_formats('load', 'doc')
def detect_format(filename):
    '''
    Guess the format of a file from its first 512 bytes.

    The ``detect()`` function of each candidate format module is called with
    the bytes read, in a fixed order; the first module reporting a match
    decides the result.

    :param filename: path of the file to inspect

    :returns: format identifier as a string
    :raises: :py:exc:`FileLoadError` if the file cannot be opened or read, or
        if no candidate matches (in that case the error wraps an
        :py:exc:`UnknownFormat`)
    '''
    try:
        # The context manager closes the file on every path. It also avoids
        # touching an unbound file handle when open() itself fails, which
        # would hide the FileLoadError behind an UnboundLocalError.
        with open(filename, 'rb') as f:
            data = f.read(512)

    except OSError as e:
        raise FileLoadError(e)

    formats = [
        (yaff, 'yaff'),
        (mseed, 'mseed'),
        (sac, 'sac'),
        (gse1, 'gse1'),
        (gse2, 'gse2'),
        (datacube, 'datacube'),
        (suds, 'suds'),
        (tdms_idas, 'tdms_idas'),
        (hdf5_idas, 'hdf5_idas')]

    for mod, fmt in formats:
        if mod.detect(data):
            return fmt

    raise FileLoadError(UnknownFormat(filename))
def iload(filename, format='mseed', getdata=True, substitutions=None):
    '''
    Load traces from file (iterator version).

    Takes the same arguments as :py:func:`load`, but hands out the loaded
    traces one by one through an iterator instead of collecting them in a
    list.
    '''
    # Whatever follows the first dot of the format identifier is a subformat,
    # e.g. 'seisan.b' is format 'seisan' with subformat 'b'.
    format, dot, remainder = format.partition('.')
    subformat = remainder if dot else None

    try:
        # Element 8 of the stat result is the modification time as an
        # integer.
        mtime = os.stat(filename)[8]
    except OSError as e:
        raise FileLoadError(e)

    if format == 'from_extension':
        by_extension = {
            '.yaff': 'yaff',
            '.sac': 'sac',
            '.kan': 'kan',
            '.segy': 'segy',
            '.sgy': 'segy',
            '.gse': 'gse2',
            '.wfdisc': 'css',
            '.tdms': 'tdms_idas',
            '.h5': 'hdf5_idas',
        }

        # Unrecognized extensions fall back to Mini-SEED.
        suffix = os.path.splitext(filename)[1].lower()
        format = by_extension.get(suffix, 'mseed')

    if format == 'detect':
        format = detect_format(filename)

    readers = {
        'mseed': mseed,
        'sac': sac,
        'segy': segy,
        'seisan': seisan_waveform,
        'kan': kan,
        'yaff': yaff,
        'gse1': gse1,
        'gse2': gse2,
        'gcf': gcf,
        'datacube': datacube,
        'suds': suds,
        'css': css,
        'tdms_idas': tdms_idas,
        'hdf5_idas': hdf5_idas,
    }

    mod = readers.get(format)
    if mod is None:
        raise UnsupportedFormat(format)

    # Only the seisan reader is given the subformat.
    extra = {'subformat': subformat} if format == 'seisan' else {}

    for tr in mod.iload(filename, load_data=getdata, **extra):
        make_substitutions(tr, substitutions)
        tr.set_mtime(mtime)
        yield tr
def _claim_filename(tr, filename_template, additional, overwrite, taken):
    '''
    Expand the filename template for one trace and check it may be written.

    :param tr: trace providing ``fill_template()``
    :param filename_template: filename template, see :py:func:`save`
    :param additional: dict with custom template placeholder fillins
    :param overwrite: if ``False``, refuse filenames which already exist
    :param taken: filenames already written during the current save call

    :returns: the expanded filename
    :raises: :py:exc:`FileSaveError` if the file exists and *overwrite* is
        ``False``, or if the filename is already in *taken*
    '''
    fn = tr.fill_template(filename_template, **additional)
    if not overwrite and os.path.exists(fn):
        raise FileSaveError('file exists: %s' % fn)

    if fn in taken:
        raise FileSaveError('file just created would be overwritten: '
                            '%s (multiple traces map to same filename)'
                            % fn)

    # NOTE(review): presumably creates missing parent directories of `fn`;
    # util.ensuredirs is defined outside of this file.
    util.ensuredirs(fn)
    return fn


def save(traces, filename_template, format='mseed', additional={},
         stations=None, overwrite=True, **kwargs):
    '''
    Save traces to file(s).

    :param traces: a trace or an iterable of traces to store
    :param filename_template: filename template with placeholders for trace
        metadata. Uses normal python '%%(placeholder)s' string templates.
        The following placeholders are considered: ``network``,
        ``station``, ``location``, ``channel``, ``tmin``
        (time of first sample), ``tmax`` (time of last sample),
        ``tmin_ms``, ``tmax_ms``, ``tmin_us``, ``tmax_us``. The versions
        with '_ms' include milliseconds, the versions with '_us' include
        microseconds.
    :param format: %s. With ``'from_extension'``, the filename extension of
        *filename_template* (without the dot) is used as format identifier.
    :param additional: dict with custom template placeholder fillins.
    :param stations: only used with format ``'sac'``: if given, it is indexed
        with ``(network, station, location)`` of each trace and the station
        found is used to fill in station location and component orientation
        headers
    :param overwrite: if ``False``, raise an exception if file exists
    :param kwargs: passed on to the save function of the format module for
        formats ``'mseed'``, ``'gse2'``, and ``'yaff'``; not used for
        ``'sac'`` and ``'text'``
    :returns: list of generated filenames
    :raises: :py:exc:`FileSaveError` (formats ``'sac'`` and ``'text'``) if a
        file exists and *overwrite* is ``False``, or if several traces map
        to the same filename; :py:exc:`UnsupportedFormat` if *format* is not
        available for saving

    .. note::
        Network, station, location, and channel codes may be silently truncated
        to file format specific maximum lengths.
    '''

    # The default for `additional` is a shared dict; this function only reads
    # it and passes it on.

    if isinstance(traces, trace.Trace):
        traces = [traces]

    if format == 'from_extension':
        format = os.path.splitext(filename_template)[1][1:]

    if format == 'mseed':
        return mseed.save(traces, filename_template, additional,
                          overwrite=overwrite, **kwargs)

    elif format == 'gse2':
        return gse2.save(traces, filename_template, additional,
                         overwrite=overwrite, **kwargs)

    elif format == 'sac':
        fns = []
        for tr in traces:
            fn = _claim_filename(
                tr, filename_template, additional, overwrite, fns)

            f = sac.SacFile(from_trace=tr)
            if stations:
                s = stations[tr.network, tr.station, tr.location]
                f.stla = s.lat
                f.stlo = s.lon
                f.stel = s.elevation
                f.stdp = s.depth
                # The SAC header takes the channel's dip shifted by 90
                # degrees as component inclination.
                f.cmpinc = s.get_channel(tr.channel).dip + 90.
                f.cmpaz = s.get_channel(tr.channel).azimuth

            f.write(fn)
            fns.append(fn)

        return fns

    elif format == 'text':
        fns = []
        for tr in traces:
            fn = _claim_filename(
                tr, filename_template, additional, overwrite, fns)

            # Two columns per file: x data and y data of the trace.
            x, y = tr.get_xdata(), tr.get_ydata()
            num.savetxt(fn, num.transpose((x, y)))
            fns.append(fn)

        return fns

    elif format == 'yaff':
        return yaff.save(traces, filename_template, additional,
                         overwrite=overwrite, **kwargs)
    else:
        raise UnsupportedFormat(format)


# Fill the list of valid format identifiers into the ``%s`` placeholder of the
# docstring above.
save.__doc__ %= allowed_formats('save', 'doc')
class UnknownFormat(Exception):
    '''
    Raised when the format of a file could not be determined.
    '''

    def __init__(self, filename):
        message = 'Unknown file format: %s' % filename
        super().__init__(message)
class UnsupportedFormat(Exception):
    '''
    Raised when a format identifier is not available for the operation.
    '''

    def __init__(self, format):
        message = 'Unsupported file format: %s' % format
        super().__init__(message)
def make_substitutions(tr, substitutions):
    '''
    Apply metadata substitutions to a trace in place.

    :param tr: object providing ``set_codes()``
    :param substitutions: dict passed as keyword arguments to
        ``tr.set_codes()``; nothing is done if it is empty or ``None``
    '''
    if not substitutions:
        return

    tr.set_codes(**substitutions)