Coverage for /usr/local/lib/python3.13/dist-packages/pyrocko/io/__init__.py: 91%

109 statements  

« prev     ^ index     » next       coverage.py v7.6.0, created at 2025-12-04 10:41 +0000

1# http://pyrocko.org - GPLv3 

2# 

3# The Pyrocko Developers, 21st Century 

4# ---|P------/S----------~Lg---------- 

5 

6''' 

7Low-level input and output of seismic waveforms, metadata and earthquake 

8catalogs. 

9 

10Input and output (IO) for various different file formats is implemented in the 

11submodules of :py:mod:`pyrocko.io`. :py:mod:`pyrocko.io` itself provides a 

12simple unified interface to load and save seismic waveforms to a few different 

13file formats. For a higher-level approach to accessing seismic data see 

14:doc:`/topics/squirrel`. 

15 

16.. rubric:: Seismic waveform IO 

17 

18The data model used for the :py:class:`~pyrocko.trace.Trace` objects in Pyrocko 

19is most closely matched by the Mini-SEED file format. However, one difference is

20that Mini-SEED limits the length of the network, station, location, and channel 

21codes to 2, 5, 2, and 3 characters, respectively. 

22 

23============ =========================== ========= ======== ====== 

24format format identifier load save note 

25============ =========================== ========= ======== ====== 

26Mini-SEED mseed yes yes 

27SAC sac yes yes [#f1]_ 

28SEG Y rev1 segy some 

29SEISAN seisan, seisan.l, seisan.b yes [#f2]_ 

30KAN kan yes [#f3]_ 

31YAFF yaff yes yes [#f4]_ 

32ASCII Table text yes [#f5]_ 

33GSE1 gse1 some 

34GSE2 gse2 some 

35DATACUBE datacube yes 

36SUDS suds some 

37CSS css yes 

38TDMS iDAS tdms_idas yes 

39HDF5 iDAS hdf5_idas yes 

40============ =========================== ========= ======== ====== 

41 

42.. rubric:: Notes 

43 

44.. [#f1] For SAC files, the endianness is guessed. Additional header 

45 information is stored in the `Trace`'s ``meta`` attribute. 

46.. [#f2] Seisan waveform files can be in little (``seisan.l``) or big endian 

47 (``seisan.b``) format. ``seisan`` currently is an alias for ``seisan.l``. 

48.. [#f3] The KAN file format has only been seen once by the author, and support 

49 for it may be removed again. 

50.. [#f4] YAFF is an in-house, experimental file format, which should not be 

51 released into the wild. 

52.. [#f5] ASCII tables with two columns (time and amplitude) are output - meta 

53 information will be lost. 

54 

55''' 

56 

57import os 

58import logging 

59from pyrocko import util, trace 

60 

61from . import (mseed, sac, kan, segy, yaff, seisan_waveform, gse1, gcf, 

62 datacube, suds, css, gse2, tdms_idas, hdf5_idas, hdf5_optodas) 

63from .io_common import FileLoadError, FileSaveError 

64 

65import numpy as num 

66 

67 

68logger = logging.getLogger('pyrocko.io') 

69 

70 

def allowed_formats(operation, use=None, default=None):
    '''
    Get the format identifiers accepted for loading or saving.

    :param operation: ``'load'`` or ``'save'``
    :param use: ``'doc'`` to get a comma-separated string of quoted,
        reST-literal identifiers (as inserted into docstrings),
        ``'cli_help'`` to get a plain comma-separated string in which the
        entry equal to *default* is tagged with ``[default]``; with any other
        value, the identifiers are returned as a list
    :param default: format identifier to be tagged as default, only used
        with ``use='cli_help'``

    :returns: list of format identifiers or a formatted string, depending on
        *use*
    :raises: :py:exc:`ValueError` if *operation* is neither ``'load'`` nor
        ``'save'``
    '''
    if operation == 'load':
        lst = ['detect', 'from_extension', 'mseed', 'sac', 'segy', 'seisan',
               'seisan.l', 'seisan.b', 'kan', 'yaff', 'gse1', 'gse2', 'gcf',
               'datacube', 'suds', 'css', 'tdms_idas', 'hdf5_idas']

    elif operation == 'save':
        lst = ['mseed', 'sac', 'text', 'yaff', 'gse2']

    else:
        # Fail with a clear message instead of hitting an unbound local
        # further down.
        raise ValueError(
            'Invalid operation: %s (expected "load" or "save")' % operation)

    if use == 'doc':
        return ', '.join("``'%s'``" % fmt for fmt in lst)

    elif use == 'cli_help':
        return ', '.join(
            fmt + (' [default]' if fmt == default else '')
            for fmt in lst)

    else:
        return lst

89 

90 

91g_formats_supporting_append = ['mseed'] 

92 

93 

def load(filename, format='mseed', getdata=True, substitutions=None):
    '''
    Load traces from file.

    :param filename: path of the file to load
    :param format: format of the file (%s)
    :param getdata: if ``True`` (the default), read data, otherwise only read
        traces metadata
    :param substitutions: dict with substitutions to be applied to the traces
        metadata

    :returns: list of loaded traces

    When *format* is set to ``'detect'``, the file type is guessed from the
    first 512 bytes of the file. The formats probed are, in this order: YAFF,
    Mini-SEED, SAC, GSE1, GSE2, DATACUBE, SUDS, TDMS iDAS, HDF5 iDAS, and HDF5
    OptoDAS. When *format* is set to ``'from_extension'``, the filename
    extension is used to decide what format should be assumed. The filename
    extensions considered are (matching is case insensitive): ``'.yaff'``,
    ``'.sac'``, ``'.kan'``, ``'.segy'``, ``'.sgy'``, ``'.gse'``,
    ``'.wfdisc'``, ``'.tdms'``, ``'.h5'``, ``'.hdf5'``, everything else is
    assumed to be in Mini-SEED format.

    This function calls :py:func:`iload` and aggregates the loaded traces in a
    list.
    '''

    return list(iload(
        filename, format=format, getdata=getdata, substitutions=substitutions))


# The docstring above is a template: its single ``%s`` placeholder is filled
# with the list of supported format identifiers at import time.
load.__doc__ %= allowed_formats('load', 'doc')

123 

124 

def detect_format(filename):
    '''
    Guess the format of a file from its first 512 bytes.

    The header is handed to the ``detect`` function of each candidate format
    module in turn; the identifier of the first module which accepts it is
    returned.

    :param filename: path of the file to inspect

    :returns: format identifier as a string
    :raises: :py:exc:`FileLoadError` if the file cannot be opened or read, or
        if none of the detectors recognizes the header (wrapping an
        :py:exc:`UnknownFormat` in the latter case)
    '''
    try:
        # The context manager closes the file on every path. A plain
        # ``finally: f.close()`` would itself fail when ``open`` raises,
        # because ``f`` is never bound, and would mask the FileLoadError.
        with open(filename, 'rb') as f:
            data = f.read(512)
    except OSError as e:
        raise FileLoadError(e)

    # Order matters: the first matching detector wins.
    formats = [
        (yaff, 'yaff'),
        (mseed, 'mseed'),
        (sac, 'sac'),
        (gse1, 'gse1'),
        (gse2, 'gse2'),
        (datacube, 'datacube'),
        (suds, 'suds'),
        (tdms_idas, 'tdms_idas'),
        (hdf5_idas, 'hdf5_idas'),
        (hdf5_optodas, 'hdf5_optodas')]

    for mod, fmt in formats:
        if mod.detect(data):
            return fmt

    raise FileLoadError(UnknownFormat(filename))

151 

152 

def iload(filename, format='mseed', getdata=True, substitutions=None):
    '''
    Load traces from file (iterator version).

    This function works like :py:func:`load`, but returns an iterator which
    yields the loaded traces.

    :param filename: path of the file to load
    :param format: format identifier, ``'detect'`` to guess the format from
        the file content, or ``'from_extension'`` to derive it from the
        filename extension (falling back to Mini-SEED). An identifier of the
        form ``'name.subformat'`` is split at the first dot; the subformat is
        currently only passed on to the ``seisan`` reader.
    :param getdata: if ``True`` (the default), read data, otherwise only read
        traces metadata
    :param substitutions: dict with substitutions to be applied to the traces
        metadata

    :raises: :py:exc:`FileLoadError` if the file cannot be accessed,
        :py:exc:`UnsupportedFormat` if no reader is available for *format*.
        As this is a generator, these are raised when iteration starts, not
        when the function is called.
    '''
    toks = format.split('.', 1)
    if len(toks) == 2:
        format, subformat = toks
    else:
        subformat = None

    try:
        # Index 8 of the stat result is the modification time in integer
        # seconds.
        mtime = os.stat(filename)[8]
    except OSError as e:
        raise FileLoadError(e)

    def subs(tr):
        # Post-process every trace: apply code substitutions and remember the
        # modification time of the file it came from.
        make_substitutions(tr, substitutions)
        tr.set_mtime(mtime)
        return tr

    extension_to_format = {
        '.yaff': 'yaff',
        '.sac': 'sac',
        '.kan': 'kan',
        '.segy': 'segy',
        '.sgy': 'segy',
        '.gse': 'gse2',
        '.wfdisc': 'css',
        '.tdms': 'tdms_idas',
        '.h5': 'hdf5_idas',
        '.hdf5': 'hdf5_optodas'
    }

    if format == 'from_extension':
        extension = os.path.splitext(filename)[1]
        # Unknown extensions are assumed to be Mini-SEED.
        format = extension_to_format.get(extension.lower(), 'mseed')

    if format == 'detect':
        format = detect_format(filename)

    format_to_module = {
        'kan': kan,
        'segy': segy,
        'yaff': yaff,
        'sac': sac,
        'mseed': mseed,
        'seisan': seisan_waveform,
        'gse1': gse1,
        'gse2': gse2,
        'gcf': gcf,
        'datacube': datacube,
        'suds': suds,
        'css': css,
        'tdms_idas': tdms_idas,
        'hdf5_idas': hdf5_idas,
        'hdf5_optodas': hdf5_optodas
    }

    # Extra keyword arguments for readers which need more than the filename.
    add_args = {
        'seisan': {'subformat': subformat},
    }

    if format not in format_to_module:
        raise UnsupportedFormat(format)

    mod = format_to_module[format]

    for tr in mod.iload(
            filename, load_data=getdata, **add_args.get(format, {})):

        yield subs(tr)

230 

231 

def _fill_unique_filename(tr, filename_template, additional, overwrite, fns):
    '''
    Expand the filename template for one trace and check it may be written.

    :param tr: trace providing the template placeholders
    :param filename_template: filename template to expand
    :param additional: dict with custom template placeholder fillins
    :param overwrite: if ``False``, refuse to replace an existing file
    :param fns: filenames already written during the current save call

    :returns: expanded filename, with its parent directories created
    :raises: :py:exc:`FileSaveError` if the file exists and *overwrite* is
        ``False``, or if the filename has already been used in this call
    '''
    fn = tr.fill_template(filename_template, **additional)
    if not overwrite and os.path.exists(fn):
        raise FileSaveError('file exists: %s' % fn)

    if fn in fns:
        raise FileSaveError('file just created would be overwritten: '
                            '%s (multiple traces map to same filename)'
                            % fn)

    util.ensuredirs(fn)
    return fn


def save(traces, filename_template, format='mseed', additional=None,
         stations=None, overwrite=True, append=False, check_append=False,
         check_append_merge=False, check_append_hook=None,
         **kwargs):
    '''
    Save traces to file(s).

    :param traces: a trace or an iterable of traces to store
    :param filename_template: filename template with placeholders for trace
            metadata. Uses normal python '%%(placeholder)s' string templates.
            The following placeholders are considered: ``network``,
            ``station``, ``location``, ``channel``, ``tmin``
            (time of first sample), ``tmax`` (time of last sample),
            ``tmin_ms``, ``tmax_ms``, ``tmin_us``, ``tmax_us``. The versions
            with '_ms' include milliseconds, the versions with '_us' include
            microseconds.
    :param format: %s. With ``'from_extension'``, the extension of
            *filename_template* (without the leading dot) is used as format
            identifier.
    :param additional: dict with custom template placeholder fillins
            (default: no additional fillins).
    :param stations: only used with format ``'sac'``: mapping from
            ``(network, station, location)`` tuples to station objects, used
            to fill the station coordinates and component orientation into
            the SAC headers
    :param overwrite: if ``False``, raise an exception if file exists
    :param append: append traces to the file if the file exists
    :param check_append: ensure that appended traces do not overlap with
        traces already present in the file
    :param check_append_merge: try to merge traces with already stored traces
        where check_append finds a conflict. ``append`` and
        ``check_append`` must be set to use this option.
    :param check_append_hook: callback queried for permission to append to an
        existing file (for example to prevent overwriting files which
        existed prior to the application start but to allow appending to
        files created in the current run). The callback takes a single
        argument, the current filename. If it returns ``False`` the save
        will either fail (if overwrite is ``False``) or truncate the file
        (if overwrite is True). If the hook returns ``True`` or if no hook
        is installed, appending is allowed.
    :param kwargs: additional keyword arguments, handed through to the
        Mini-SEED, GSE2, and YAFF writers
    :returns: list of generated filenames
    :raises: :py:exc:`FileSaveError` if a file exists and *overwrite* is
        ``False``, if several traces map to the same filename, or if
        *append* is requested for a format which does not support it;
        :py:exc:`UnsupportedFormat` if *format* is not a format which can be
        saved

    .. note::
        Network, station, location, and channel codes may be silently truncated
        to file format specific maximum lengths.
    '''

    # A fresh dict per call avoids sharing one mutable default between calls.
    if additional is None:
        additional = {}

    if isinstance(traces, trace.Trace):
        traces = [traces]

    if format == 'from_extension':
        format = os.path.splitext(filename_template)[1][1:]

    if append and format not in g_formats_supporting_append:
        raise FileSaveError(
            '`pyrocko.io.save` has been called with `append=True` but the '
            'file format `%s` does not support appending.' % format)

    if format == 'mseed':
        return mseed.save(
            traces, filename_template, additional,
            overwrite=overwrite,
            append=append,
            check_append=check_append,
            check_append_merge=check_append_merge,
            check_append_hook=check_append_hook,
            **kwargs)

    elif format == 'gse2':
        return gse2.save(traces, filename_template, additional,
                         overwrite=overwrite, **kwargs)

    elif format == 'sac':
        fns = []
        for tr in traces:
            fn = _fill_unique_filename(
                tr, filename_template, additional, overwrite, fns)

            f = sac.SacFile(from_trace=tr)
            if stations:
                s = stations[tr.network, tr.station, tr.location]
                f.stla = s.lat
                f.stlo = s.lon
                f.stel = s.elevation
                f.stdp = s.depth
                channel = s.get_channel(tr.channel)
                # The SAC header stores the dip shifted by 90 degrees.
                f.cmpinc = channel.dip + 90.
                f.cmpaz = channel.azimuth

            f.write(fn)
            fns.append(fn)

        return fns

    elif format == 'text':
        fns = []
        for tr in traces:
            fn = _fill_unique_filename(
                tr, filename_template, additional, overwrite, fns)

            # Two-column table: time and amplitude.
            x, y = tr.get_xdata(), tr.get_ydata()
            num.savetxt(fn, num.transpose((x, y)))
            fns.append(fn)

        return fns

    elif format == 'yaff':
        return yaff.save(traces, filename_template, additional,
                         overwrite=overwrite, **kwargs)
    else:
        raise UnsupportedFormat(format)


# The docstring above is a template: its ``%s`` placeholder is filled with the
# list of supported format identifiers at import time (``%%`` yields a literal
# percent sign).
save.__doc__ %= allowed_formats('save', 'doc')

352 

353 

class UnknownFormat(Exception):
    '''
    Raised when the format of a file could not be determined.
    '''

    def __init__(self, filename):
        message = 'Unknown file format: %s' % filename
        super().__init__(message)

357 

358 

class UnsupportedFormat(Exception):
    '''
    Raised when no reader or writer is available for a requested format.
    '''

    def __init__(self, format):
        message = 'Unsupported file format: %s' % format
        super().__init__(message)

362 

363 

def make_substitutions(tr, substitutions):
    '''
    Apply code substitutions to a trace, in place.

    :param tr: trace to modify
    :param substitutions: dict handed as keyword arguments to the trace's
        ``set_codes`` method; nothing is done if it is empty or ``None``
    '''
    if not substitutions:
        return

    tr.set_codes(**substitutions)