1#!/usr/bin/env python
2# http://pyrocko.org - GPLv3
3#
4# The Pyrocko Developers, 21st Century
5# ---|P------/S----------~Lg----------
6from __future__ import absolute_import, division
8import sys
9import re
10import os
11import logging
12import signal
13import math
14from copy import copy
15from optparse import OptionParser, Option, OptionValueError
17import numpy as num
19from pyrocko import util, config, pile, model, io, trace
20from pyrocko.io import stationxml
# Shorthand for building filesystem paths.
pjoin = os.path.join

logger = logging.getLogger('pyrocko.apps.jackseis')

program_name = 'jackseis'

# Command line synopsis shown in --help output.
usage = program_name + ' <inputs> ... [options]'

description = '''A simple tool to manipulate waveform archive data.'''

# Human-readable time format, referenced in --help texts and error messages.
tfmt = 'YYYY-mm-dd HH:MM:SS[.xxx]'

# Shorthands for pyrocko's time <-> string conversion helpers.
tts = util.time_to_str
stt = util.str_to_time
def die(message):
    '''Terminate the program, printing *message* prefixed with the tool name.'''
    error_text = '%s: error: %s' % (program_name, message)
    sys.exit(error_text)
# Mapping of --output-data-type choices to numpy dtypes.
name_to_dtype = {
    'int32': num.int32,
    'int64': num.int64,
    'float32': num.float32,
    'float64': num.float64}

# Mapping of --output-quantity choices to SI unit strings, used as fake
# input units when looking up StationXML responses for restitution.
output_units = {
    'acceleration': 'M/S**2',
    'velocity': 'M/S',
    'displacement': 'M'}
def str_to_seconds(s):
    '''Convert a time span string to a float number of seconds.

    A trailing ``'s'``, ``'m'``, ``'h'`` or ``'d'`` selects seconds,
    minutes, hours or days; a bare number is taken as seconds.
    '''
    multipliers = {'s': 1., 'm': 60., 'h': 3600., 'd': 3600.*24.}
    suffix = s[-1:]
    if suffix in multipliers:
        return float(s[:-1]) * multipliers[suffix]

    return float(s)
def nice_seconds_floor(s):
    '''Round a time span *s* [s] down to the nearest "nice" value.

    Nice values range from 1 s to 48 h. Values below the smallest nice
    value or above the largest one are returned unchanged.
    '''
    nice_values = (
        1., 10., 60., 600., 3600., 3.*3600., 12*3600., 24*3600., 48*3600.)

    result = s
    for candidate in nice_values:
        if s < candidate:
            return result

        result = candidate

    return s
def check_record_length(option, opt, value):
    '''optparse type checker for the custom ``'record_length'`` option type.

    Returns the record length as ``int`` if it is one of the valid mseed
    record lengths, otherwise raises :py:exc:`OptionValueError`.
    '''
    try:
        reclen = int(value)
        if reclen in io.mseed.VALID_RECORD_LENGTHS:
            return reclen
    except (ValueError, TypeError):
        pass

    # Report the raw input value: ``reclen`` is unbound when int()
    # conversion failed, which previously raised a NameError here.
    raise OptionValueError(
        'invalid record length %s. (choose from %s)'
        % (value, ', '.join(str(b) for b in io.mseed.VALID_RECORD_LENGTHS)))
def check_steim(option, opt, value):
    '''optparse type checker for the custom ``'steim'`` option type.

    Returns the STEIM compression level as ``int`` if it is 1 or 2,
    otherwise raises :py:exc:`OptionValueError`.
    '''
    try:
        compression = int(value)
        if compression in (1, 2):
            return compression
    except (ValueError, TypeError):
        pass

    # Report the raw input value: ``compression`` is unbound when int()
    # conversion failed, which previously raised a NameError here.
    raise OptionValueError(
        'invalid STEIM compression %s. (choose from 1, 2)' % value)
class JackseisOptions(Option):
    '''optparse ``Option`` subclass with extra value types.

    Registers the option value types ``'record_length'`` and ``'steim'``,
    validated by :py:func:`check_record_length` and :py:func:`check_steim`.
    '''
    TYPES = Option.TYPES + ('record_length', 'steim')
    TYPE_CHECKER = copy(Option.TYPE_CHECKER)
    TYPE_CHECKER['record_length'] = check_record_length
    TYPE_CHECKER['steim'] = check_steim
def main(args=None):
    '''Entry point of the jackseis command line tool.

    Parses command line options, scans the given input paths into a
    waveform pile and processes the traces window by window: optional
    sample snapping, downsampling, instrument restitution, data type
    conversion, code renaming and saving in the requested output format.

    :param args: list of command line arguments
        (default: ``sys.argv[1:]``)
    '''
    if args is None:
        args = sys.argv[1:]

    parser = OptionParser(
        usage=usage,
        description=description,
        option_class=JackseisOptions,
        formatter=util.BetterHelpFormatter())

    parser.add_option(
        '--format',
        dest='format',
        default='detect',
        choices=io.allowed_formats('load'),
        help='assume input files are of given FORMAT. Choices: %s' %
             io.allowed_formats('load', 'cli_help', 'detect'))

    parser.add_option(
        '--pattern',
        dest='regex',
        metavar='REGEX',
        help='only include files whose paths match REGEX')

    parser.add_option(
        '--stationxml',
        dest='station_xml_fns',
        action='append',
        default=[],
        metavar='STATIONXML',
        help='read station metadata from file STATIONXML')

    parser.add_option(
        '--event', '--events',
        dest='event_fns',
        action='append',
        default=[],
        metavar='EVENT',
        help='read event information from file EVENT')

    parser.add_option(
        '--cache',
        dest='cache_dir',
        default=config.config().cache_dir,
        metavar='DIR',
        help='use directory DIR to cache trace metadata '
             '(default=\'%default\')')

    parser.add_option(
        '--quiet',
        dest='quiet',
        action='store_true',
        default=False,
        help='disable output of progress information')

    parser.add_option(
        '--debug',
        dest='debug',
        action='store_true',
        default=False,
        help='print debugging information to stderr')

    parser.add_option(
        '--tmin',
        dest='tmin',
        help='start time as "%s"' % tfmt)

    parser.add_option(
        '--tmax',
        dest='tmax',
        help='end time as "%s"' % tfmt)

    parser.add_option(
        '--tinc',
        dest='tinc',
        help='set time length of output files [s] or "auto" to automatically '
             'choose an appropriate time increment or "huge" to allow to use '
             'a lot of system memory to merge input traces into huge output '
             'files.')

    parser.add_option(
        '--downsample',
        dest='downsample',
        metavar='RATE',
        help='downsample to RATE [Hz]')

    parser.add_option(
        '--output',
        dest='output_path',
        metavar='TEMPLATE',
        help='set output path to TEMPLATE. Available placeholders '
             'are %n: network, %s: station, %l: location, %c: channel, '
             '%b: begin time, %e: end time, %j: julian day of year. The '
             'following additional placeholders use the window begin and end '
             'times rather than trace begin and end times (to suppress '
             'producing many small files for gappy traces), %(wmin_year)s, '
             '%(wmin_month)s, %(wmin_day)s, %(wmin)s, %(wmin_jday)s, '
             '%(wmax_year)s, %(wmax_month)s, %(wmax_day)s, %(wmax)s, '
             '%(wmax_jday)s. Example: --output=\'data/%s/trace-%s-%c.mseed\'')

    parser.add_option(
        '--output-dir',
        metavar='TEMPLATE',
        dest='output_dir',
        help='set output directory to TEMPLATE (see --output for details) '
             'and automatically choose filenames. '
             'Produces files like TEMPLATE/NET-STA-LOC-CHA_BEGINTIME.FORMAT')

    parser.add_option(
        '--output-format',
        dest='output_format',
        default='mseed',
        choices=io.allowed_formats('save'),
        help='set output file format. Choices: %s' %
             io.allowed_formats('save', 'cli_help', 'mseed'))

    parser.add_option(
        '--force',
        dest='force',
        action='store_true',
        default=False,
        help='force overwriting of existing files')

    parser.add_option(
        '--no-snap',
        dest='snap',
        action='store_false',
        default=True,
        help='do not start output files at even multiples of file length')

    parser.add_option(
        '--traversal',
        dest='traversal',
        choices=('station-by-station', 'channel-by-channel', 'chronological'),
        default='station-by-station',
        help='set traversal order for traces processing. '
             'Choices are \'station-by-station\' [default], '
             '\'channel-by-channel\', and \'chronological\'. Chronological '
             'traversal uses more processing memory but makes it possible to '
             'join multiple stations into single output files')

    parser.add_option(
        '--rename-network',
        action='append',
        default=[],
        dest='rename_network',
        metavar='/PATTERN/REPLACEMENT/',
        help='update network code, can be given more than once')

    parser.add_option(
        '--rename-station',
        action='append',
        default=[],
        dest='rename_station',
        metavar='/PATTERN/REPLACEMENT/',
        help='update station code, can be given more than once')

    parser.add_option(
        '--rename-location',
        action='append',
        default=[],
        dest='rename_location',
        metavar='/PATTERN/REPLACEMENT/',
        help='update location code, can be given more than once')

    parser.add_option(
        '--rename-channel',
        action='append',
        default=[],
        dest='rename_channel',
        metavar='/PATTERN/REPLACEMENT/',
        help='update channel code, can be given more than once')

    parser.add_option(
        '--output-data-type',
        dest='output_data_type',
        choices=('same', 'int32', 'int64', 'float32', 'float64'),
        default='same',
        metavar='DTYPE',
        help='set data type. Choices: same [default], int32, '
             'int64, float32, float64. The output file format must support '
             'the given type.')

    parser.add_option(
        '--output-steim',
        dest='steim',
        type='steim',
        default=2,
        metavar='STEIM_COMPRESSION',
        help='set the mseed STEIM compression. Choices: 1 or 2. '
             'Default is STEIM-2, which can compress full range int32. '
             'NOTE: STEIM-2 is limited to 30 bit dynamic range.')

    parser.add_option(
        '--output-record-length',
        dest='record_length',
        type='record_length',
        default=4096,
        metavar='RECORD_LENGTH',
        help='set the mseed record length in bytes. Choices: %s. '
             'Default is 4096 bytes, which is commonly used for archiving.'
             % ', '.join(str(b) for b in io.mseed.VALID_RECORD_LENGTHS))

    quantity_choices = ('acceleration', 'velocity', 'displacement')
    parser.add_option(
        '--output-quantity',
        dest='output_quantity',
        choices=quantity_choices,
        help='deconvolve instrument transfer function. Choices: %s'
             % ', '.join(quantity_choices))

    parser.add_option(
        '--restitution-frequency-band',
        default='0.001,100.0',
        dest='str_fmin_fmax',
        metavar='FMIN,FMAX',
        help='frequency band for instrument deconvolution as FMIN,FMAX in Hz. '
             'Default: "%default"')

    sample_snap_choices = ('shift', 'interpolate')
    parser.add_option(
        '--sample-snap',
        dest='sample_snap',
        choices=sample_snap_choices,
        help='shift/interpolate traces so that samples are at even multiples '
             'of sampling rate. Choices: %s' % ', '.join(sample_snap_choices))

    (options, args) = parser.parse_args(args)

    if len(args) == 0:
        parser.print_help()
        sys.exit(1)

    if options.debug:
        util.setup_logging(program_name, 'debug')
    elif options.quiet:
        util.setup_logging(program_name, 'warning')
    else:
        util.setup_logging(program_name, 'info')

    tinc = None
    if options.tinc is not None:
        # Fix: compare the given option value, not the still-unset local
        # ``tinc`` -- previously --tinc=huge and --tinc=auto always fell
        # through to str_to_seconds() and died with "invalid argument".
        if options.tinc in ('huge', 'auto'):
            tinc = options.tinc
        else:
            try:
                tinc = str_to_seconds(options.tinc)
            except Exception:
                die('invalid argument to --tinc')

    tmin = None
    if options.tmin is not None:
        try:
            tmin = stt(options.tmin)
        except Exception:
            # Fix: include the expected time format in the message (it was
            # left empty), matching the --tmax error message below.
            die('invalid argument to --tmin. '
                'Expected format is "%s"' % tfmt)

    tmax = None
    if options.tmax is not None:
        try:
            tmax = stt(options.tmax)
        except Exception:
            die('invalid argument to --tmax. '
                'Expected format is "%s"' % tfmt)

    target_deltat = None
    if options.downsample is not None:
        try:
            target_deltat = 1.0 / float(options.downsample)
        except Exception:
            die('invalid argument to --downsample')

    # Collect (attribute, pattern, replacement) triples from the
    # --rename-* options.
    replacements = []
    for k, rename_k in [
            ('network', options.rename_network),
            ('station', options.rename_station),
            ('location', options.rename_location),
            ('channel', options.rename_channel)]:

        for patrep in rename_k:
            m = re.match(r'/([^/]+)/([^/]*)/', patrep)
            if not m:
                die('invalid argument to --rename-%s. '
                    'Expected format is /PATTERN/REPLACEMENT/' % k)

            replacements.append((k, m.group(1), m.group(2)))

    sx = None
    if options.station_xml_fns:
        sxs = []
        for station_xml_fn in options.station_xml_fns:
            sxs.append(stationxml.load_xml(filename=station_xml_fn))

        sx = stationxml.primitive_merge(sxs)

    events = []
    for event_fn in options.event_fns:
        events.extend(model.load_events(event_fn))

    p = pile.make_pile(
        paths=args,
        selector=None,
        regex=options.regex,
        fileformat=options.format,
        cachedirname=options.cache_dir,
        show_progress=not options.quiet)

    if p.tmin is None:
        die('data selection is empty')

    if tinc == 'auto':
        tinc = nice_seconds_floor(p.get_deltatmin() * 1000000.)

    # Fix: map --tinc=huge to "no chopping" (tinc=None) here -- the snap
    # arithmetic and the choppers below expect a number of seconds or
    # None. Remember that it was requested explicitly so the large-output
    # safety check further down can be skipped.
    tinc_is_huge = tinc == 'huge'
    if tinc_is_huge:
        tinc = None

    if options.snap:
        if tmin is None:
            tmin = p.tmin

        if tinc is not None:
            # Align the window start at an even multiple of the window
            # length.
            tmin = int(math.floor(tmin / tinc)) * tinc

    output_path = options.output_path
    output_dir = options.output_dir

    if output_path and not output_dir and os.path.isdir(output_path):
        output_dir = output_path  # compat. with old behaviour

    if output_dir and not output_path:
        output_path = 'trace_%(network)s-%(station)s-' \
                      '%(location)s-%(channel)s_%(wmin)s.' + \
                      options.output_format

    if output_dir and output_path:
        output_path = pjoin(output_dir, output_path)

    if not output_path:
        die('--output not given')

    fmin, fmax = map(float, options.str_fmin_fmax.split(','))
    tfade_factor = 2.0
    ffade_factor = 1.5
    if options.output_quantity:
        # Extra padding so that restitution tapers fall outside the
        # output window.
        tpad = 2*tfade_factor/fmin
    else:
        tpad = 0.

    if target_deltat is not None:
        # Extra padding to absorb downsampling filter edge effects.
        tpad += target_deltat * 10.

    if tinc is None and not tinc_is_huge:
        if ((tmax or p.tmax) - (tmin or p.tmin)) \
                / p.get_deltatmin() > 100000000.:
            die('use --tinc=huge to really produce such large output files '
                'or use --tinc=INC to split into smaller files.')

    kwargs = dict(tmin=tmin, tmax=tmax, tinc=tinc, tpad=tpad, style='batch')

    if options.traversal == 'channel-by-channel':
        it = p.chopper_grouped(gather=lambda tr: tr.nslc_id, **kwargs)

    elif options.traversal == 'station-by-station':
        it = p.chopper_grouped(gather=lambda tr: tr.nslc_id[:2], **kwargs)

    else:
        it = p.chopper(**kwargs)

    abort = []

    def got_sigint(signum, frame):
        # Finish the current window, then stop cleanly.
        abort.append(True)

    old = signal.signal(signal.SIGINT, got_sigint)

    save_kwargs = {}
    if options.output_format == 'mseed':
        save_kwargs['record_length'] = options.record_length
        save_kwargs['steim'] = options.steim

    for batch in it:
        traces = batch.traces
        if traces:
            twmin = batch.tmin
            twmax = batch.tmax
            logger.info('processing %s - %s, %i traces' %
                        (tts(twmin), tts(twmax), len(traces)))

            if options.sample_snap:
                for tr in traces:
                    tr.snap(interpolate=options.sample_snap == 'interpolate')

            if target_deltat is not None:
                out_traces = []
                for tr in traces:
                    try:
                        tr.downsample_to(
                            target_deltat, snap=True, demean=False)

                        # NOTE(review): this astype() call is a no-op (it
                        # casts to the trace's own current dtype);
                        # presumably the intent was to restore the dtype
                        # from before downsampling -- confirm before
                        # changing.
                        if options.output_data_type == 'same':
                            tr.ydata = tr.ydata.astype(tr.ydata.dtype)

                        out_traces.append(tr)

                    except (trace.TraceTooShort, trace.NoData):
                        pass

                traces = out_traces

            if options.output_quantity:
                out_traces = []
                tfade = tfade_factor / fmin
                ftap = (fmin / ffade_factor, fmin, fmax, ffade_factor * fmax)
                for tr in traces:
                    try:
                        response = sx.get_pyrocko_response(
                            tr.nslc_id,
                            timespan=(tr.tmin, tr.tmax),
                            fake_input_units=output_units[
                                options.output_quantity])

                        rest_tr = tr.transfer(
                            tfade, ftap, response, invert=True, demean=True)

                        out_traces.append(rest_tr)

                    except stationxml.NoResponseInformation as e:
                        # Fix: Logger.warn is deprecated, use warning().
                        logger.warning(
                            'Cannot restitute: %s (no response)' % str(e))

                    except stationxml.MultipleResponseInformation as e:
                        logger.warning(
                            'Cannot restitute: %s (multiple responses found)'
                            % str(e))

                    except (trace.TraceTooShort, trace.NoData):
                        logger.warning(
                            'Trace too short: %s' % '.'.join(tr.nslc_id))

                traces = out_traces

            if options.output_data_type != 'same':
                for tr in traces:
                    tr.ydata = tr.ydata.astype(
                        name_to_dtype[options.output_data_type])

            if replacements:
                for tr in traces:
                    r = {}
                    for k, pat, repl in replacements:
                        oldval = getattr(tr, k)
                        newval, n = re.subn(pat, repl, oldval)
                        if n:
                            r[k] = newval

                    tr.set_codes(**r)

            if output_path:
                otraces = []
                for tr in traces:
                    try:
                        # Trim to the window proper, dropping the tpad
                        # margins added for processing.
                        otr = tr.chop(twmin, twmax, inplace=False)
                        otraces.append(otr)
                    except trace.NoData:
                        pass

                try:
                    io.save(otraces, output_path, format=options.output_format,
                            overwrite=options.force,
                            additional=dict(
                                wmin_year=tts(twmin, format='%Y'),
                                wmin_month=tts(twmin, format='%m'),
                                wmin_day=tts(twmin, format='%d'),
                                wmin_jday=tts(twmin, format='%j'),
                                wmin=tts(twmin, format='%Y-%m-%d_%H-%M-%S'),
                                wmax_year=tts(twmax, format='%Y'),
                                wmax_month=tts(twmax, format='%m'),
                                wmax_day=tts(twmax, format='%d'),
                                wmax_jday=tts(twmax, format='%j'),
                                wmax=tts(twmax, format='%Y-%m-%d_%H-%M-%S')),
                            **save_kwargs)
                except io.FileSaveError as e:
                    die(str(e))

        if abort:
            break

    # Restore the previous SIGINT handler.
    signal.signal(signal.SIGINT, old)

    if abort:
        die('interrupted.')
if __name__ == '__main__':
    # main() defaults to sys.argv[1:] when called without arguments.
    main()