1#!/usr/bin/env python
2# http://pyrocko.org - GPLv3
3#
4# The Pyrocko Developers, 21st Century
5# ---|P------/S----------~Lg----------
6from __future__ import absolute_import, division
8import sys
9import re
10import os
11import logging
12import signal
13import math
14from copy import copy
15from optparse import OptionParser, Option, OptionValueError
17import numpy as num
19from pyrocko import util, config, pile, model, io, trace
20from pyrocko.io import stationxml
# Shorthand for joining path components.
pjoin = os.path.join

# Module-level logger; configured in main() via util.setup_logging().
logger = logging.getLogger('pyrocko.apps.jackseis')

program_name = 'jackseis'

usage = program_name + ' <inputs> ... [options]'

description = '''A simple tool to manipulate waveform archive data.'''

# Human-readable time format accepted by --tmin/--tmax (shown in help texts).
tfmt = 'YYYY-mm-dd HH:MM:SS[.xxx]'
tts = util.time_to_str  # epoch float -> string
stt = util.str_to_time_fillup  # string -> epoch float, allows partial input
def die(message):
    '''Abort the program, printing an error *message* prefixed with the
    program name, and exit with a nonzero status.'''

    msg = '%s: error: %s' % (program_name, message)
    sys.exit(msg)
# Map --output-data-type option values to numpy dtypes.
name_to_dtype = {
    'int32': num.int32,
    'int64': num.int64,
    'float32': num.float32,
    'float64': num.float64}

# Physical input units (StationXML convention) for each --output-quantity
# choice, used when building the restitution response.
output_units = {
    'acceleration': 'M/S**2',
    'velocity': 'M/S',
    'displacement': 'M'}
def str_to_seconds(s):
    '''Convert a duration string to seconds.

    Accepts a plain number or a number with one of the unit suffixes
    ``s`` (seconds), ``m`` (minutes), ``h`` (hours) or ``d`` (days).
    '''

    factors = {
        's': 1.,
        'm': 60.,
        'h': 3600.,
        'd': 24. * 3600.}

    suffix = s[-1:]
    if suffix in factors:
        return float(s[:-1]) * factors[suffix]

    return float(s)
def nice_seconds_floor(s):
    '''Round a time interval *s* [s] down to the next smaller "nice" value.

    Values larger than the biggest nice value (48 h) are returned
    unchanged, as are values below the smallest one (1 s).
    '''

    nice_values = [
        1., 10., 60., 600., 3600., 3.*3600., 12*3600., 24*3600., 48*3600.]

    previous = s
    for threshold in nice_values:
        if s < threshold:
            return previous

        previous = threshold

    return s
def check_record_length(option, opt, value):
    '''optparse type checker for the ``record_length`` option type.

    Returns the record length as an int if *value* is one of the valid
    mseed record lengths, otherwise raises
    :py:exc:`optparse.OptionValueError`.
    '''
    try:
        reclen = int(value)
        if reclen in io.mseed.VALID_RECORD_LENGTHS:
            return reclen
    except Exception:
        pass

    # Format with the raw input value: if int(value) raised above, the
    # local ``reclen`` is unbound and using it here would itself raise
    # UnboundLocalError instead of the intended OptionValueError.
    raise OptionValueError(
        'invalid record length %s. (choose from %s)'
        % (value, ', '.join(str(b) for b in io.mseed.VALID_RECORD_LENGTHS)))
def check_steim(option, opt, value):
    '''optparse type checker for the ``steim`` option type.

    Returns the STEIM compression level as an int if *value* is 1 or 2,
    otherwise raises :py:exc:`optparse.OptionValueError`.
    '''
    try:
        compression = int(value)
        if compression in (1, 2):
            return compression
    except Exception:
        pass

    # Format with the raw input value: if int(value) raised above, the
    # local ``compression`` is unbound and using it here would itself
    # raise UnboundLocalError instead of the intended OptionValueError.
    raise OptionValueError(
        'invalid STEIM compression %s. (choose from 1, 2)' % value)
class JackseisOptions(Option):
    # optparse Option subclass registering the custom argument types
    # 'record_length' and 'steim' together with their validators, so
    # that parser.add_option(..., type='record_length') works.
    TYPES = Option.TYPES + ('record_length', 'steim')
    TYPE_CHECKER = copy(Option.TYPE_CHECKER)
    TYPE_CHECKER['record_length'] = check_record_length
    TYPE_CHECKER['steim'] = check_steim
def process(get_pile, options):
    '''Run the jackseis processing chain on the given data selection.

    Chops the input pile into time windows, optionally snaps, downsamples,
    restitutes and renames the traces and finally saves them according to
    the parsed command line *options*. Processing can be interrupted with
    SIGINT between batches.

    :param get_pile: callable returning the input
        :py:class:`pyrocko.pile.Pile`
    :param options: options object as returned by
        ``OptionParser.parse_args`` in :py:func:`main`
    '''

    tinc = None
    if options.tinc is not None:
        # Bug fix: must inspect options.tinc here. The local ``tinc`` is
        # still None at this point, so the special values 'huge' and
        # 'auto' were never recognized and were erroneously rejected by
        # str_to_seconds() below.
        if options.tinc in ('huge', 'auto'):
            tinc = options.tinc
        else:
            try:
                tinc = str_to_seconds(options.tinc)
            except Exception:
                die('invalid argument to --tinc')

    tmin = None
    if options.tmin is not None:
        try:
            tmin = stt(options.tmin)
        except Exception:
            # Bug fix: the format operator was inside the string literal
            # ('..."%s" % tfmt'), so the expected time format was printed
            # verbatim instead of being interpolated.
            die('invalid argument to --tmin. '
                'Expected format is "%s"' % tfmt)

    tmax = None
    if options.tmax is not None:
        try:
            tmax = stt(options.tmax)
        except Exception:
            die('invalid argument to --tmax. '
                'Expected format is "%s"' % tfmt)

    target_deltat = None
    if options.downsample is not None:
        try:
            target_deltat = 1.0 / float(options.downsample)
        except Exception:
            die('invalid argument to --downsample')

    # Collect (attribute, pattern, replacement) triples from the
    # --rename-* options.
    replacements = []
    for k, rename_k in [
            ('network', options.rename_network),
            ('station', options.rename_station),
            ('location', options.rename_location),
            ('channel', options.rename_channel)]:

        for patrep in rename_k:
            m = re.match(r'/([^/]+)/([^/]*)/', patrep)
            if not m:
                die('invalid argument to --rename-%s. '
                    'Expected format is /PATTERN/REPLACEMENT/' % k)

            replacements.append((k, m.group(1), m.group(2)))

    sx = None
    if options.station_xml_fns:
        sxs = []
        for station_xml_fn in options.station_xml_fns:
            sxs.append(stationxml.load_xml(filename=station_xml_fn))

        sx = stationxml.primitive_merge(sxs)

    if options.output_quantity and sx is None:
        # Restitution needs response metadata; without this guard the
        # sx.get_pyrocko_response() call below would crash with
        # AttributeError on None.
        die('--output-quantity requires --stationxml')

    events = []
    for event_fn in options.event_fns:
        events.extend(model.load_events(event_fn))

    p = get_pile()

    if p.tmin is None:
        die('data selection is empty')

    if tinc == 'auto':
        tinc = nice_seconds_floor(p.get_deltatmin() * 1000000.)

    if options.snap:
        if tmin is None:
            tmin = p.tmin

        if tinc is not None:
            # Align window start to an even multiple of the increment.
            tmin = int(math.floor(tmin / tinc)) * tinc

    output_path = options.output_path
    output_dir = options.output_dir

    if output_path and not output_dir and os.path.isdir(output_path):
        output_dir = output_path  # compat. with old behaviour

    if output_dir and not output_path:
        output_path = 'trace_%(network)s-%(station)s-' \
            '%(location)s-%(channel)s_%(wmin)s.' + \
            options.output_format

    if output_dir and output_path:
        output_path = pjoin(output_dir, output_path)

    if not output_path:
        die('--output not given')

    fmin, fmax = map(float, options.str_fmin_fmax.split(','))
    tfade_factor = 2.0
    ffade_factor = 1.5
    if options.output_quantity:
        # Extra padding so the restitution taper does not eat into the
        # output time window.
        tpad = 2*tfade_factor/fmin
    else:
        tpad = 0.

    if target_deltat is not None:
        tpad += target_deltat * 10.

    if tinc is None:
        if ((tmax or p.tmax) - (tmin or p.tmin)) \
                / p.get_deltatmin() > 100000000.:
            die('use --tinc=huge to really produce such large output files '
                'or use --tinc=INC to split into smaller files.')

    kwargs = dict(tmin=tmin, tmax=tmax, tinc=tinc, tpad=tpad, style='batch')

    if options.traversal == 'channel-by-channel':
        it = p.chopper_grouped(gather=lambda tr: tr.nslc_id, **kwargs)

    elif options.traversal == 'station-by-station':
        it = p.chopper_grouped(gather=lambda tr: tr.nslc_id[:2], **kwargs)

    else:
        it = p.chopper(**kwargs)

    # SIGINT sets a flag so we can finish the current batch cleanly.
    abort = []

    def got_sigint(signum, frame):
        abort.append(True)

    old = signal.signal(signal.SIGINT, got_sigint)

    save_kwargs = {}
    if options.output_format == 'mseed':
        save_kwargs['record_length'] = options.record_length
        save_kwargs['steim'] = options.steim

    def process_traces(batch):
        # Process and save one batch (time window) of traces.
        traces = batch.traces
        if traces:
            twmin = batch.tmin
            twmax = batch.tmax
            logger.info('processing %s - %s, %i traces' %
                        (tts(twmin), tts(twmax), len(traces)))

            if options.sample_snap:
                for tr in traces:
                    tr.snap(interpolate=options.sample_snap == 'interpolate')

            if target_deltat is not None:
                out_traces = []
                for tr in traces:
                    try:
                        tr.downsample_to(
                            target_deltat, snap=True, demean=False)

                        if options.output_data_type == 'same':
                            tr.ydata = tr.ydata.astype(tr.ydata.dtype)

                        out_traces.append(tr)

                    except (trace.TraceTooShort, trace.NoData):
                        pass

                traces = out_traces

            if options.output_quantity:
                out_traces = []
                tfade = tfade_factor / fmin
                ftap = (fmin / ffade_factor, fmin, fmax, ffade_factor * fmax)
                for tr in traces:
                    try:
                        response = sx.get_pyrocko_response(
                            tr.nslc_id,
                            timespan=(tr.tmin, tr.tmax),
                            fake_input_units=output_units[
                                options.output_quantity])

                        rest_tr = tr.transfer(
                            tfade, ftap, response, invert=True, demean=True)

                        out_traces.append(rest_tr)

                    except stationxml.NoResponseInformation as e:
                        logger.warning(
                            'Cannot restitute: %s (no response)' % str(e))

                    except stationxml.MultipleResponseInformation as e:
                        logger.warning(
                            'Cannot restitute: %s (multiple responses found)'
                            % str(e))

                    except (trace.TraceTooShort, trace.NoData):
                        logger.warning(
                            'Trace too short: %s' % '.'.join(tr.nslc_id))

                traces = out_traces

            if options.output_data_type != 'same':
                for tr in traces:
                    tr.ydata = tr.ydata.astype(
                        name_to_dtype[options.output_data_type])

            if replacements:
                for tr in traces:
                    r = {}
                    for k, pat, repl in replacements:
                        oldval = getattr(tr, k)
                        newval, n = re.subn(pat, repl, oldval)
                        if n:
                            r[k] = newval

                    tr.set_codes(**r)

            if output_path:
                otraces = []
                for tr in traces:
                    try:
                        # Cut to the window so gappy traces do not produce
                        # output outside [twmin, twmax].
                        otr = tr.chop(twmin, twmax, inplace=False)
                        otraces.append(otr)
                    except trace.NoData:
                        pass

                try:
                    io.save(otraces, output_path,
                            format=options.output_format,
                            overwrite=options.force,
                            additional=dict(
                                wmin_year=tts(twmin, format='%Y'),
                                wmin_month=tts(twmin, format='%m'),
                                wmin_day=tts(twmin, format='%d'),
                                wmin_jday=tts(twmin, format='%j'),
                                wmin=tts(twmin, format='%Y-%m-%d_%H-%M-%S'),
                                wmax_year=tts(twmax, format='%Y'),
                                wmax_month=tts(twmax, format='%m'),
                                wmax_day=tts(twmax, format='%d'),
                                wmax_jday=tts(twmax, format='%j'),
                                wmax=tts(twmax, format='%Y-%m-%d_%H-%M-%S')),
                            **save_kwargs)
                except io.FileSaveError as e:
                    die(str(e))

    for batch in it:
        process_traces(batch)
        if abort:
            break

    # Restore the previous SIGINT handler.
    signal.signal(signal.SIGINT, old)

    if abort:
        die('interrupted.')
def main(args=None):
    '''Entry point for the jackseis command line tool.

    Parses command line options, sets up logging and delegates the actual
    work to :py:func:`process`.

    :param args: list of command line arguments; defaults to
        ``sys.argv[1:]`` when not given
    '''
    if args is None:
        args = sys.argv[1:]

    parser = OptionParser(
        usage=usage,
        description=description,
        option_class=JackseisOptions,
        formatter=util.BetterHelpFormatter())

    # --- input selection ---

    parser.add_option(
        '--format',
        dest='format',
        default='detect',
        choices=io.allowed_formats('load'),
        help='assume input files are of given FORMAT. Choices: %s' %
        io.allowed_formats('load', 'cli_help', 'detect'))

    parser.add_option(
        '--pattern',
        dest='regex',
        metavar='REGEX',
        help='only include files whose paths match REGEX')

    parser.add_option(
        '--stationxml',
        dest='station_xml_fns',
        action='append',
        default=[],
        metavar='STATIONXML',
        help='read station metadata from file STATIONXML')

    parser.add_option(
        '--event', '--events',
        dest='event_fns',
        action='append',
        default=[],
        metavar='EVENT',
        help='read event information from file EVENT')

    parser.add_option(
        '--cache',
        dest='cache_dir',
        default=config.config().cache_dir,
        metavar='DIR',
        help='use directory DIR to cache trace metadata '
             '(default=\'%default\')')

    # --- verbosity ---

    parser.add_option(
        '--quiet',
        dest='quiet',
        action='store_true',
        default=False,
        help='disable output of progress information')

    parser.add_option(
        '--debug',
        dest='debug',
        action='store_true',
        default=False,
        help='print debugging information to stderr')

    # --- time window and output chunking ---

    parser.add_option(
        '--tmin',
        dest='tmin',
        help='start time as "%s"' % tfmt)

    parser.add_option(
        '--tmax',
        dest='tmax',
        help='end time as "%s"' % tfmt)

    parser.add_option(
        '--tinc',
        dest='tinc',
        help='set time length of output files [s] or "auto" to automatically '
             'choose an appropriate time increment or "huge" to allow to use '
             'a lot of system memory to merge input traces into huge output '
             'files.')

    parser.add_option(
        '--downsample',
        dest='downsample',
        metavar='RATE',
        help='downsample to RATE [Hz]')

    # --- output destination and format ---

    parser.add_option(
        '--output',
        dest='output_path',
        metavar='TEMPLATE',
        help='set output path to TEMPLATE. Available placeholders '
             'are %n: network, %s: station, %l: location, %c: channel, '
             '%b: begin time, %e: end time, %j: julian day of year. The '
             'following additional placeholders use the window begin and end '
             'times rather than trace begin and end times (to suppress '
             'producing many small files for gappy traces), %(wmin_year)s, '
             '%(wmin_month)s, %(wmin_day)s, %(wmin)s, %(wmin_jday)s, '
             '%(wmax_year)s, %(wmax_month)s, %(wmax_day)s, %(wmax)s, '
             '%(wmax_jday)s. Example: --output=\'data/%s/trace-%s-%c.mseed\'')

    parser.add_option(
        '--output-dir',
        metavar='TEMPLATE',
        dest='output_dir',
        help='set output directory to TEMPLATE (see --output for details) '
             'and automatically choose filenames. '
             'Produces files like TEMPLATE/NET-STA-LOC-CHA_BEGINTIME.FORMAT')

    parser.add_option(
        '--output-format',
        dest='output_format',
        default='mseed',
        choices=io.allowed_formats('save'),
        help='set output file format. Choices: %s' %
        io.allowed_formats('save', 'cli_help', 'mseed'))

    parser.add_option(
        '--force',
        dest='force',
        action='store_true',
        default=False,
        help='force overwriting of existing files')

    parser.add_option(
        '--no-snap',
        dest='snap',
        action='store_false',
        default=True,
        help='do not start output files at even multiples of file length')

    parser.add_option(
        '--traversal',
        dest='traversal',
        choices=('station-by-station', 'channel-by-channel', 'chronological'),
        default='station-by-station',
        help='set traversal order for traces processing. '
             'Choices are \'station-by-station\' [default], '
             '\'channel-by-channel\', and \'chronological\'. Chronological '
             'traversal uses more processing memory but makes it possible to '
             'join multiple stations into single output files')

    # --- trace code renaming ---

    parser.add_option(
        '--rename-network',
        action='append',
        default=[],
        dest='rename_network',
        metavar='/PATTERN/REPLACEMENT/',
        help='update network code, can be given more than once')

    parser.add_option(
        '--rename-station',
        action='append',
        default=[],
        dest='rename_station',
        metavar='/PATTERN/REPLACEMENT/',
        help='update station code, can be given more than once')

    parser.add_option(
        '--rename-location',
        action='append',
        default=[],
        dest='rename_location',
        metavar='/PATTERN/REPLACEMENT/',
        help='update location code, can be given more than once')

    parser.add_option(
        '--rename-channel',
        action='append',
        default=[],
        dest='rename_channel',
        metavar='/PATTERN/REPLACEMENT/',
        help='update channel code, can be given more than once')

    # --- data conversion and restitution ---

    parser.add_option(
        '--output-data-type',
        dest='output_data_type',
        choices=('same', 'int32', 'int64', 'float32', 'float64'),
        default='same',
        metavar='DTYPE',
        help='set data type. Choices: same [default], int32, '
             'int64, float32, float64. The output file format must support '
             'the given type.')

    parser.add_option(
        '--output-steim',
        dest='steim',
        type='steim',
        default=2,
        metavar='STEIM_COMPRESSION',
        help='set the mseed STEIM compression. Choices: 1 or 2. '
             'Default is STEIM-2, which can compress full range int32. '
             'NOTE: STEIM-2 is limited to 30 bit dynamic range.')

    parser.add_option(
        '--output-record-length',
        dest='record_length',
        type='record_length',
        default=4096,
        metavar='RECORD_LENGTH',
        help='set the mseed record length in bytes. Choices: %s. '
             'Default is 4096 bytes, which is commonly used for archiving.'
        % ', '.join(str(b) for b in io.mseed.VALID_RECORD_LENGTHS))

    quantity_choices = ('acceleration', 'velocity', 'displacement')
    parser.add_option(
        '--output-quantity',
        dest='output_quantity',
        choices=quantity_choices,
        help='deconvolve instrument transfer function. Choices: %s'
        % ', '.join(quantity_choices))

    parser.add_option(
        '--restitution-frequency-band',
        default='0.001,100.0',
        dest='str_fmin_fmax',
        metavar='FMIN,FMAX',
        help='frequency band for instrument deconvolution as FMIN,FMAX in Hz. '
             'Default: "%default"')

    sample_snap_choices = ('shift', 'interpolate')
    parser.add_option(
        '--sample-snap',
        dest='sample_snap',
        choices=sample_snap_choices,
        help='shift/interpolate traces so that samples are at even multiples '
             'of sampling rate. Choices: %s' % ', '.join(sample_snap_choices))

    (options, args) = parser.parse_args(args)

    # Remaining positional arguments are the input paths; none given means
    # show usage and bail out.
    if len(args) == 0:
        parser.print_help()
        sys.exit(1)

    if options.debug:
        util.setup_logging(program_name, 'debug')
    elif options.quiet:
        util.setup_logging(program_name, 'warning')
    else:
        util.setup_logging(program_name, 'info')

    def get_pile():
        # Deferred pile creation: process() decides when the (potentially
        # slow) scanning of the input files happens.
        return pile.make_pile(
            paths=args,
            selector=None,
            regex=options.regex,
            fileformat=options.format,
            cachedirname=options.cache_dir,
            show_progress=not options.quiet)

    return process(get_pile, options)
if __name__ == '__main__':
    # main() falls back to sys.argv[1:] itself when called without
    # arguments.
    main()