1#!/usr/bin/env python
2# http://pyrocko.org - GPLv3
3#
4# The Pyrocko Developers, 21st Century
5# ---|P------/S----------~Lg----------
7import sys
8import re
9import os
10import logging
11import signal
12import math
13from copy import copy
14from optparse import OptionParser, Option, OptionValueError
16import numpy as num
18from pyrocko import util, config, pile, model, io, trace
19from pyrocko.io import stationxml
# Shorthand for joining filesystem path components.
pjoin = os.path.join

logger = logging.getLogger('pyrocko.apps.jackseis')

program_name = 'jackseis'

usage = program_name + ' <inputs> ... [options]'

description = '''A simple tool to manipulate waveform archive data.'''

# Human-readable time format string, shown in --help and error messages.
tfmt = 'YYYY-mm-dd HH:MM:SS[.xxx]'
# Shorthands for pyrocko's time conversion helpers:
# tts: float timestamp -> string, stt: (partial) string -> float timestamp.
tts = util.time_to_str
stt = util.str_to_time_fillup
def die(message):
    '''Terminate the program, reporting *message* as an error (exit code 1).'''
    sys.exit('{}: error: {}'.format(program_name, message))
# Map --output-data-type choice strings to numpy dtypes.
name_to_dtype = {
    'int32': num.int32,
    'int64': num.int64,
    'float32': num.float32,
    'float64': num.float64}


# Map --output-quantity choice strings to the physical units expected by
# the StationXML response deconvolution.
output_units = {
    'acceleration': 'M/S**2',
    'velocity': 'M/S',
    'displacement': 'M'}
def str_to_seconds(s):
    '''Convert a duration string to seconds.

    Recognized unit suffixes: ``s`` (seconds), ``m`` (minutes), ``h``
    (hours), ``d`` (days). Without a suffix, the value is interpreted as
    seconds. Raises :py:exc:`ValueError` for unparsable input.
    '''
    factors = {'s': 1., 'm': 60., 'h': 3600., 'd': 24. * 3600.}
    factor = factors.get(s[-1:])
    if factor is None:
        return float(s)

    return float(s[:-1]) * factor
def nice_seconds_floor(s):
    '''Round a time span down to the nearest "nice" value in seconds.

    Nice values range from 1 s up to 48 h; spans below 1 s or above 48 h
    are returned unchanged.
    '''
    nice_values = (
        1., 10., 60., 600., 3600., 3.*3600., 12*3600., 24*3600., 48*3600.)

    previous = s
    for threshold in nice_values:
        if s < threshold:
            return previous

        previous = threshold

    return s
def check_record_length(option, opt, value):
    '''optparse type checker for mseed record lengths.

    Returns the record length as ``int`` if valid, otherwise raises
    :py:exc:`optparse.OptionValueError`.
    '''
    try:
        reclen = int(value)
        if reclen in io.mseed.VALID_RECORD_LENGTHS:
            return reclen
    except Exception:
        pass
    # Format the raw option string into the message: the local ``reclen``
    # is unbound when int() failed, which previously raised NameError here.
    raise OptionValueError(
        'invalid record length %s. (choose from %s)'
        % (value, ', '.join(str(b) for b in io.mseed.VALID_RECORD_LENGTHS)))
def check_steim(option, opt, value):
    '''optparse type checker for the mseed STEIM compression level.

    Returns the compression level as ``int`` (1 or 2) if valid, otherwise
    raises :py:exc:`optparse.OptionValueError`.
    '''
    try:
        compression = int(value)
        if compression in (1, 2):
            return compression
    except Exception:
        pass
    # Format the raw option string into the message: the local
    # ``compression`` is unbound when int() failed, which previously
    # raised NameError here instead of the intended OptionValueError.
    raise OptionValueError(
        'invalid STEIM compression %s. (choose from 1, 2)' % value)
class JackseisOptions(Option):
    '''optparse Option subclass with jackseis-specific argument types.

    Adds the types ``record_length`` and ``steim``, validated by
    :py:func:`check_record_length` and :py:func:`check_steim`.
    '''
    TYPES = Option.TYPES + ('record_length', 'steim')
    TYPE_CHECKER = copy(Option.TYPE_CHECKER)
    TYPE_CHECKER.update({
        'record_length': check_record_length,
        'steim': check_steim})
def process(get_pile, options):
    '''Run the jackseis waveform processing pipeline.

    :param get_pile: callable returning the input data as a
        :py:class:`pyrocko.pile.Pile` object
    :param options: parsed command line options object (see
        :py:func:`main`)

    Iterates over the input in time windows of ``options.tinc`` seconds
    and applies, in order: sample snapping, downsampling, restitution,
    data type conversion and trace code renaming. Results are written to
    the configured output path. Terminates the program via
    :py:func:`die` on invalid options or output errors. SIGINT aborts
    gracefully after the current window.
    '''
    tinc = None
    if options.tinc is not None:
        # Must test the option string here; the local ``tinc`` is still
        # None at this point (previously broke --tinc=auto/--tinc=huge).
        if options.tinc in ('huge', 'auto'):
            tinc = options.tinc
        else:
            try:
                tinc = str_to_seconds(options.tinc)
            except Exception:
                die('invalid argument to --tinc')

    tmin = None
    if options.tmin is not None:
        try:
            tmin = stt(options.tmin)
        except Exception:
            # The ``% tfmt`` must be outside the string literal (matches
            # the --tmax branch below); it was inside and never applied.
            die('invalid argument to --tmin. '
                'Expected format is "%s"' % tfmt)

    tmax = None
    if options.tmax is not None:
        try:
            tmax = stt(options.tmax)
        except Exception:
            die('invalid argument to --tmax. '
                'Expected format is "%s"' % tfmt)

    target_deltat = None
    if options.downsample is not None:
        try:
            target_deltat = 1.0 / float(options.downsample)
        except Exception:
            die('invalid argument to --downsample')

    # Collect (attribute, pattern, replacement) triples from the
    # --rename-* options.
    replacements = []
    for k, rename_k, in [
            ('network', options.rename_network),
            ('station', options.rename_station),
            ('location', options.rename_location),
            ('channel', options.rename_channel)]:

        for patrep in rename_k:
            m = re.match(r'/([^/]+)/([^/]*)/', patrep)
            if not m:
                die('invalid argument to --rename-%s. '
                    'Expected format is /PATTERN/REPLACEMENT/' % k)

            replacements.append((k, m.group(1), m.group(2)))

    sx = None
    if options.station_xml_fns:
        sxs = []
        for station_xml_fn in options.station_xml_fns:
            sxs.append(stationxml.load_xml(filename=station_xml_fn))

        sx = stationxml.primitive_merge(sxs)

    events = []
    for event_fn in options.event_fns:
        events.extend(model.load_events(event_fn))

    p = get_pile()

    if p.tmin is None:
        die('data selection is empty')

    if tinc == 'auto':
        # Aim for roughly one million samples per output file.
        tinc = nice_seconds_floor(p.get_deltatmin() * 1000000.)

    if options.snap:
        if tmin is None:
            tmin = p.tmin

        if tinc is not None:
            tmin = int(math.floor(tmin / tinc)) * tinc

    output_path = options.output_path
    output_dir = options.output_dir

    if output_path and not output_dir and os.path.isdir(output_path):
        output_dir = output_path  # compat. with old behaviour

    if output_dir and not output_path:
        output_path = 'trace_%(network)s-%(station)s-' \
                      '%(location)s-%(channel)s_%(wmin)s.' + \
                      options.output_format

    if output_dir and output_path:
        output_path = pjoin(output_dir, output_path)

    if not output_path:
        die('--output not given')

    fmin, fmax = map(float, options.str_fmin_fmax.split(','))
    tfade_factor = 2.0
    ffade_factor = 1.5
    if options.output_quantity:
        # Extra padding so that restitution tapers fall outside the
        # output time window.
        tpad = 2*tfade_factor/fmin
    else:
        tpad = 0.

    if target_deltat is not None:
        tpad += target_deltat * 10.

    if tinc is None:
        if ((tmax or p.tmax) - (tmin or p.tmin)) \
                / p.get_deltatmin() > 100000000.:
            die('use --tinc=huge to really produce such large output files '
                'or use --tinc=INC to split into smaller files.')

    kwargs = dict(tmin=tmin, tmax=tmax, tinc=tinc, tpad=tpad, style='batch')

    if options.traversal == 'channel-by-channel':
        it = p.chopper_grouped(gather=lambda tr: tr.nslc_id, **kwargs)

    elif options.traversal == 'station-by-station':
        it = p.chopper_grouped(gather=lambda tr: tr.nslc_id[:2], **kwargs)

    else:
        it = p.chopper(**kwargs)

    abort = []

    def got_sigint(signum, frame):
        # Request a graceful stop; checked after each window below.
        abort.append(True)

    old = signal.signal(signal.SIGINT, got_sigint)

    save_kwargs = {}
    if options.output_format == 'mseed':
        save_kwargs['record_length'] = options.record_length
        save_kwargs['steim'] = options.steim

    def process_traces(batch):
        # Process and save one time window of traces.
        traces = batch.traces
        if traces:
            twmin = batch.tmin
            twmax = batch.tmax
            logger.info('processing %s - %s, %i traces' %
                        (tts(twmin), tts(twmax), len(traces)))

            if options.sample_snap:
                for tr in traces:
                    tr.snap(interpolate=options.sample_snap == 'interpolate')

            if target_deltat is not None:
                out_traces = []
                for tr in traces:
                    try:
                        tr.downsample_to(
                            target_deltat, snap=True, demean=False)

                        if options.output_data_type == 'same':
                            tr.ydata = tr.ydata.astype(tr.ydata.dtype)
                        out_traces.append(tr)

                    except (trace.TraceTooShort, trace.NoData):
                        pass

                traces = out_traces

            if options.output_quantity:
                # Deconvolve instrument responses (restitution).
                out_traces = []
                tfade = tfade_factor / fmin
                ftap = (fmin / ffade_factor, fmin, fmax, ffade_factor * fmax)
                for tr in traces:
                    try:
                        response = sx.get_pyrocko_response(
                            tr.nslc_id,
                            timespan=(tr.tmin, tr.tmax),
                            fake_input_units=output_units[
                                options.output_quantity])

                        rest_tr = tr.transfer(
                            tfade, ftap, response, invert=True, demean=True)

                        out_traces.append(rest_tr)

                    except stationxml.NoResponseInformation as e:
                        logger.warning(
                            'Cannot restitute: %s (no response)' % str(e))

                    except stationxml.MultipleResponseInformation as e:
                        logger.warning(
                            'Cannot restitute: %s (multiple responses found)'
                            % str(e))

                    except (trace.TraceTooShort, trace.NoData):
                        logger.warning(
                            'Trace too short: %s' % '.'.join(tr.nslc_id))

                traces = out_traces

            if options.output_data_type != 'same':
                for tr in traces:
                    tr.ydata = tr.ydata.astype(
                        name_to_dtype[options.output_data_type])

            if replacements:
                for tr in traces:
                    r = {}
                    for k, pat, repl in replacements:
                        oldval = getattr(tr, k)
                        newval, n = re.subn(pat, repl, oldval)
                        if n:
                            r[k] = newval

                    tr.set_codes(**r)

            if output_path:
                # Chop to the window proper (padding was only needed
                # during processing).
                otraces = []
                for tr in traces:
                    try:
                        otr = tr.chop(twmin, twmax, inplace=False)
                        otraces.append(otr)
                    except trace.NoData:
                        pass

                try:
                    io.save(otraces, output_path, format=options.output_format,
                            overwrite=options.force,
                            additional=dict(
                                wmin_year=tts(twmin, format='%Y'),
                                wmin_month=tts(twmin, format='%m'),
                                wmin_day=tts(twmin, format='%d'),
                                wmin_jday=tts(twmin, format='%j'),
                                wmin=tts(twmin, format='%Y-%m-%d_%H-%M-%S'),
                                wmax_year=tts(twmax, format='%Y'),
                                wmax_month=tts(twmax, format='%m'),
                                wmax_day=tts(twmax, format='%d'),
                                wmax_jday=tts(twmax, format='%j'),
                                wmax=tts(twmax, format='%Y-%m-%d_%H-%M-%S')),
                            **save_kwargs)
                except io.FileSaveError as e:
                    die(str(e))

    for batch in it:
        process_traces(batch)
        if abort:
            break

    # Restore the previous SIGINT handler.
    signal.signal(signal.SIGINT, old)

    if abort:
        die('interrupted.')
def main(args=None):
    '''Entry point of the jackseis command line tool.

    Parses command line arguments (``args`` defaults to ``sys.argv[1:]``),
    configures logging and hands a lazily-created pile of the input files
    over to :py:func:`process`.
    '''
    if args is None:
        args = sys.argv[1:]

    parser = OptionParser(
        usage=usage,
        description=description,
        option_class=JackseisOptions,
        formatter=util.BetterHelpFormatter())

    # Input selection and metadata options.
    parser.add_option(
        '--format',
        dest='format',
        default='detect',
        choices=io.allowed_formats('load'),
        help='assume input files are of given FORMAT. Choices: %s' %
        io.allowed_formats('load', 'cli_help', 'detect'))

    parser.add_option(
        '--pattern',
        dest='regex',
        metavar='REGEX',
        help='only include files whose paths match REGEX')

    parser.add_option(
        '--stationxml',
        dest='station_xml_fns',
        action='append',
        default=[],
        metavar='STATIONXML',
        help='read station metadata from file STATIONXML')

    parser.add_option(
        '--event', '--events',
        dest='event_fns',
        action='append',
        default=[],
        metavar='EVENT',
        help='read event information from file EVENT')

    parser.add_option(
        '--cache',
        dest='cache_dir',
        default=config.config().cache_dir,
        metavar='DIR',
        help='use directory DIR to cache trace metadata '
        '(default=\'%default\')')

    # Verbosity options.
    parser.add_option(
        '--quiet',
        dest='quiet',
        action='store_true',
        default=False,
        help='disable output of progress information')

    parser.add_option(
        '--debug',
        dest='debug',
        action='store_true',
        default=False,
        help='print debugging information to stderr')

    # Time window and processing options.
    parser.add_option(
        '--tmin',
        dest='tmin',
        help='start time as "%s"' % tfmt)

    parser.add_option(
        '--tmax',
        dest='tmax',
        help='end time as "%s"' % tfmt)

    parser.add_option(
        '--tinc',
        dest='tinc',
        help='set time length of output files [s] or "auto" to automatically '
        'choose an appropriate time increment or "huge" to allow to use '
        'a lot of system memory to merge input traces into huge output '
        'files.')

    parser.add_option(
        '--downsample',
        dest='downsample',
        metavar='RATE',
        help='downsample to RATE [Hz]')

    # Output options.
    parser.add_option(
        '--output',
        dest='output_path',
        metavar='TEMPLATE',
        help='set output path to TEMPLATE. Available placeholders '
        'are %n: network, %s: station, %l: location, %c: channel, '
        '%b: begin time, %e: end time, %j: julian day of year. The '
        'following additional placeholders use the window begin and end '
        'times rather than trace begin and end times (to suppress '
        'producing many small files for gappy traces), %(wmin_year)s, '
        '%(wmin_month)s, %(wmin_day)s, %(wmin)s, %(wmin_jday)s, '
        '%(wmax_year)s, %(wmax_month)s, %(wmax_day)s, %(wmax)s, '
        '%(wmax_jday)s. Example: --output=\'data/%s/trace-%s-%c.mseed\'')

    parser.add_option(
        '--output-dir',
        metavar='TEMPLATE',
        dest='output_dir',
        help='set output directory to TEMPLATE (see --output for details) '
        'and automatically choose filenames. '
        'Produces files like TEMPLATE/NET-STA-LOC-CHA_BEGINTIME.FORMAT')

    parser.add_option(
        '--output-format',
        dest='output_format',
        default='mseed',
        choices=io.allowed_formats('save'),
        help='set output file format. Choices: %s' %
        io.allowed_formats('save', 'cli_help', 'mseed'))

    parser.add_option(
        '--force',
        dest='force',
        action='store_true',
        default=False,
        help='force overwriting of existing files')

    parser.add_option(
        '--no-snap',
        dest='snap',
        action='store_false',
        default=True,
        help='do not start output files at even multiples of file length')

    parser.add_option(
        '--traversal',
        dest='traversal',
        choices=('station-by-station', 'channel-by-channel', 'chronological'),
        default='station-by-station',
        help='set traversal order for traces processing. '
        'Choices are \'station-by-station\' [default], '
        '\'channel-by-channel\', and \'chronological\'. Chronological '
        'traversal uses more processing memory but makes it possible to '
        'join multiple stations into single output files')

    # Trace code renaming options (may be repeated).
    parser.add_option(
        '--rename-network',
        action='append',
        default=[],
        dest='rename_network',
        metavar='/PATTERN/REPLACEMENT/',
        help='update network code, can be given more than once')

    parser.add_option(
        '--rename-station',
        action='append',
        default=[],
        dest='rename_station',
        metavar='/PATTERN/REPLACEMENT/',
        help='update station code, can be given more than once')

    parser.add_option(
        '--rename-location',
        action='append',
        default=[],
        dest='rename_location',
        metavar='/PATTERN/REPLACEMENT/',
        help='update location code, can be given more than once')

    parser.add_option(
        '--rename-channel',
        action='append',
        default=[],
        dest='rename_channel',
        metavar='/PATTERN/REPLACEMENT/',
        help='update channel code, can be given more than once')

    parser.add_option(
        '--output-data-type',
        dest='output_data_type',
        choices=('same', 'int32', 'int64', 'float32', 'float64'),
        default='same',
        metavar='DTYPE',
        help='set data type. Choices: same [default], int32, '
        'int64, float32, float64. The output file format must support '
        'the given type.')

    parser.add_option(
        '--output-steim',
        dest='steim',
        type='steim',
        default=2,
        metavar='STEIM_COMPRESSION',
        help='set the mseed STEIM compression. Choices: 1 or 2. '
        'Default is STEIM-2, which can compress full range int32. '
        'NOTE: STEIM-2 is limited to 30 bit dynamic range.')

    parser.add_option(
        '--output-record-length',
        dest='record_length',
        type='record_length',
        default=4096,
        metavar='RECORD_LENGTH',
        help='set the mseed record length in bytes. Choices: %s. '
        'Default is 4096 bytes, which is commonly used for archiving.'
        % ', '.join(str(b) for b in io.mseed.VALID_RECORD_LENGTHS))

    # Restitution options.
    quantity_choices = ('acceleration', 'velocity', 'displacement')
    parser.add_option(
        '--output-quantity',
        dest='output_quantity',
        choices=quantity_choices,
        help='deconvolve instrument transfer function. Choices: %s'
        % ', '.join(quantity_choices))

    parser.add_option(
        '--restitution-frequency-band',
        default='0.001,100.0',
        dest='str_fmin_fmax',
        metavar='FMIN,FMAX',
        help='frequency band for instrument deconvolution as FMIN,FMAX in Hz. '
        'Default: "%default"')

    sample_snap_choices = ('shift', 'interpolate')
    parser.add_option(
        '--sample-snap',
        dest='sample_snap',
        choices=sample_snap_choices,
        help='shift/interpolate traces so that samples are at even multiples '
        'of sampling rate. Choices: %s' % ', '.join(sample_snap_choices))

    (options, args) = parser.parse_args(args)

    # Remaining positional arguments are the input paths; none given means
    # show usage and bail out.
    if len(args) == 0:
        parser.print_help()
        sys.exit(1)

    # Configure logging verbosity: --debug wins over --quiet.
    if options.debug:
        util.setup_logging(program_name, 'debug')
    elif options.quiet:
        util.setup_logging(program_name, 'warning')
    else:
        util.setup_logging(program_name, 'info')

    def get_pile():
        # Deferred pile construction so logging is configured before any
        # scanning progress output is produced.
        return pile.make_pile(
            paths=args,
            selector=None,
            regex=options.regex,
            fileformat=options.format,
            cachedirname=options.cache_dir,
            show_progress=not options.quiet)

    return process(get_pile, options)
if __name__ == '__main__':
    # main() defaults to sys.argv[1:] when called without arguments.
    main()