1#!/usr/bin/env python
2# http://pyrocko.org - GPLv3
3#
4# The Pyrocko Developers, 21st Century
5# ---|P------/S----------~Lg----------
6from __future__ import absolute_import, division
8import sys
9import re
10import os
11import logging
12import signal
13import math
14from copy import copy
15from optparse import OptionParser, Option, OptionValueError
17import numpy as num
19from pyrocko import util, config, pile, model, io, trace
20from pyrocko.io import stationxml
# Shorthand for building file system paths.
pjoin = os.path.join

logger = logging.getLogger('pyrocko.apps.jackseis')

program_name = 'jackseis'

usage = program_name + ' <inputs> ... [options]'

description = '''A simple tool to manipulate waveform archive data.'''

# Human-readable time format shown in --tmin/--tmax help texts.
tfmt = 'YYYY-mm-dd HH:MM:SS[.xxx]'
# Shorthands for time <-> string conversion used throughout this module.
tts = util.time_to_str
stt = util.str_to_time
def die(message):
    '''Abort the program, printing an error message to stderr.'''
    msg = '%s: error: %s' % (program_name, message)
    sys.exit(msg)
# Map --output-data-type choice names to numpy dtypes.
name_to_dtype = {
    'int32': num.int32,
    'int64': num.int64,
    'float32': num.float32,
    'float64': num.float64}

# Map --output-quantity choice names to StationXML input-unit strings used
# for response lookup during restitution.
output_units = {
    'acceleration': 'M/S**2',
    'velocity': 'M/S',
    'displacement': 'M'}
def str_to_seconds(s):
    '''Convert a duration string to seconds.

    Accepts a plain number or a number with one of the suffixes
    ``s`` (seconds), ``m`` (minutes), ``h`` (hours), ``d`` (days).
    '''
    multipliers = {
        's': 1.,
        'm': 60.,
        'h': 3600.,
        'd': 3600.*24.}

    suffix = s[-1:]
    if suffix in multipliers:
        return float(s[:-1]) * multipliers[suffix]

    return float(s)
def nice_seconds_floor(s):
    '''Round a time span down to the nearest "nice" value.

    Returns ``s`` unchanged when it is smaller than the smallest or not
    smaller than the largest of the predefined nice values.
    '''
    nice = [1., 10., 60., 600., 3600., 3.*3600., 12*3600., 24*3600., 48*3600.]
    best = s
    for candidate in nice:
        if s < candidate:
            return best

        best = candidate

    return s
def check_record_length(option, opt, value):
    '''optparse type checker for mseed record lengths.

    Returns the record length as ``int`` if it is one of the valid mseed
    record lengths, otherwise raises :py:exc:`OptionValueError`.
    '''
    try:
        reclen = int(value)
        if reclen in io.mseed.VALID_RECORD_LENGTHS:
            return reclen
    except Exception:
        pass

    # Use `value` here: `reclen` is unbound when int(value) raised, which
    # previously turned the intended OptionValueError into an
    # UnboundLocalError.
    raise OptionValueError(
        'invalid record length %s. (choose from %s)'
        % (value, ', '.join(str(b) for b in io.mseed.VALID_RECORD_LENGTHS)))
def check_steim(option, opt, value):
    '''optparse type checker for the mseed STEIM compression level.

    Returns the compression level as ``int`` if it is 1 or 2, otherwise
    raises :py:exc:`OptionValueError`.
    '''
    try:
        compression = int(value)
        if compression in (1, 2):
            return compression
    except Exception:
        pass

    # Use `value` here: `compression` is unbound when int(value) raised,
    # which previously turned the intended OptionValueError into an
    # UnboundLocalError.
    raise OptionValueError(
        'invalid STEIM compression %s. (choose from 1, 2)' % value)
class JackseisOptions(Option):
    # optparse Option subclass registering the custom option value types
    # 'record_length' and 'steim' together with their checker functions.
    TYPES = Option.TYPES + ('record_length', 'steim')
    TYPE_CHECKER = copy(Option.TYPE_CHECKER)
    TYPE_CHECKER['record_length'] = check_record_length
    TYPE_CHECKER['steim'] = check_steim
def main(args=None):
    '''Command line interface of the jackseis waveform manipulation tool.

    :param args: command line arguments to parse
        (default: ``sys.argv[1:]``)
    '''
    if args is None:
        args = sys.argv[1:]

    parser = OptionParser(
        usage=usage,
        description=description,
        option_class=JackseisOptions,
        formatter=util.BetterHelpFormatter())

    parser.add_option(
        '--format',
        dest='format',
        default='detect',
        choices=io.allowed_formats('load'),
        help='assume input files are of given FORMAT. Choices: %s' %
             io.allowed_formats('load', 'cli_help', 'detect'))

    parser.add_option(
        '--pattern',
        dest='regex',
        metavar='REGEX',
        help='only include files whose paths match REGEX')

    parser.add_option(
        '--stationxml',
        dest='station_xml_fns',
        action='append',
        default=[],
        metavar='STATIONXML',
        help='read station metadata from file STATIONXML')

    parser.add_option(
        '--event', '--events',
        dest='event_fns',
        action='append',
        default=[],
        metavar='EVENT',
        help='read event information from file EVENT')

    parser.add_option(
        '--cache',
        dest='cache_dir',
        default=config.config().cache_dir,
        metavar='DIR',
        help='use directory DIR to cache trace metadata '
             '(default=\'%default\')')

    parser.add_option(
        '--quiet',
        dest='quiet',
        action='store_true',
        default=False,
        help='disable output of progress information')

    parser.add_option(
        '--debug',
        dest='debug',
        action='store_true',
        default=False,
        help='print debugging information to stderr')

    parser.add_option(
        '--tmin',
        dest='tmin',
        help='start time as "%s"' % tfmt)

    parser.add_option(
        '--tmax',
        dest='tmax',
        help='end time as "%s"' % tfmt)

    parser.add_option(
        '--tinc',
        dest='tinc',
        help='set time length of output files [s] or "auto" to automatically '
             'choose an appropriate time increment or "huge" to allow to use '
             'a lot of system memory to merge input traces into huge output '
             'files.')

    parser.add_option(
        '--downsample',
        dest='downsample',
        metavar='RATE',
        help='downsample to RATE [Hz]')

    parser.add_option(
        '--scale',
        dest='scale',
        type=float,
        metavar='FACTOR',
        help='scale seismograms with given FACTOR. Changes data type to '
             'float64. Use --output-data-type to change again after scaling.')

    parser.add_option(
        '--output',
        dest='output_path',
        metavar='TEMPLATE',
        help='set output path to TEMPLATE. Available placeholders '
             'are %n: network, %s: station, %l: location, %c: channel, '
             '%b: begin time, %e: end time, %j: julian day of year. The '
             'following additional placeholders use the window begin and end '
             'times rather than trace begin and end times (to suppress '
             'producing many small files for gappy traces), %(wmin_year)s, '
             '%(wmin_month)s, %(wmin_day)s, %(wmin)s, %(wmin_jday)s, '
             '%(wmax_year)s, %(wmax_month)s, %(wmax_day)s, %(wmax)s, '
             '%(wmax_jday)s. Example: --output=\'data/%s/trace-%s-%c.mseed\'')

    parser.add_option(
        '--output-dir',
        metavar='TEMPLATE',
        dest='output_dir',
        help='set output directory to TEMPLATE (see --output for details) '
             'and automatically choose filenames. '
             'Produces files like TEMPLATE/NET-STA-LOC-CHA_BEGINTIME.FORMAT')

    parser.add_option(
        '--output-format',
        dest='output_format',
        default='mseed',
        choices=io.allowed_formats('save'),
        help='set output file format. Choices: %s' %
             io.allowed_formats('save', 'cli_help', 'mseed'))

    parser.add_option(
        '--force',
        dest='force',
        action='store_true',
        default=False,
        help='force overwriting of existing files')

    parser.add_option(
        '--no-snap',
        dest='snap',
        action='store_false',
        default=True,
        help='do not start output files at even multiples of file length')

    parser.add_option(
        '--traversal',
        dest='traversal',
        choices=('station-by-station', 'channel-by-channel', 'chronological'),
        default='station-by-station',
        help='set traversal order for traces processing. '
             'Choices are \'station-by-station\' [default], '
             '\'channel-by-channel\', and \'chronological\'. Chronological '
             'traversal uses more processing memory but makes it possible to '
             'join multiple stations into single output files')

    parser.add_option(
        '--rename-network',
        action='append',
        default=[],
        dest='rename_network',
        metavar='/PATTERN/REPLACEMENT/',
        help='update network code, can be given more than once')

    parser.add_option(
        '--rename-station',
        action='append',
        default=[],
        dest='rename_station',
        metavar='/PATTERN/REPLACEMENT/',
        help='update station code, can be given more than once')

    parser.add_option(
        '--rename-location',
        action='append',
        default=[],
        dest='rename_location',
        metavar='/PATTERN/REPLACEMENT/',
        help='update location code, can be given more than once')

    parser.add_option(
        '--rename-channel',
        action='append',
        default=[],
        dest='rename_channel',
        metavar='/PATTERN/REPLACEMENT/',
        help='update channel code, can be given more than once')

    parser.add_option(
        '--output-data-type',
        dest='output_data_type',
        choices=('same', 'int32', 'int64', 'float32', 'float64'),
        default='same',
        metavar='DTYPE',
        help='set data type. Choices: same [default], int32, '
             'int64, float32, float64. The output file format must support '
             'the given type.')

    parser.add_option(
        '--output-steim',
        dest='steim',
        type='steim',
        default=2,
        metavar='STEIM_COMPRESSION',
        help='set the mseed STEIM compression. Choices: 1 or 2. '
             'Default is STEIM-2, which can compress full range int32. '
             'NOTE: STEIM-2 is limited to 30 bit dynamic range.')

    parser.add_option(
        '--output-record-length',
        dest='record_length',
        type='record_length',
        default=4096,
        metavar='RECORD_LENGTH',
        help='set the mseed record length in bytes. Choices: %s. '
             'Default is 4096 bytes, which is commonly used for archiving.'
             % ', '.join(str(b) for b in io.mseed.VALID_RECORD_LENGTHS))

    quantity_choices = ('acceleration', 'velocity', 'displacement')
    parser.add_option(
        '--output-quantity',
        dest='output_quantity',
        choices=quantity_choices,
        help='deconvolve instrument transfer function. Choices: %s'
             % ', '.join(quantity_choices))

    parser.add_option(
        '--restitution-frequency-band',
        default='0.001,100.0',
        dest='str_fmin_fmax',
        metavar='FMIN,FMAX',
        help='frequency band for instrument deconvolution as FMIN,FMAX in Hz. '
             'Default: "%default"')

    sample_snap_choices = ('shift', 'interpolate')
    parser.add_option(
        '--sample-snap',
        dest='sample_snap',
        choices=sample_snap_choices,
        help='shift/interpolate traces so that samples are at even multiples '
             'of sampling rate. Choices: %s' % ', '.join(sample_snap_choices))

    (options, args) = parser.parse_args(args)

    if len(args) == 0:
        parser.print_help()
        sys.exit(1)

    if options.debug:
        util.setup_logging(program_name, 'debug')
    elif options.quiet:
        util.setup_logging(program_name, 'warning')
    else:
        util.setup_logging(program_name, 'info')

    # Interpret --tinc: the special values 'huge' and 'auto' are kept as
    # strings for now and resolved after the data pile is available.
    # (Fix: this must test options.tinc, not the just-initialized tinc,
    # otherwise 'huge' and 'auto' are rejected by str_to_seconds.)
    tinc = None
    if options.tinc is not None:
        if options.tinc in ('huge', 'auto'):
            tinc = options.tinc
        else:
            try:
                tinc = str_to_seconds(options.tinc)
            except Exception:
                die('invalid argument to --tinc')

    tmin = None
    if options.tmin is not None:
        try:
            tmin = stt(options.tmin)
        except Exception:
            # Fix: show the expected time format (was an empty string).
            die('invalid argument to --tmin. '
                'Expected format is "%s"' % tfmt)

    tmax = None
    if options.tmax is not None:
        try:
            tmax = stt(options.tmax)
        except Exception:
            die('invalid argument to --tmax. '
                'Expected format is "%s"' % tfmt)

    target_deltat = None
    if options.downsample is not None:
        try:
            target_deltat = 1.0 / float(options.downsample)
        except Exception:
            die('invalid argument to --downsample')

    # Collect (attribute, pattern, replacement) triples from the
    # --rename-* options.
    replacements = []
    for k, rename_k in [
            ('network', options.rename_network),
            ('station', options.rename_station),
            ('location', options.rename_location),
            ('channel', options.rename_channel)]:

        for patrep in rename_k:
            m = re.match(r'/([^/]+)/([^/]*)/', patrep)
            if not m:
                die('invalid argument to --rename-%s. '
                    'Expected format is /PATTERN/REPLACEMENT/' % k)

            replacements.append((k, m.group(1), m.group(2)))

    # Merge all given StationXML files into one object for response lookup.
    sx = None
    if options.station_xml_fns:
        sxs = []
        for station_xml_fn in options.station_xml_fns:
            sxs.append(stationxml.load_xml(filename=station_xml_fn))

        sx = stationxml.primitive_merge(sxs)

    events = []
    for event_fn in options.event_fns:
        events.extend(model.load_events(event_fn))

    p = pile.make_pile(
        paths=args,
        selector=None,
        regex=options.regex,
        fileformat=options.format,
        cachedirname=options.cache_dir,
        show_progress=not options.quiet)

    if p.tmin is None:
        die('data selection is empty')

    if tinc == 'auto':
        tinc = nice_seconds_floor(p.get_deltatmin() * 1000000.)

    if options.snap:
        if tmin is None:
            tmin = p.tmin

        # Snapping only makes sense for a numeric increment; 'huge' is
        # resolved to "no splitting" below.
        if tinc is not None and tinc != 'huge':
            tmin = int(math.floor(tmin / tinc)) * tinc

    output_path = options.output_path
    output_dir = options.output_dir

    if output_path and not output_dir and os.path.isdir(output_path):
        output_dir = output_path  # compat. with old behaviour

    if output_dir and not output_path:
        output_path = 'trace_%(network)s-%(station)s-' \
            '%(location)s-%(channel)s_%(wmin)s.' + \
            options.output_format

    if output_dir and output_path:
        output_path = pjoin(output_dir, output_path)

    if not output_path:
        die('--output not given')

    try:
        fmin, fmax = map(float, options.str_fmin_fmax.split(','))
    except ValueError:
        die('invalid argument to --restitution-frequency-band. '
            'Expected format is FMIN,FMAX')

    tfade_factor = 2.0
    ffade_factor = 1.5
    if options.output_quantity:
        # Extra padding so the restitution taper does not eat into the
        # output window.
        tpad = 2*tfade_factor/fmin
    else:
        tpad = 0.

    if target_deltat is not None:
        tpad += target_deltat * 10.

    if tinc is None:
        if ((tmax or p.tmax) - (tmin or p.tmin)) \
                / p.get_deltatmin() > 100000000.:
            die('use --tinc=huge to really produce such large output files '
                'or use --tinc=INC to split into smaller files.')

    elif tinc == 'huge':
        # 'huge' means: merge everything into a single window, skipping the
        # size sanity check above.
        tinc = None

    kwargs = dict(tmin=tmin, tmax=tmax, tinc=tinc, tpad=tpad, style='batch')

    if options.traversal == 'channel-by-channel':
        it = p.chopper_grouped(gather=lambda tr: tr.nslc_id, **kwargs)

    elif options.traversal == 'station-by-station':
        it = p.chopper_grouped(gather=lambda tr: tr.nslc_id[:2], **kwargs)

    else:
        it = p.chopper(**kwargs)

    abort = []

    def got_sigint(signum, frame):
        # Defer shutdown to the end of the current batch so no partial
        # output files are left behind.
        abort.append(True)

    old = signal.signal(signal.SIGINT, got_sigint)

    save_kwargs = {}
    if options.output_format == 'mseed':
        save_kwargs['record_length'] = options.record_length
        save_kwargs['steim'] = options.steim

    for batch in it:
        traces = batch.traces
        if traces:
            twmin = batch.tmin
            twmax = batch.tmax
            logger.info('processing %s - %s, %i traces' %
                        (tts(twmin), tts(twmax), len(traces)))

            if options.sample_snap:
                for tr in traces:
                    tr.snap(interpolate=options.sample_snap == 'interpolate')

            if target_deltat is not None:
                out_traces = []
                for tr in traces:
                    try:
                        tr.downsample_to(
                            target_deltat, snap=True, demean=False)

                        if options.output_data_type == 'same':
                            # Forces a copy with the trace's own dtype,
                            # detaching it from any shared buffer.
                            tr.ydata = tr.ydata.astype(tr.ydata.dtype)

                        out_traces.append(tr)

                    except (trace.TraceTooShort, trace.NoData):
                        pass

                traces = out_traces

            if options.output_quantity:
                out_traces = []
                tfade = tfade_factor / fmin
                ftap = (fmin / ffade_factor, fmin, fmax, ffade_factor * fmax)
                for tr in traces:
                    try:
                        response = sx.get_pyrocko_response(
                            tr.nslc_id,
                            timespan=(tr.tmin, tr.tmax),
                            fake_input_units=output_units[
                                options.output_quantity])

                        rest_tr = tr.transfer(
                            tfade, ftap, response, invert=True, demean=True)

                        out_traces.append(rest_tr)

                    except stationxml.NoResponseInformation as e:
                        logger.warning(
                            'Cannot restitute: %s (no response)' % str(e))

                    except stationxml.MultipleResponseInformation as e:
                        logger.warning(
                            'Cannot restitute: %s (multiple responses found)'
                            % str(e))

                    except (trace.TraceTooShort, trace.NoData):
                        logger.warning(
                            'Trace too short: %s' % '.'.join(tr.nslc_id))

                traces = out_traces

            if options.scale is not None:
                for tr in traces:
                    tr.ydata = options.scale * tr.ydata

            if options.output_data_type != 'same':
                for tr in traces:
                    tr.ydata = tr.ydata.astype(
                        name_to_dtype[options.output_data_type])

            if replacements:
                for tr in traces:
                    r = {}
                    for k, pat, repl in replacements:
                        oldval = getattr(tr, k)
                        newval, n = re.subn(pat, repl, oldval)
                        if n:
                            r[k] = newval

                    tr.set_codes(**r)

            if output_path:
                otraces = []
                for tr in traces:
                    try:
                        # Cut off the padding added for downsampling and
                        # restitution.
                        otr = tr.chop(twmin, twmax, inplace=False)
                        otraces.append(otr)
                    except trace.NoData:
                        pass

                try:
                    io.save(otraces, output_path, format=options.output_format,
                            overwrite=options.force,
                            additional=dict(
                                wmin_year=tts(twmin, format='%Y'),
                                wmin_month=tts(twmin, format='%m'),
                                wmin_day=tts(twmin, format='%d'),
                                wmin_jday=tts(twmin, format='%j'),
                                wmin=tts(twmin, format='%Y-%m-%d_%H-%M-%S'),
                                wmax_year=tts(twmax, format='%Y'),
                                wmax_month=tts(twmax, format='%m'),
                                wmax_day=tts(twmax, format='%d'),
                                wmax_jday=tts(twmax, format='%j'),
                                wmax=tts(twmax, format='%Y-%m-%d_%H-%M-%S')),
                            **save_kwargs)
                except io.FileSaveError as e:
                    die(str(e))

        if abort:
            break

    signal.signal(signal.SIGINT, old)

    if abort:
        die('interrupted.')
if __name__ == '__main__':
    # main() defaults to sys.argv[1:] when called without arguments.
    main()