1# http://pyrocko.org - GPLv3
2#
3# The Pyrocko Developers, 21st Century
4# ---|P------/S----------~Lg----------
6import sys
7import re
8import os
9import logging
10import signal
11import math
12from copy import copy
13from optparse import OptionParser, Option, OptionValueError
15import numpy as num
17from pyrocko import util, config, pile, model, io, trace
18from pyrocko.io import stationxml
# Shorthand for joining output directory and filename templates.
pjoin = os.path.join
logger = logging.getLogger('pyrocko.apps.jackseis')
program_name = 'jackseis'
usage = program_name + ' <inputs> ... [options]'
description = '''A simple tool to manipulate waveform archive data.'''
# Human-readable time format shown in --tmin/--tmax help and error messages.
tfmt = 'YYYY-mm-dd HH:MM:SS[.xxx]'
# Short aliases for the time conversion helpers used throughout this module.
tts = util.time_to_str
stt = util.str_to_time_fillup
def die(message):
    '''
    Abort the program, printing an error *message* prefixed with the
    program name to stderr (non-zero exit status).
    '''
    error_text = '%s: error: %s' % (program_name, message)
    sys.exit(error_text)
# Maps --output-data-type choice names to the numpy dtypes used when
# converting trace samples before saving.
name_to_dtype = {
    'int32': num.int32,
    'int64': num.int64,
    'float32': num.float32,
    'float64': num.float64}
# Maps --output-quantity choices to the physical input units passed to the
# StationXML response lookup during instrument deconvolution.
output_units = {
    'acceleration': 'M/S**2',
    'velocity': 'M/S',
    'displacement': 'M'}
def str_to_seconds(s):
    '''
    Convert a duration string to seconds.

    Recognized unit suffixes: ``s`` (seconds), ``m`` (minutes), ``h``
    (hours) and ``d`` (days). A bare number is taken as seconds.

    :param s: duration string, e.g. ``'10s'``, ``'1.5h'``, ``'42'``
    :returns: duration in seconds as ``float``
    :raises ValueError: if the numeric part cannot be parsed
    '''
    unit_factors = {'s': 1., 'm': 60., 'h': 3600., 'd': 24.*3600.}
    suffix = s[-1:]
    if suffix in unit_factors:
        return float(s[:-1]) * unit_factors[suffix]

    return float(s)
def nice_seconds_floor(s):
    '''
    Round a time span down to the largest "nice" interval not exceeding it.

    Nice intervals range from 1 s to 48 h. Spans below 1 s and spans above
    the largest nice interval are returned unchanged.

    :param s: time span [s]
    :returns: floored time span [s]
    '''
    nice_intervals = (
        1., 10., 60., 600., 3600., 3.*3600., 12*3600., 24*3600., 48*3600.)

    previous = s
    for threshold in nice_intervals:
        if s < threshold:
            return previous

        previous = threshold

    return s
def check_record_length(option, opt, value):
    '''
    optparse type checker for the custom ``record_length`` option type.

    :param option: the :py:class:`optparse.Option` instance (unused)
    :param opt: option string as given on the command line (unused)
    :param value: raw string value to validate
    :returns: record length as ``int``
    :raises OptionValueError: if *value* is not a valid mseed record length
    '''
    try:
        reclen = int(value)
        if reclen in io.mseed.VALID_RECORD_LENGTHS:
            return reclen
    except Exception:
        pass

    # Report the raw input value here: ``reclen`` is unbound when int()
    # failed, which previously raised a NameError instead of the intended
    # OptionValueError.
    raise OptionValueError(
        'invalid record length %s. (choose from %s)'
        % (value, ', '.join(str(b) for b in io.mseed.VALID_RECORD_LENGTHS)))
def check_steim(option, opt, value):
    '''
    optparse type checker for the custom ``steim`` option type.

    :param option: the :py:class:`optparse.Option` instance (unused)
    :param opt: option string as given on the command line (unused)
    :param value: raw string value to validate
    :returns: STEIM compression level as ``int`` (1 or 2)
    :raises OptionValueError: if *value* is not 1 or 2
    '''
    try:
        compression = int(value)
        if compression in (1, 2):
            return compression
    except Exception:
        pass

    # Report the raw input value here: ``compression`` is unbound when
    # int() failed, which previously raised a NameError instead of the
    # intended OptionValueError.
    raise OptionValueError(
        'invalid STEIM compression %s. (choose from 1, 2)' % value)
class JackseisOptions(Option):
    '''
    :py:class:`optparse.Option` subclass registering the custom option
    value types ``record_length`` and ``steim``.
    '''
    TYPES = Option.TYPES + ('record_length', 'steim')
    # Extend a copy of the base checker table; never mutate Option's own.
    TYPE_CHECKER = dict(
        Option.TYPE_CHECKER,
        record_length=check_record_length,
        steim=check_steim)
def process(get_pile, options):
    '''
    Run the jackseis conversion pipeline.

    Validates time, downsampling, renaming and output options, then iterates
    over the input data in time windows, applying per-window sample snapping,
    downsampling, instrument response deconvolution, scaling, dtype
    conversion and code renaming before saving via the output path template.

    :param get_pile: callable returning the input
        :py:class:`pyrocko.pile.Pile`
    :param options: parsed command line options (see :py:func:`main`)

    Terminates the program via :py:func:`die` on invalid options or save
    errors. Responds to SIGINT by finishing the current window and exiting.
    '''
    tinc = None
    if options.tinc is not None:
        # Check the raw option string here -- ``tinc`` itself is still None
        # at this point (previously this compared ``tinc``, so 'huge' and
        # 'auto' were never recognized).
        if options.tinc in ('huge', 'auto'):
            tinc = options.tinc
        else:
            try:
                tinc = str_to_seconds(options.tinc)
            except Exception:
                die('invalid argument to --tinc')

    tmin = None
    if options.tmin is not None:
        try:
            tmin = stt(options.tmin)
        except Exception:
            # Interpolate tfmt into the message (the format operator was
            # previously inside the string literal).
            die('invalid argument to --tmin. '
                'Expected format is "%s"' % tfmt)

    tmax = None
    if options.tmax is not None:
        try:
            tmax = stt(options.tmax)
        except Exception:
            die('invalid argument to --tmax. '
                'Expected format is "%s"' % tfmt)

    target_deltat = None
    if options.downsample is not None:
        try:
            target_deltat = 1.0 / float(options.downsample)
        except Exception:
            die('invalid argument to --downsample')

    # Collect (attribute, pattern, replacement) triples for renaming of
    # network/station/location/channel codes.
    replacements = []
    for k, rename_k in [
            ('network', options.rename_network),
            ('station', options.rename_station),
            ('location', options.rename_location),
            ('channel', options.rename_channel)]:

        for patrep in rename_k:
            m = re.match(r'/([^/]+)/([^/]*)/', patrep)
            if not m:
                die('invalid argument to --rename-%s. '
                    'Expected format is /PATTERN/REPLACEMENT/' % k)

            replacements.append((k, m.group(1), m.group(2)))

    # Merge all given StationXML files into a single metadata object.
    sx = None
    if options.station_xml_fns:
        sxs = []
        for station_xml_fn in options.station_xml_fns:
            sxs.append(stationxml.load_xml(filename=station_xml_fn))

        sx = stationxml.primitive_merge(sxs)

    events = []
    for event_fn in options.event_fns:
        events.extend(model.load_events(event_fn))

    p = get_pile()

    if p.tmin is None:
        die('data selection is empty')

    if tinc == 'auto':
        # Aim for roughly 1e6 samples per output file at the finest rate.
        tinc = nice_seconds_floor(p.get_deltatmin() * 1000000.)

    if options.snap:
        if tmin is None:
            tmin = p.tmin

        if tinc is not None:
            # Align window starts to even multiples of the file length.
            tmin = int(math.floor(tmin / tinc)) * tinc

    output_path = options.output_path
    output_dir = options.output_dir

    if output_path and not output_dir and os.path.isdir(output_path):
        output_dir = output_path  # compat. with old behaviour

    if output_dir and not output_path:
        output_path = 'trace_%(network)s-%(station)s-' \
            '%(location)s-%(channel)s_%(wmin)s.' + \
            options.output_format

    if output_dir and output_path:
        output_path = pjoin(output_dir, output_path)

    if not output_path:
        die('--output not given')

    fmin, fmax = map(float, options.str_fmin_fmax.split(','))
    tfade_factor = 2.0
    ffade_factor = 1.5
    if options.output_quantity:
        # Extra padding so restitution tapers do not touch the output window.
        tpad = 2*tfade_factor/fmin
    else:
        tpad = 0.

    if target_deltat is not None:
        tpad += target_deltat * 10.

    if tinc is None:
        if ((tmax or p.tmax) - (tmin or p.tmin)) \
                / p.get_deltatmin() > 100000000.:
            die('use --tinc=huge to really produce such large output files '
                'or use --tinc=INC to split into smaller files.')

    kwargs = dict(tmin=tmin, tmax=tmax, tinc=tinc, tpad=tpad, style='batch')

    if options.traversal == 'channel-by-channel':
        it = p.chopper_grouped(gather=lambda tr: tr.nslc_id, **kwargs)

    elif options.traversal == 'station-by-station':
        it = p.chopper_grouped(gather=lambda tr: tr.nslc_id[:2], **kwargs)

    else:
        it = p.chopper(**kwargs)

    # SIGINT handler appends to this list; checked after each window so the
    # current window is always completed before exiting.
    abort = []

    def got_sigint(signum, frame):
        abort.append(True)

    old = signal.signal(signal.SIGINT, got_sigint)

    save_kwargs = {}
    if options.output_format == 'mseed':
        save_kwargs['record_length'] = options.record_length
        save_kwargs['steim'] = options.steim

    def process_traces(batch):
        # Apply the full per-window processing chain and save the result.
        traces = batch.traces
        if traces:
            twmin = batch.tmin
            twmax = batch.tmax
            logger.info('processing %s - %s, %i traces' %
                        (tts(twmin), tts(twmax), len(traces)))

            if options.sample_snap:
                for tr in traces:
                    tr.snap(interpolate=options.sample_snap == 'interpolate')

            if target_deltat is not None:
                out_traces = []
                for tr in traces:
                    try:
                        tr.downsample_to(
                            target_deltat,
                            snap=True,
                            demean=False,
                            allow_upsample_max=4)

                        if options.output_data_type == 'same':
                            tr.ydata = tr.ydata.astype(tr.ydata.dtype)

                        out_traces.append(tr)

                    except (trace.TraceTooShort, trace.NoData):
                        # Traces unusable for downsampling are dropped.
                        pass

                traces = out_traces

            if options.output_quantity:
                out_traces = []
                tfade = tfade_factor / fmin
                ftap = (fmin / ffade_factor, fmin, fmax, ffade_factor * fmax)
                for tr in traces:
                    try:
                        response = sx.get_pyrocko_response(
                            tr.nslc_id,
                            timespan=(tr.tmin, tr.tmax),
                            fake_input_units=output_units[
                                options.output_quantity])

                        rest_tr = tr.transfer(
                            tfade, ftap, response, invert=True, demean=True)

                        out_traces.append(rest_tr)

                    except stationxml.NoResponseInformation as e:
                        logger.warning(
                            'Cannot restitute: %s (no response)' % str(e))

                    except stationxml.MultipleResponseInformation as e:
                        logger.warning(
                            'Cannot restitute: %s (multiple responses found)'
                            % str(e))

                    except (trace.TraceTooShort, trace.NoData):
                        logger.warning(
                            'Trace too short: %s' % '.'.join(tr.nslc_id))

                traces = out_traces

            if options.scale is not None:
                for tr in traces:
                    tr.ydata = options.scale * tr.ydata

            if options.output_data_type != 'same':
                for tr in traces:
                    tr.ydata = tr.ydata.astype(
                        name_to_dtype[options.output_data_type])

            if replacements:
                for tr in traces:
                    r = {}
                    for k, pat, repl in replacements:
                        oldval = getattr(tr, k)
                        newval, n = re.subn(pat, repl, oldval)
                        if n:
                            r[k] = newval

                    tr.set_codes(**r)

            if output_path:
                # Cut traces to the window (excluding padding) before saving.
                otraces = []
                for tr in traces:
                    try:
                        otr = tr.chop(twmin, twmax, inplace=False)
                        otraces.append(otr)
                    except trace.NoData:
                        pass

                try:
                    io.save(otraces, output_path, format=options.output_format,
                            overwrite=options.force,
                            additional=dict(
                                wmin_year=tts(twmin, format='%Y'),
                                wmin_month=tts(twmin, format='%m'),
                                wmin_day=tts(twmin, format='%d'),
                                wmin_jday=tts(twmin, format='%j'),
                                wmin=tts(twmin, format='%Y-%m-%d_%H-%M-%S'),
                                wmax_year=tts(twmax, format='%Y'),
                                wmax_month=tts(twmax, format='%m'),
                                wmax_day=tts(twmax, format='%d'),
                                wmax_jday=tts(twmax, format='%j'),
                                wmax=tts(twmax, format='%Y-%m-%d_%H-%M-%S')),
                            **save_kwargs)
                except io.FileSaveError as e:
                    die(str(e))

    for batch in it:
        process_traces(batch)
        if abort:
            break

    # Restore the previous SIGINT handler before leaving.
    signal.signal(signal.SIGINT, old)

    if abort:
        die('interrupted.')
def main(args=None):
    '''
    jackseis command line program entry point.

    Builds the option parser, configures logging and hands the parsed
    options together with a pile factory over to :py:func:`process`.

    :param args: list of command line arguments; defaults to
        ``sys.argv[1:]``
    '''
    if args is None:
        args = sys.argv[1:]

    # Custom option class provides the 'record_length' and 'steim' types.
    parser = OptionParser(
        usage=usage,
        description=description,
        option_class=JackseisOptions,
        formatter=util.BetterHelpFormatter())
    # --- input selection and metadata ---
    parser.add_option(
        '--format',
        dest='format',
        default='detect',
        choices=io.allowed_formats('load'),
        help='assume input files are of given FORMAT. Choices: %s' %
        io.allowed_formats('load', 'cli_help', 'detect'))
    parser.add_option(
        '--pattern',
        dest='regex',
        metavar='REGEX',
        help='only include files whose paths match REGEX')
    parser.add_option(
        '--stationxml',
        dest='station_xml_fns',
        action='append',
        default=[],
        metavar='STATIONXML',
        help='read station metadata from file STATIONXML')
    parser.add_option(
        '--event', '--events',
        dest='event_fns',
        action='append',
        default=[],
        metavar='EVENT',
        help='read event information from file EVENT')
    parser.add_option(
        '--cache',
        dest='cache_dir',
        default=config.config().cache_dir,
        metavar='DIR',
        help='use directory DIR to cache trace metadata '
             "(default='%default')")
    # --- verbosity ---
    parser.add_option(
        '--quiet',
        dest='quiet',
        action='store_true',
        default=False,
        help='disable output of progress information')
    parser.add_option(
        '--debug',
        dest='debug',
        action='store_true',
        default=False,
        help='print debugging information to stderr')
    # --- time window and processing ---
    parser.add_option(
        '--tmin',
        dest='tmin',
        help='start time as "%s"' % tfmt)
    parser.add_option(
        '--tmax',
        dest='tmax',
        help='end time as "%s"' % tfmt)
    parser.add_option(
        '--tinc',
        dest='tinc',
        help='set time length of output files [s] or "auto" to automatically '
             'choose an appropriate time increment or "huge" to allow to use '
             'a lot of system memory to merge input traces into huge output '
             'files.')
    parser.add_option(
        '--downsample',
        dest='downsample',
        metavar='RATE',
        help='downsample to RATE [Hz]')
    parser.add_option(
        '--scale',
        dest='scale',
        type=float,
        metavar='FACTOR',
        help='scale seismograms with given FACTOR. Changes data type to '
             'float64. Use --output-data-type to change again after scaling.')
    # --- output location and format ---
    parser.add_option(
        '--output',
        dest='output_path',
        metavar='TEMPLATE',
        help='set output path to TEMPLATE. Available placeholders '
             'are %n: network, %s: station, %l: location, %c: channel, '
             '%b: begin time, %e: end time, %j: julian day of year. The '
             'following additional placeholders use the window begin and end '
             'times rather than trace begin and end times (to suppress '
             'producing many small files for gappy traces), %(wmin_year)s, '
             '%(wmin_month)s, %(wmin_day)s, %(wmin)s, %(wmin_jday)s, '
             '%(wmax_year)s, %(wmax_month)s, %(wmax_day)s, %(wmax)s, '
             "%(wmax_jday)s. Example: --output='data/%s/trace-%s-%c.mseed'")
    parser.add_option(
        '--output-dir',
        metavar='TEMPLATE',
        dest='output_dir',
        help='set output directory to TEMPLATE (see --output for details) '
             'and automatically choose filenames. '
             'Produces files like TEMPLATE/NET-STA-LOC-CHA_BEGINTIME.FORMAT')
    parser.add_option(
        '--output-format',
        dest='output_format',
        default='mseed',
        choices=io.allowed_formats('save'),
        help='set output file format. Choices: %s' %
        io.allowed_formats('save', 'cli_help', 'mseed'))
    parser.add_option(
        '--force',
        dest='force',
        action='store_true',
        default=False,
        help='force overwriting of existing files')
    parser.add_option(
        '--no-snap',
        dest='snap',
        action='store_false',
        default=True,
        help='do not start output files at even multiples of file length')
    parser.add_option(
        '--traversal',
        dest='traversal',
        choices=('station-by-station', 'channel-by-channel', 'chronological'),
        default='station-by-station',
        help='set traversal order for traces processing. '
             "Choices are 'station-by-station' [default], "
             "'channel-by-channel', and 'chronological'. Chronological "
             'traversal uses more processing memory but makes it possible to '
             'join multiple stations into single output files')
    # --- code renaming (each may be given multiple times) ---
    parser.add_option(
        '--rename-network',
        action='append',
        default=[],
        dest='rename_network',
        metavar='/PATTERN/REPLACEMENT/',
        help='update network code, can be given more than once')
    parser.add_option(
        '--rename-station',
        action='append',
        default=[],
        dest='rename_station',
        metavar='/PATTERN/REPLACEMENT/',
        help='update station code, can be given more than once')
    parser.add_option(
        '--rename-location',
        action='append',
        default=[],
        dest='rename_location',
        metavar='/PATTERN/REPLACEMENT/',
        help='update location code, can be given more than once')
    parser.add_option(
        '--rename-channel',
        action='append',
        default=[],
        dest='rename_channel',
        metavar='/PATTERN/REPLACEMENT/',
        help='update channel code, can be given more than once')
    # --- sample encoding ---
    parser.add_option(
        '--output-data-type',
        dest='output_data_type',
        choices=('same', 'int32', 'int64', 'float32', 'float64'),
        default='same',
        metavar='DTYPE',
        help='set data type. Choices: same [default], int32, '
             'int64, float32, float64. The output file format must support '
             'the given type.')
    parser.add_option(
        '--output-steim',
        dest='steim',
        type='steim',
        default=2,
        metavar='STEIM_COMPRESSION',
        help='set the mseed STEIM compression. Choices: 1 or 2. '
             'Default is STEIM-2, which can compress full range int32. '
             'NOTE: STEIM-2 is limited to 30 bit dynamic range.')
    parser.add_option(
        '--output-record-length',
        dest='record_length',
        type='record_length',
        default=4096,
        metavar='RECORD_LENGTH',
        help='set the mseed record length in bytes. Choices: %s. '
             'Default is 4096 bytes, which is commonly used for archiving.'
        % ', '.join(str(b) for b in io.mseed.VALID_RECORD_LENGTHS))
    # --- instrument response removal ---
    quantity_choices = ('acceleration', 'velocity', 'displacement')
    parser.add_option(
        '--output-quantity',
        dest='output_quantity',
        choices=quantity_choices,
        help='deconvolve instrument transfer function. Choices: %s'
        % ', '.join(quantity_choices))
    parser.add_option(
        '--restitution-frequency-band',
        default='0.001,100.0',
        dest='str_fmin_fmax',
        metavar='FMIN,FMAX',
        help='frequency band for instrument deconvolution as FMIN,FMAX in Hz. '
             'Default: "%default"')
    sample_snap_choices = ('shift', 'interpolate')
    parser.add_option(
        '--sample-snap',
        dest='sample_snap',
        choices=sample_snap_choices,
        help='shift/interpolate traces so that samples are at even multiples '
             'of sampling rate. Choices: %s' % ', '.join(sample_snap_choices))
    (options, args) = parser.parse_args(args)
    # Positional arguments are the input paths; without any, show usage.
    if len(args) == 0:
        parser.print_help()
        sys.exit(1)
    if options.debug:
        util.setup_logging(program_name, 'debug')
    elif options.quiet:
        util.setup_logging(program_name, 'warning')
    else:
        util.setup_logging(program_name, 'info')

    def get_pile():
        # Deferred pile construction so process() controls when scanning
        # (and its progress output) happens.
        return pile.make_pile(
            paths=args,
            selector=None,
            regex=options.regex,
            fileformat=options.format,
            cachedirname=options.cache_dir,
            show_progress=not options.quiet)

    return process(get_pile, options)
# Allow running this module directly as a script.
if __name__ == '__main__':
    main()