1# http://pyrocko.org - GPLv3
2#
3# The Pyrocko Developers, 21st Century
4# ---|P------/S----------~Lg----------
6import sys
7import re
8import os
9import logging
10import signal
11import math
12from copy import copy
13from optparse import OptionParser, Option, OptionValueError
15import numpy as num
17from pyrocko import util, config, pile, model, io, trace
18from pyrocko.io import stationxml
# Shorthand for building output file paths.
pjoin = os.path.join

logger = logging.getLogger('pyrocko.apps.jackseis')

program_name = 'jackseis'

usage = program_name + ' <inputs> ... [options]'

description = '''A simple tool to manipulate waveform archive data.'''

# Human-readable time format, shown in --help texts and error messages.
tfmt = 'YYYY-mm-dd HH:MM:SS[.xxx]'
tts = util.time_to_str  # shorthand: format a timestamp for display
stt = util.str_to_time_fillup  # shorthand: parse a time string (allows abbreviation)
def die(message):
    """Terminate the program with an error message on stderr.

    Exits with a nonzero status via :func:`sys.exit`.
    """
    full_message = '%s: error: %s' % (program_name, message)
    sys.exit(full_message)
# Mapping of --output-data-type choice strings to numpy dtypes.
name_to_dtype = {
    'int32': num.int32,
    'int64': num.int64,
    'float32': num.float32,
    'float64': num.float64}
# Physical units corresponding to each --output-quantity choice; passed as
# ``fake_input_units`` when looking up instrument responses for restitution.
output_units = {
    'acceleration': 'M/S**2',
    'velocity': 'M/S',
    'displacement': 'M'}
def str_to_seconds(s):
    """Convert a duration string to seconds.

    Accepts an optional single-letter unit suffix: ``s`` (seconds),
    ``m`` (minutes), ``h`` (hours) or ``d`` (days). Without a suffix the
    value is interpreted as seconds.

    :param s: duration string, e.g. ``'10s'``, ``'1.5h'``, ``'42'``
    :returns: duration as ``float`` seconds
    :raises ValueError: if the numeric part cannot be parsed
    """
    unit_factors = {
        's': 1.,
        'm': 60.,
        'h': 3600.,
        'd': 24. * 3600.}

    suffix = s[-1:]
    if suffix in unit_factors:
        return float(s[:-1]) * unit_factors[suffix]

    return float(s)
def nice_seconds_floor(s):
    """Round a duration down to the next 'nice' value.

    Nice values range from 1 second up to 48 hours. Durations below the
    smallest nice value or at/above the largest are returned unchanged.

    :param s: duration in seconds
    :returns: largest nice value not exceeding ``s``, or ``s`` itself when
        outside the nice range
    """
    steps = (1., 10., 60., 600., 3600., 3.*3600., 12*3600., 24*3600.,
             48*3600.)

    if s >= steps[-1]:
        return s

    not_above = [step for step in steps if step <= s]
    if not not_above:
        return s

    return not_above[-1]
def check_record_length(option, opt, value):
    """optparse type-checker for the custom ``record_length`` option type.

    :param option: the :class:`Option` instance being processed
    :param opt: the option string seen on the command line
    :param value: the raw argument string
    :returns: the record length as ``int`` if valid
    :raises OptionValueError: if ``value`` is not one of
        ``io.mseed.VALID_RECORD_LENGTHS``
    """
    try:
        reclen = int(value)
        if reclen in io.mseed.VALID_RECORD_LENGTHS:
            return reclen
    except Exception:
        pass

    # Use ``value`` here: if int() failed above, ``reclen`` would be unbound
    # and formatting the message would raise NameError instead of the
    # intended OptionValueError.
    raise OptionValueError(
        'invalid record length %s. (choose from %s)'
        % (value, ', '.join(str(b) for b in io.mseed.VALID_RECORD_LENGTHS)))
def check_steim(option, opt, value):
    """optparse type-checker for the custom ``steim`` option type.

    :param option: the :class:`Option` instance being processed
    :param opt: the option string seen on the command line
    :param value: the raw argument string
    :returns: the STEIM compression level as ``int`` (1 or 2)
    :raises OptionValueError: if ``value`` is not 1 or 2
    """
    try:
        compression = int(value)
        if compression in (1, 2):
            return compression
    except Exception:
        pass

    # Use ``value`` here: if int() failed above, ``compression`` would be
    # unbound and formatting the message would raise NameError instead of
    # the intended OptionValueError.
    raise OptionValueError(
        'invalid STEIM compression %s. (choose from 1, 2)' % value)
class JackseisOptions(Option):
    """optparse Option subclass with extra value types.

    Registers the custom types ``record_length`` and ``steim`` so that
    --output-record-length and --output-steim arguments are validated at
    parse time by :func:`check_record_length` and :func:`check_steim`.
    """
    TYPES = Option.TYPES + ('record_length', 'steim')
    TYPE_CHECKER = copy(Option.TYPE_CHECKER)
    TYPE_CHECKER['record_length'] = check_record_length
    TYPE_CHECKER['steim'] = check_steim
def process(get_pile, options):
    """Run the jackseis processing pipeline.

    Validates and converts command line options, then iterates over the
    waveform data in windows, applying (in order): sample snapping,
    downsampling, restitution (instrument deconvolution), scaling, data
    type conversion, code renaming, and finally saving to the output
    path template. Processing can be interrupted with SIGINT; the previous
    signal handler is restored before returning.

    :param get_pile: callable returning the :class:`pyrocko.pile.Pile` with
        the input data (called lazily, after option validation)
    :param options: parsed optparse options object from :func:`main`
    """
    tinc = None
    if options.tinc is not None:
        # FIX: test the raw option string, not the still-None local ``tinc``;
        # otherwise --tinc=auto/huge would fall through to str_to_seconds()
        # and die with 'invalid argument to --tinc'.
        if options.tinc in ('huge', 'auto'):
            tinc = options.tinc
        else:
            try:
                tinc = str_to_seconds(options.tinc)
            except Exception:
                die('invalid argument to --tinc')

    tmin = None
    if options.tmin is not None:
        try:
            tmin = stt(options.tmin)
        except Exception:
            # FIX: apply the % formatting to the string (it was accidentally
            # inside the literal, printing '"%s" % tfmt' verbatim).
            die('invalid argument to --tmin. '
                'Expected format is "%s"' % tfmt)

    tmax = None
    if options.tmax is not None:
        try:
            tmax = stt(options.tmax)
        except Exception:
            die('invalid argument to --tmax. '
                'Expected format is "%s"' % tfmt)

    target_deltat = None
    if options.downsample is not None:
        try:
            # --downsample gives a rate [Hz]; convert to sampling interval.
            target_deltat = 1.0 / float(options.downsample)
        except Exception:
            die('invalid argument to --downsample')

    # Collect (attribute, pattern, replacement) triples from the
    # --rename-* options; each must look like /PATTERN/REPLACEMENT/.
    replacements = []
    for k, rename_k in [
            ('network', options.rename_network),
            ('station', options.rename_station),
            ('location', options.rename_location),
            ('channel', options.rename_channel)]:

        for patrep in rename_k:
            m = re.match(r'/([^/]+)/([^/]*)/', patrep)
            if not m:
                die('invalid argument to --rename-%s. '
                    'Expected format is /PATTERN/REPLACEMENT/' % k)

            replacements.append((k, m.group(1), m.group(2)))

    # Merge all given StationXML files into one for response lookup.
    sx = None
    if options.station_xml_fns:
        sxs = []
        for station_xml_fn in options.station_xml_fns:
            sxs.append(stationxml.load_xml(filename=station_xml_fn))

        sx = stationxml.primitive_merge(sxs)

    # NOTE(review): the loaded events are not used further below; loading
    # still validates the given event files.
    events = []
    for event_fn in options.event_fns:
        events.extend(model.load_events(event_fn))

    p = get_pile()

    if p.tmin is None:
        die('data selection is empty')

    if tinc == 'auto':
        # Choose a nice window length of about one million samples.
        tinc = nice_seconds_floor(p.get_deltatmin() * 1000000.)

    if options.snap:
        # Align the first window to an even multiple of the window length.
        if tmin is None:
            tmin = p.tmin

        if tinc is not None:
            tmin = int(math.floor(tmin / tinc)) * tinc

    output_path = options.output_path
    output_dir = options.output_dir

    if output_path and not output_dir and os.path.isdir(output_path):
        output_dir = output_path  # compat. with old behaviour

    if output_dir and not output_path:
        output_path = 'trace_%(network)s-%(station)s-' \
            '%(location)s-%(channel)s_%(wmin)s.' + \
            options.output_format

    if output_dir and output_path:
        output_path = pjoin(output_dir, output_path)

    if not output_path:
        die('--output not given')

    fmin, fmax = map(float, options.str_fmin_fmax.split(','))
    tfade_factor = 2.0
    ffade_factor = 1.5
    if options.output_quantity:
        # Extra padding so restitution tapers do not eat into the window.
        tpad = 2*tfade_factor/fmin
    else:
        tpad = 0.

    if target_deltat is not None:
        tpad += target_deltat * 10.

    if tinc is None:
        # Guard against accidentally producing one gigantic output file.
        if ((tmax or p.tmax) - (tmin or p.tmin)) \
                / p.get_deltatmin() > 100000000.:
            die('use --tinc=huge to really produce such large output files '
                'or use --tinc=INC to split into smaller files.')

    kwargs = dict(tmin=tmin, tmax=tmax, tinc=tinc, tpad=tpad, style='batch')

    if options.traversal == 'channel-by-channel':
        it = p.chopper_grouped(gather=lambda tr: tr.nslc_id, **kwargs)

    elif options.traversal == 'station-by-station':
        it = p.chopper_grouped(gather=lambda tr: tr.nslc_id[:2], **kwargs)

    else:
        it = p.chopper(**kwargs)

    abort = []

    def got_sigint(signum, frame):
        # Note interruption; the main loop below checks after each batch.
        abort.append(True)

    old = signal.signal(signal.SIGINT, got_sigint)

    save_kwargs = {}
    if options.output_format == 'mseed':
        save_kwargs['record_length'] = options.record_length
        save_kwargs['steim'] = options.steim

    def process_traces(batch):
        # Apply the full processing chain to one window of traces and save.
        traces = batch.traces
        if traces:
            twmin = batch.tmin
            twmax = batch.tmax
            logger.info('processing %s - %s, %i traces' %
                        (tts(twmin), tts(twmax), len(traces)))

            if options.sample_snap:
                for tr in traces:
                    tr.snap(interpolate=options.sample_snap == 'interpolate')

            if target_deltat is not None:
                out_traces = []
                for tr in traces:
                    try:
                        tr.downsample_to(
                            target_deltat, snap=True, demean=False)

                        # NOTE(review): astype to the trace's own dtype looks
                        # like a no-op -- presumably meant to restore the
                        # pre-downsampling dtype; confirm before changing.
                        if options.output_data_type == 'same':
                            tr.ydata = tr.ydata.astype(tr.ydata.dtype)

                        out_traces.append(tr)

                    except (trace.TraceTooShort, trace.NoData):
                        pass

                traces = out_traces

            if options.output_quantity:
                out_traces = []
                tfade = tfade_factor / fmin
                ftap = (fmin / ffade_factor, fmin, fmax, ffade_factor * fmax)
                for tr in traces:
                    try:
                        response = sx.get_pyrocko_response(
                            tr.nslc_id,
                            timespan=(tr.tmin, tr.tmax),
                            fake_input_units=output_units[
                                options.output_quantity])

                        rest_tr = tr.transfer(
                            tfade, ftap, response, invert=True, demean=True)

                        out_traces.append(rest_tr)

                    except stationxml.NoResponseInformation as e:
                        logger.warning(
                            'Cannot restitute: %s (no response)' % str(e))

                    except stationxml.MultipleResponseInformation as e:
                        logger.warning(
                            'Cannot restitute: %s (multiple responses found)'
                            % str(e))

                    except (trace.TraceTooShort, trace.NoData):
                        logger.warning(
                            'Trace too short: %s' % '.'.join(tr.nslc_id))

                traces = out_traces

            if options.scale is not None:
                for tr in traces:
                    tr.ydata = options.scale * tr.ydata

            if options.output_data_type != 'same':
                for tr in traces:
                    tr.ydata = tr.ydata.astype(
                        name_to_dtype[options.output_data_type])

            if replacements:
                for tr in traces:
                    r = {}
                    for k, pat, repl in replacements:
                        oldval = getattr(tr, k)
                        newval, n = re.subn(pat, repl, oldval)
                        if n:
                            r[k] = newval

                    tr.set_codes(**r)

            if output_path:
                # Chop to the window (without padding) before saving.
                otraces = []
                for tr in traces:
                    try:
                        otr = tr.chop(twmin, twmax, inplace=False)
                        otraces.append(otr)
                    except trace.NoData:
                        pass

                try:
                    io.save(otraces, output_path, format=options.output_format,
                            overwrite=options.force,
                            additional=dict(
                                wmin_year=tts(twmin, format='%Y'),
                                wmin_month=tts(twmin, format='%m'),
                                wmin_day=tts(twmin, format='%d'),
                                wmin_jday=tts(twmin, format='%j'),
                                wmin=tts(twmin, format='%Y-%m-%d_%H-%M-%S'),
                                wmax_year=tts(twmax, format='%Y'),
                                wmax_month=tts(twmax, format='%m'),
                                wmax_day=tts(twmax, format='%d'),
                                wmax_jday=tts(twmax, format='%j'),
                                wmax=tts(twmax, format='%Y-%m-%d_%H-%M-%S')),
                            **save_kwargs)
                except io.FileSaveError as e:
                    die(str(e))

    for batch in it:
        process_traces(batch)
        if abort:
            break

    # Restore the previous SIGINT handler.
    signal.signal(signal.SIGINT, old)

    if abort:
        die('interrupted.')
def main(args=None):
    """Command line entry point for jackseis.

    Builds the option parser, parses ``args`` (defaults to
    ``sys.argv[1:]``), sets up logging and hands off to :func:`process`.
    Prints help and exits with status 1 when no input paths are given.
    """
    if args is None:
        args = sys.argv[1:]

    parser = OptionParser(
        usage=usage,
        description=description,
        option_class=JackseisOptions,
        formatter=util.BetterHelpFormatter())

    # --- input selection options ---

    parser.add_option(
        '--format',
        dest='format',
        default='detect',
        choices=io.allowed_formats('load'),
        help='assume input files are of given FORMAT. Choices: %s' %
        io.allowed_formats('load', 'cli_help', 'detect'))

    parser.add_option(
        '--pattern',
        dest='regex',
        metavar='REGEX',
        help='only include files whose paths match REGEX')

    parser.add_option(
        '--stationxml',
        dest='station_xml_fns',
        action='append',
        default=[],
        metavar='STATIONXML',
        help='read station metadata from file STATIONXML')

    parser.add_option(
        '--event', '--events',
        dest='event_fns',
        action='append',
        default=[],
        metavar='EVENT',
        help='read event information from file EVENT')

    parser.add_option(
        '--cache',
        dest='cache_dir',
        default=config.config().cache_dir,
        metavar='DIR',
        help='use directory DIR to cache trace metadata '
        "(default='%default')")

    # --- verbosity options ---

    parser.add_option(
        '--quiet',
        dest='quiet',
        action='store_true',
        default=False,
        help='disable output of progress information')

    parser.add_option(
        '--debug',
        dest='debug',
        action='store_true',
        default=False,
        help='print debugging information to stderr')

    # --- time window and resampling options ---

    parser.add_option(
        '--tmin',
        dest='tmin',
        help='start time as "%s"' % tfmt)

    parser.add_option(
        '--tmax',
        dest='tmax',
        help='end time as "%s"' % tfmt)

    parser.add_option(
        '--tinc',
        dest='tinc',
        help='set time length of output files [s] or "auto" to automatically '
             'choose an appropriate time increment or "huge" to allow to use '
             'a lot of system memory to merge input traces into huge output '
             'files.')

    parser.add_option(
        '--downsample',
        dest='downsample',
        metavar='RATE',
        help='downsample to RATE [Hz]')

    # NOTE: type=float (the builtin) is accepted by optparse, which converts
    # type objects to their name internally.
    parser.add_option(
        '--scale',
        dest='scale',
        type=float,
        metavar='FACTOR',
        help='scale seismograms with given FACTOR. Changes data type to '
             'float64. Use --output-data-type to change again after scaling.')

    # --- output options ---

    parser.add_option(
        '--output',
        dest='output_path',
        metavar='TEMPLATE',
        help='set output path to TEMPLATE. Available placeholders '
             'are %n: network, %s: station, %l: location, %c: channel, '
             '%b: begin time, %e: end time, %j: julian day of year. The '
             'following additional placeholders use the window begin and end '
             'times rather than trace begin and end times (to suppress '
             'producing many small files for gappy traces), %(wmin_year)s, '
             '%(wmin_month)s, %(wmin_day)s, %(wmin)s, %(wmin_jday)s, '
             '%(wmax_year)s, %(wmax_month)s, %(wmax_day)s, %(wmax)s, '
             "%(wmax_jday)s. Example: --output='data/%s/trace-%s-%c.mseed'")

    parser.add_option(
        '--output-dir',
        metavar='TEMPLATE',
        dest='output_dir',
        help='set output directory to TEMPLATE (see --output for details) '
             'and automatically choose filenames. '
             'Produces files like TEMPLATE/NET-STA-LOC-CHA_BEGINTIME.FORMAT')

    parser.add_option(
        '--output-format',
        dest='output_format',
        default='mseed',
        choices=io.allowed_formats('save'),
        help='set output file format. Choices: %s' %
        io.allowed_formats('save', 'cli_help', 'mseed'))

    parser.add_option(
        '--force',
        dest='force',
        action='store_true',
        default=False,
        help='force overwriting of existing files')

    parser.add_option(
        '--no-snap',
        dest='snap',
        action='store_false',
        default=True,
        help='do not start output files at even multiples of file length')

    parser.add_option(
        '--traversal',
        dest='traversal',
        choices=('station-by-station', 'channel-by-channel', 'chronological'),
        default='station-by-station',
        help='set traversal order for traces processing. '
             "Choices are 'station-by-station' [default], "
             "'channel-by-channel', and 'chronological'. Chronological "
             'traversal uses more processing memory but makes it possible to '
             'join multiple stations into single output files')

    # --- code renaming options (repeatable) ---

    parser.add_option(
        '--rename-network',
        action='append',
        default=[],
        dest='rename_network',
        metavar='/PATTERN/REPLACEMENT/',
        help='update network code, can be given more than once')

    parser.add_option(
        '--rename-station',
        action='append',
        default=[],
        dest='rename_station',
        metavar='/PATTERN/REPLACEMENT/',
        help='update station code, can be given more than once')

    parser.add_option(
        '--rename-location',
        action='append',
        default=[],
        dest='rename_location',
        metavar='/PATTERN/REPLACEMENT/',
        help='update location code, can be given more than once')

    parser.add_option(
        '--rename-channel',
        action='append',
        default=[],
        dest='rename_channel',
        metavar='/PATTERN/REPLACEMENT/',
        help='update channel code, can be given more than once')

    # --- output encoding options ---

    parser.add_option(
        '--output-data-type',
        dest='output_data_type',
        choices=('same', 'int32', 'int64', 'float32', 'float64'),
        default='same',
        metavar='DTYPE',
        help='set data type. Choices: same [default], int32, '
             'int64, float32, float64. The output file format must support '
             'the given type.')

    # Uses the custom 'steim' type registered on JackseisOptions.
    parser.add_option(
        '--output-steim',
        dest='steim',
        type='steim',
        default=2,
        metavar='STEIM_COMPRESSION',
        help='set the mseed STEIM compression. Choices: 1 or 2. '
             'Default is STEIM-2, which can compress full range int32. '
             'NOTE: STEIM-2 is limited to 30 bit dynamic range.')

    # Uses the custom 'record_length' type registered on JackseisOptions.
    parser.add_option(
        '--output-record-length',
        dest='record_length',
        type='record_length',
        default=4096,
        metavar='RECORD_LENGTH',
        help='set the mseed record length in bytes. Choices: %s. '
             'Default is 4096 bytes, which is commonly used for archiving.'
             % ', '.join(str(b) for b in io.mseed.VALID_RECORD_LENGTHS))

    # --- restitution options ---

    quantity_choices = ('acceleration', 'velocity', 'displacement')
    parser.add_option(
        '--output-quantity',
        dest='output_quantity',
        choices=quantity_choices,
        help='deconvolve instrument transfer function. Choices: %s'
        % ', '.join(quantity_choices))

    parser.add_option(
        '--restitution-frequency-band',
        default='0.001,100.0',
        dest='str_fmin_fmax',
        metavar='FMIN,FMAX',
        help='frequency band for instrument deconvolution as FMIN,FMAX in Hz. '
             'Default: "%default"')

    sample_snap_choices = ('shift', 'interpolate')
    parser.add_option(
        '--sample-snap',
        dest='sample_snap',
        choices=sample_snap_choices,
        help='shift/interpolate traces so that samples are at even multiples '
             'of sampling rate. Choices: %s' % ', '.join(sample_snap_choices))

    (options, args) = parser.parse_args(args)

    # Without input paths there is nothing to do: show help and exit.
    if len(args) == 0:
        parser.print_help()
        sys.exit(1)

    if options.debug:
        util.setup_logging(program_name, 'debug')
    elif options.quiet:
        util.setup_logging(program_name, 'warning')
    else:
        util.setup_logging(program_name, 'info')

    def get_pile():
        # Deferred pile construction so that process() can validate options
        # before any (potentially slow) data scanning happens.
        return pile.make_pile(
            paths=args,
            selector=None,
            regex=options.regex,
            fileformat=options.format,
            cachedirname=options.cache_dir,
            show_progress=not options.quiet)

    return process(get_pile, options)
# Allow running this module directly as a script.
if __name__ == '__main__':
    main()