Coverage for /usr/local/lib/python3.11/dist-packages/pyrocko/apps/jackseis.py: 25%
265 statements
« prev ^ index » next coverage.py v6.5.0, created at 2023-10-06 15:01 +0000
1# http://pyrocko.org - GPLv3
2#
3# The Pyrocko Developers, 21st Century
4# ---|P------/S----------~Lg----------
6'''
7Jackseis - manipulate seismic waveform archives.
8'''
10import sys
11import re
12import os
13import logging
14import signal
15import math
16from copy import copy
17from optparse import OptionParser, Option, OptionValueError
19import numpy as num
21from pyrocko import util, config, pile, model, io, trace
22from pyrocko.io import stationxml
# Shorthand for building output paths.
pjoin = os.path.join

logger = logging.getLogger('pyrocko.apps.jackseis')

program_name = 'jackseis'

usage = program_name + ' <inputs> ... [options]'

description = '''A simple tool to manipulate waveform archive data.'''

# Human-readable time format, shown in the --tmin/--tmax help and error texts.
tfmt = 'YYYY-mm-dd HH:MM:SS[.xxx]'

# Shorthands for the time conversion helpers used throughout this module.
tts = util.time_to_str
stt = util.str_to_time_fillup
def die(message):
    '''
    Abort the program with an error message on stderr and a non-zero status.
    '''
    full_message = '%s: error: %s' % (program_name, message)
    sys.exit(full_message)
# Mapping from --output-data-type option values to numpy dtypes.
name_to_dtype = {
    'int32': num.int32,
    'int64': num.int64,
    'float32': num.float32,
    'float64': num.float64}

# Unit strings for each --output-quantity choice, passed as
# ``fake_input_units`` to the StationXML response lookup during restitution.
output_units = {
    'acceleration': 'M/S**2',
    'velocity': 'M/S',
    'displacement': 'M'}
def str_to_seconds(s):
    '''
    Convert a duration string with optional unit suffix to seconds.

    Understood suffixes: ``s`` (seconds), ``m`` (minutes), ``h`` (hours),
    ``d`` (days). A plain number is interpreted as seconds.

    :raises ValueError: if the numeric part cannot be parsed as float
    '''
    factors = {'s': 1., 'm': 60., 'h': 3600., 'd': 24. * 3600.}
    suffix = s[-1:]
    if suffix in factors:
        return float(s[:-1]) * factors[suffix]

    return float(s)
def nice_seconds_floor(s):
    '''
    Round a duration down to a "nice" value in seconds.

    Returns the largest of 1 s, 10 s, 1 min, 10 min, 1 h, 3 h, 12 h, 24 h
    and 48 h not exceeding *s*. Values below 1 s or at/above 48 h are
    returned unchanged.
    '''
    nice_values = [
        1., 10., 60., 600., 3600., 3. * 3600., 12 * 3600., 24 * 3600.,
        48 * 3600.]

    previous = s
    for candidate in nice_values:
        if s < candidate:
            return previous

        previous = candidate

    return s
def check_record_length(option, opt, value):
    '''
    optparse type checker for the ``record_length`` option type.

    Accepts only integer values listed in
    ``io.mseed.VALID_RECORD_LENGTHS``.

    :raises OptionValueError: if *value* is not a valid mseed record length
    '''
    try:
        reclen = int(value)
        if reclen in io.mseed.VALID_RECORD_LENGTHS:
            return reclen
    except (ValueError, TypeError):
        pass

    # Report the raw input value: if int() failed above, ``reclen`` was
    # never assigned and referencing it here would raise NameError instead
    # of the intended OptionValueError.
    raise OptionValueError(
        'invalid record length %s. (choose from %s)'
        % (value, ', '.join(str(b) for b in io.mseed.VALID_RECORD_LENGTHS)))
def check_steim(option, opt, value):
    '''
    optparse type checker for the ``steim`` option type.

    Accepts only the STEIM compression levels 1 and 2.

    :raises OptionValueError: if *value* is not ``1`` or ``2``
    '''
    try:
        compression = int(value)
        if compression in (1, 2):
            return compression
    except (ValueError, TypeError):
        pass

    # Report the raw input value: if int() failed above, ``compression`` was
    # never assigned and referencing it here would raise NameError instead
    # of the intended OptionValueError.
    raise OptionValueError(
        'invalid STEIM compression %s. (choose from 1, 2)' % value)
class JackseisOptions(Option):
    '''
    Custom optparse ``Option`` subclass with validated value types.

    Registers the extra option types ``record_length`` and ``steim`` so
    that ``--output-record-length`` and ``--output-steim`` values are
    checked while the command line is parsed.
    '''
    TYPES = Option.TYPES + ('record_length', 'steim')
    TYPE_CHECKER = copy(Option.TYPE_CHECKER)
    TYPE_CHECKER['record_length'] = check_record_length
    TYPE_CHECKER['steim'] = check_steim
def process(get_pile, options):
    '''
    Run the jackseis processing chain.

    Reads traces from the pile returned by *get_pile*, applies the
    transformations selected via *options* (time window selection,
    downsampling, restitution, renaming, scaling, data type conversion) and
    writes the results to the configured output path(s).

    :param get_pile: callable returning a :py:class:`pyrocko.pile.Pile`
        with the input data selection
    :param options: option values as produced by
        :py:meth:`optparse.OptionParser.parse_args` in :py:func:`main`
    '''
    tinc = None
    if options.tinc is not None:
        # Compare the raw option string here. The local ``tinc`` is still
        # ``None`` at this point, so testing it against the keywords (as the
        # code previously did) could never match and --tinc=auto/huge would
        # erroneously be rejected as invalid.
        if options.tinc in ('huge', 'auto'):
            tinc = options.tinc
        else:
            try:
                tinc = str_to_seconds(options.tinc)
            except Exception:
                die('invalid argument to --tinc')

    tmin = None
    if options.tmin is not None:
        try:
            tmin = stt(options.tmin)
        except Exception:
            # The %-formatting belongs outside the string literal; it was
            # previously part of the literal itself, so the message printed
            # the text '%s" % tfmt' instead of the actual time format.
            die('invalid argument to --tmin. '
                'Expected format is "%s"' % tfmt)

    tmax = None
    if options.tmax is not None:
        try:
            tmax = stt(options.tmax)
        except Exception:
            die('invalid argument to --tmax. '
                'Expected format is "%s"' % tfmt)

    target_deltat = None
    if options.downsample is not None:
        try:
            target_deltat = 1.0 / float(options.downsample)
        except Exception:
            die('invalid argument to --downsample')

    # Collect (attribute, pattern, replacement) triples from the
    # --rename-* options.
    replacements = []
    for k, rename_k, in [
            ('network', options.rename_network),
            ('station', options.rename_station),
            ('location', options.rename_location),
            ('channel', options.rename_channel)]:

        for patrep in rename_k:
            m = re.match(r'/([^/]+)/([^/]*)/', patrep)
            if not m:
                die('invalid argument to --rename-%s. '
                    'Expected format is /PATTERN/REPLACEMENT/' % k)

            replacements.append((k, m.group(1), m.group(2)))

    sx = None
    if options.station_xml_fns:
        sxs = []
        for station_xml_fn in options.station_xml_fns:
            sxs.append(stationxml.load_xml(filename=station_xml_fn))

        sx = stationxml.primitive_merge(sxs)

    events = []
    for event_fn in options.event_fns:
        events.extend(model.load_events(event_fn))

    p = get_pile()

    if p.tmin is None:
        die('data selection is empty')

    if tinc == 'auto':
        tinc = nice_seconds_floor(p.get_deltatmin() * 1000000.)

    if options.snap:
        if tmin is None:
            tmin = p.tmin

        # Align output file start times to even multiples of the file length.
        if tinc is not None:
            tmin = int(math.floor(tmin / tinc)) * tinc

    output_path = options.output_path
    output_dir = options.output_dir

    if output_path and not output_dir and os.path.isdir(output_path):
        output_dir = output_path  # compat. with old behaviour

    if output_dir and not output_path:
        output_path = 'trace_%(network)s-%(station)s-' \
                      '%(location)s-%(channel)s_%(wmin)s.' + \
                      options.output_format

    if output_dir and output_path:
        output_path = pjoin(output_dir, output_path)

    if not output_path:
        die('--output not given')

    fmin, fmax = map(float, options.str_fmin_fmax.split(','))
    tfade_factor = 2.0
    ffade_factor = 1.5
    if options.output_quantity:
        # Extra padding so the restitution taper does not eat into the
        # output time window.
        tpad = 2 * tfade_factor / fmin
    else:
        tpad = 0.

    if target_deltat is not None:
        tpad += target_deltat * 10.

    if tinc is None:
        if ((tmax or p.tmax) - (tmin or p.tmin)) \
                / p.get_deltatmin() > 100000000.:
            die('use --tinc=huge to really produce such large output files '
                'or use --tinc=INC to split into smaller files.')

    kwargs = dict(tmin=tmin, tmax=tmax, tinc=tinc, tpad=tpad, style='batch')

    if options.traversal == 'channel-by-channel':
        it = p.chopper_grouped(gather=lambda tr: tr.nslc_id, **kwargs)

    elif options.traversal == 'station-by-station':
        it = p.chopper_grouped(gather=lambda tr: tr.nslc_id[:2], **kwargs)

    else:
        it = p.chopper(**kwargs)

    # Allow a graceful stop on Ctrl-C: finish the current batch, then exit.
    abort = []

    def got_sigint(signum, frame):
        abort.append(True)

    old = signal.signal(signal.SIGINT, got_sigint)

    save_kwargs = {}
    if options.output_format == 'mseed':
        save_kwargs['record_length'] = options.record_length
        save_kwargs['steim'] = options.steim

    def process_traces(batch):
        # Apply all selected transformations to one batch (time window) of
        # traces and save the result.
        traces = batch.traces
        if traces:
            twmin = batch.tmin
            twmax = batch.tmax
            logger.info('processing %s - %s, %i traces' %
                        (tts(twmin), tts(twmax), len(traces)))

            if options.sample_snap:
                for tr in traces:
                    tr.snap(interpolate=options.sample_snap == 'interpolate')

            if target_deltat is not None:
                out_traces = []
                for tr in traces:
                    try:
                        tr.downsample_to(
                            target_deltat, snap=True, demean=False,
                            allow_upsample_max=4)

                        if options.output_data_type == 'same':
                            # astype() with the unchanged dtype makes a
                            # fresh copy of the samples — presumably to get
                            # a clean buffer after downsampling; TODO
                            # confirm intent.
                            tr.ydata = tr.ydata.astype(tr.ydata.dtype)

                        out_traces.append(tr)

                    except (trace.TraceTooShort, trace.NoData):
                        pass

                traces = out_traces

            if options.output_quantity:
                # Deconvolve instrument responses (restitution).
                out_traces = []
                tfade = tfade_factor / fmin
                ftap = (fmin / ffade_factor, fmin, fmax, ffade_factor * fmax)
                for tr in traces:
                    try:
                        response = sx.get_pyrocko_response(
                            tr.nslc_id,
                            timespan=(tr.tmin, tr.tmax),
                            fake_input_units=output_units[
                                options.output_quantity])

                        rest_tr = tr.transfer(
                            tfade, ftap, response, invert=True, demean=True)

                        out_traces.append(rest_tr)

                    except stationxml.NoResponseInformation as e:
                        logger.warning(
                            'Cannot restitute: %s (no response)' % str(e))

                    except stationxml.MultipleResponseInformation as e:
                        logger.warning(
                            'Cannot restitute: %s (multiple responses found)'
                            % str(e))

                    except (trace.TraceTooShort, trace.NoData):
                        logger.warning(
                            'Trace too short: %s' % '.'.join(tr.nslc_id))

                traces = out_traces

            if options.scale is not None:
                for tr in traces:
                    tr.ydata = options.scale * tr.ydata

            if options.output_data_type != 'same':
                for tr in traces:
                    tr.ydata = tr.ydata.astype(
                        name_to_dtype[options.output_data_type])

            if replacements:
                for tr in traces:
                    r = {}
                    for k, pat, repl in replacements:
                        oldval = getattr(tr, k)
                        newval, n = re.subn(pat, repl, oldval)
                        if n:
                            r[k] = newval

                    tr.set_codes(**r)

            if output_path:
                otraces = []
                for tr in traces:
                    try:
                        # Cut away the padding (tpad) added for
                        # downsampling/restitution.
                        otr = tr.chop(twmin, twmax, inplace=False)
                        otraces.append(otr)
                    except trace.NoData:
                        pass

                try:
                    io.save(otraces, output_path,
                            format=options.output_format,
                            overwrite=options.force,
                            additional=dict(
                                wmin_year=tts(twmin, format='%Y'),
                                wmin_month=tts(twmin, format='%m'),
                                wmin_day=tts(twmin, format='%d'),
                                wmin_jday=tts(twmin, format='%j'),
                                wmin=tts(twmin, format='%Y-%m-%d_%H-%M-%S'),
                                wmax_year=tts(twmax, format='%Y'),
                                wmax_month=tts(twmax, format='%m'),
                                wmax_day=tts(twmax, format='%d'),
                                wmax_jday=tts(twmax, format='%j'),
                                wmax=tts(twmax, format='%Y-%m-%d_%H-%M-%S')),
                            **save_kwargs)
                except io.FileSaveError as e:
                    die(str(e))

    for batch in it:
        process_traces(batch)
        if abort:
            break

    signal.signal(signal.SIGINT, old)

    if abort:
        die('interrupted.')
def main(args=None):
    '''
    CLI entry point for Pyrocko's ``jackseis`` app.

    :param args: command line arguments to parse; when ``None``, defaults
        to ``sys.argv[1:]``
    '''
    if args is None:
        args = sys.argv[1:]

    parser = OptionParser(
        usage=usage,
        description=description,
        option_class=JackseisOptions,
        formatter=util.BetterHelpFormatter())

    parser.add_option(
        '--format',
        dest='format',
        default='detect',
        choices=io.allowed_formats('load'),
        help='assume input files are of given FORMAT. Choices: %s' %
             io.allowed_formats('load', 'cli_help', 'detect'))

    parser.add_option(
        '--pattern',
        dest='regex',
        metavar='REGEX',
        help='only include files whose paths match REGEX')

    parser.add_option(
        '--stationxml',
        dest='station_xml_fns',
        action='append',
        default=[],
        metavar='STATIONXML',
        help='read station metadata from file STATIONXML')

    parser.add_option(
        '--event', '--events',
        dest='event_fns',
        action='append',
        default=[],
        metavar='EVENT',
        help='read event information from file EVENT')

    parser.add_option(
        '--cache',
        dest='cache_dir',
        default=config.config().cache_dir,
        metavar='DIR',
        help='use directory DIR to cache trace metadata '
             "(default='%default')")

    parser.add_option(
        '--quiet',
        dest='quiet',
        action='store_true',
        default=False,
        help='disable output of progress information')

    parser.add_option(
        '--debug',
        dest='debug',
        action='store_true',
        default=False,
        help='print debugging information to stderr')

    parser.add_option(
        '--tmin',
        dest='tmin',
        help='start time as "%s"' % tfmt)

    parser.add_option(
        '--tmax',
        dest='tmax',
        help='end time as "%s"' % tfmt)

    parser.add_option(
        '--tinc',
        dest='tinc',
        help='set time length of output files [s] or "auto" to automatically '
             'choose an appropriate time increment or "huge" to allow to use '
             'a lot of system memory to merge input traces into huge output '
             'files.')

    parser.add_option(
        '--downsample',
        dest='downsample',
        metavar='RATE',
        help='downsample to RATE [Hz]')

    parser.add_option(
        '--scale',
        dest='scale',
        type=float,
        metavar='FACTOR',
        help='scale seismograms with given FACTOR. Changes data type to '
             'float64. Use --output-data-type to change again after scaling.')

    parser.add_option(
        '--output',
        dest='output_path',
        metavar='TEMPLATE',
        help='set output path to TEMPLATE. Available placeholders '
             'are %n: network, %s: station, %l: location, %c: channel, '
             '%b: begin time, %e: end time, %j: julian day of year. The '
             'following additional placeholders use the window begin and end '
             'times rather than trace begin and end times (to suppress '
             'producing many small files for gappy traces), %(wmin_year)s, '
             '%(wmin_month)s, %(wmin_day)s, %(wmin)s, %(wmin_jday)s, '
             '%(wmax_year)s, %(wmax_month)s, %(wmax_day)s, %(wmax)s, '
             "%(wmax_jday)s. Example: --output='data/%s/trace-%s-%c.mseed'")

    parser.add_option(
        '--output-dir',
        metavar='TEMPLATE',
        dest='output_dir',
        help='set output directory to TEMPLATE (see --output for details) '
             'and automatically choose filenames. '
             'Produces files like TEMPLATE/NET-STA-LOC-CHA_BEGINTIME.FORMAT')

    parser.add_option(
        '--output-format',
        dest='output_format',
        default='mseed',
        choices=io.allowed_formats('save'),
        help='set output file format. Choices: %s' %
             io.allowed_formats('save', 'cli_help', 'mseed'))

    parser.add_option(
        '--force',
        dest='force',
        action='store_true',
        default=False,
        help='force overwriting of existing files')

    parser.add_option(
        '--no-snap',
        dest='snap',
        action='store_false',
        default=True,
        help='do not start output files at even multiples of file length')

    parser.add_option(
        '--traversal',
        dest='traversal',
        choices=('station-by-station', 'channel-by-channel', 'chronological'),
        default='station-by-station',
        help='set traversal order for traces processing. '
             "Choices are 'station-by-station' [default], "
             "'channel-by-channel', and 'chronological'. Chronological "
             'traversal uses more processing memory but makes it possible to '
             'join multiple stations into single output files')

    parser.add_option(
        '--rename-network',
        action='append',
        default=[],
        dest='rename_network',
        metavar='/PATTERN/REPLACEMENT/',
        help='update network code, can be given more than once')

    parser.add_option(
        '--rename-station',
        action='append',
        default=[],
        dest='rename_station',
        metavar='/PATTERN/REPLACEMENT/',
        help='update station code, can be given more than once')

    parser.add_option(
        '--rename-location',
        action='append',
        default=[],
        dest='rename_location',
        metavar='/PATTERN/REPLACEMENT/',
        help='update location code, can be given more than once')

    parser.add_option(
        '--rename-channel',
        action='append',
        default=[],
        dest='rename_channel',
        metavar='/PATTERN/REPLACEMENT/',
        help='update channel code, can be given more than once')

    parser.add_option(
        '--output-data-type',
        dest='output_data_type',
        choices=('same', 'int32', 'int64', 'float32', 'float64'),
        default='same',
        metavar='DTYPE',
        help='set data type. Choices: same [default], int32, '
             'int64, float32, float64. The output file format must support '
             'the given type.')

    parser.add_option(
        '--output-steim',
        dest='steim',
        type='steim',
        default=2,
        metavar='STEIM_COMPRESSION',
        help='set the mseed STEIM compression. Choices: 1 or 2. '
             'Default is STEIM-2, which can compress full range int32. '
             'NOTE: STEIM-2 is limited to 30 bit dynamic range.')

    parser.add_option(
        '--output-record-length',
        dest='record_length',
        type='record_length',
        default=4096,
        metavar='RECORD_LENGTH',
        help='set the mseed record length in bytes. Choices: %s. '
             'Default is 4096 bytes, which is commonly used for archiving.'
             % ', '.join(str(b) for b in io.mseed.VALID_RECORD_LENGTHS))

    quantity_choices = ('acceleration', 'velocity', 'displacement')
    parser.add_option(
        '--output-quantity',
        dest='output_quantity',
        choices=quantity_choices,
        help='deconvolve instrument transfer function. Choices: %s'
             % ', '.join(quantity_choices))

    parser.add_option(
        '--restitution-frequency-band',
        default='0.001,100.0',
        dest='str_fmin_fmax',
        metavar='FMIN,FMAX',
        help='frequency band for instrument deconvolution as FMIN,FMAX in Hz. '
             'Default: "%default"')

    sample_snap_choices = ('shift', 'interpolate')
    parser.add_option(
        '--sample-snap',
        dest='sample_snap',
        choices=sample_snap_choices,
        help='shift/interpolate traces so that samples are at even multiples '
             'of sampling rate. Choices: %s' % ', '.join(sample_snap_choices))

    (options, args) = parser.parse_args(args)

    # Remaining positional arguments are the input paths; without any, show
    # the help text and exit with failure.
    if len(args) == 0:
        parser.print_help()
        sys.exit(1)

    # Logging verbosity: --debug wins over --quiet.
    if options.debug:
        util.setup_logging(program_name, 'debug')
    elif options.quiet:
        util.setup_logging(program_name, 'warning')
    else:
        util.setup_logging(program_name, 'info')

    def get_pile():
        # Deferred pile construction: process() decides when the
        # (potentially slow) directory scan and cache lookup happen.
        return pile.make_pile(
            paths=args,
            selector=None,
            regex=options.regex,
            fileformat=options.format,
            cachedirname=options.cache_dir,
            show_progress=not options.quiet)

    return process(get_pile, options)
# Allow running this module directly as a script.
if __name__ == '__main__':
    main()