Coverage for /usr/local/lib/python3.11/dist-packages/pyrocko/apps/jackseis.py: 25%

265 statements  

« prev     ^ index     » next       coverage.py v6.5.0, created at 2023-10-04 09:52 +0000

1# http://pyrocko.org - GPLv3 

2# 

3# The Pyrocko Developers, 21st Century 

4# ---|P------/S----------~Lg---------- 

5 

6''' 

7Jackseis - manipulate seismic waveform archives. 

8''' 

9 

10import sys 

11import re 

12import os 

13import logging 

14import signal 

15import math 

16from copy import copy 

17from optparse import OptionParser, Option, OptionValueError 

18 

19import numpy as num 

20 

21from pyrocko import util, config, pile, model, io, trace 

22from pyrocko.io import stationxml 

23 

24pjoin = os.path.join 

25 

26logger = logging.getLogger('pyrocko.apps.jackseis') 

27 

28program_name = 'jackseis' 

29 

30usage = program_name + ' <inputs> ... [options]' 

31 

32description = '''A simple tool to manipulate waveform archive data.''' 

33 

34tfmt = 'YYYY-mm-dd HH:MM:SS[.xxx]' 

35tts = util.time_to_str 

36stt = util.str_to_time_fillup 

37 

38 

39def die(message): 

40 sys.exit('%s: error: %s' % (program_name, message)) 

41 

42 

43name_to_dtype = { 

44 'int32': num.int32, 

45 'int64': num.int64, 

46 'float32': num.float32, 

47 'float64': num.float64} 

48 

49 

50output_units = { 

51 'acceleration': 'M/S**2', 

52 'velocity': 'M/S', 

53 'displacement': 'M'} 

54 

55 

56def str_to_seconds(s): 

57 if s.endswith('s'): 

58 return float(s[:-1]) 

59 elif s.endswith('m'): 

60 return float(s[:-1])*60. 

61 elif s.endswith('h'): 

62 return float(s[:-1])*3600. 

63 elif s.endswith('d'): 

64 return float(s[:-1])*3600.*24. 

65 else: 

66 return float(s) 

67 

68 

69def nice_seconds_floor(s): 

70 nice = [1., 10., 60., 600., 3600., 3.*3600., 12*3600., 24*3600., 48*3600.] 

71 p = s 

72 for x in nice: 

73 if s < x: 

74 return p 

75 

76 p = x 

77 

78 return s 

79 

80 

81def check_record_length(option, opt, value): 

82 try: 

83 reclen = int(value) 

84 if reclen in io.mseed.VALID_RECORD_LENGTHS: 

85 return reclen 

86 except Exception: 

87 pass 

88 raise OptionValueError( 

89 'invalid record length %s. (choose from %s)' 

90 % (reclen, ', '.join(str(b) for b in io.mseed.VALID_RECORD_LENGTHS))) 

91 

92 

93def check_steim(option, opt, value): 

94 try: 

95 compression = int(value) 

96 if compression in (1, 2): 

97 return compression 

98 except Exception: 

99 pass 

100 raise OptionValueError( 

101 'invalid STEIM compression %s. (choose from 1, 2)' % compression) 

102 

103 

104class JackseisOptions(Option): 

105 TYPES = Option.TYPES + ('record_length', 'steim') 

106 TYPE_CHECKER = copy(Option.TYPE_CHECKER) 

107 TYPE_CHECKER['record_length'] = check_record_length 

108 TYPE_CHECKER['steim'] = check_steim 

109 

110 

111def process(get_pile, options): 

112 tinc = None 

113 if options.tinc is not None: 

114 if tinc in ('huge', 'auto'): 

115 tinc = options.tinc 

116 else: 

117 try: 

118 tinc = str_to_seconds(options.tinc) 

119 except Exception: 

120 die('invalid argument to --tinc') 

121 

122 tmin = None 

123 if options.tmin is not None: 

124 try: 

125 tmin = stt(options.tmin) 

126 except Exception: 

127 die('invalid argument to --tmin. ' 

128 'Expected format is "%s" % tfmt') 

129 

130 tmax = None 

131 if options.tmax is not None: 

132 try: 

133 tmax = stt(options.tmax) 

134 except Exception: 

135 die('invalid argument to --tmax. ' 

136 'Expected format is "%s"' % tfmt) 

137 

138 target_deltat = None 

139 if options.downsample is not None: 

140 try: 

141 target_deltat = 1.0 / float(options.downsample) 

142 except Exception: 

143 die('invalid argument to --downsample') 

144 

145 replacements = [] 

146 for k, rename_k, in [ 

147 ('network', options.rename_network), 

148 ('station', options.rename_station), 

149 ('location', options.rename_location), 

150 ('channel', options.rename_channel)]: 

151 

152 for patrep in rename_k: 

153 m = re.match(r'/([^/]+)/([^/]*)/', patrep) 

154 if not m: 

155 die('invalid argument to --rename-%s. ' 

156 'Expected format is /PATTERN/REPLACEMENT/' % k) 

157 

158 replacements.append((k, m.group(1), m.group(2))) 

159 

160 sx = None 

161 if options.station_xml_fns: 

162 sxs = [] 

163 for station_xml_fn in options.station_xml_fns: 

164 sxs.append(stationxml.load_xml(filename=station_xml_fn)) 

165 

166 sx = stationxml.primitive_merge(sxs) 

167 

168 events = [] 

169 for event_fn in options.event_fns: 

170 events.extend(model.load_events(event_fn)) 

171 

172 p = get_pile() 

173 

174 if p.tmin is None: 

175 die('data selection is empty') 

176 

177 if tinc == 'auto': 

178 tinc = nice_seconds_floor(p.get_deltatmin() * 1000000.) 

179 

180 if options.snap: 

181 if tmin is None: 

182 tmin = p.tmin 

183 

184 if tinc is not None: 

185 tmin = int(math.floor(tmin / tinc)) * tinc 

186 

187 output_path = options.output_path 

188 output_dir = options.output_dir 

189 

190 if output_path and not output_dir and os.path.isdir(output_path): 

191 output_dir = output_path # compat. with old behaviour 

192 

193 if output_dir and not output_path: 

194 output_path = 'trace_%(network)s-%(station)s-' \ 

195 '%(location)s-%(channel)s_%(wmin)s.' + \ 

196 options.output_format 

197 

198 if output_dir and output_path: 

199 output_path = pjoin(output_dir, output_path) 

200 

201 if not output_path: 

202 die('--output not given') 

203 

204 fmin, fmax = map(float, options.str_fmin_fmax.split(',')) 

205 tfade_factor = 2.0 

206 ffade_factor = 1.5 

207 if options.output_quantity: 

208 tpad = 2*tfade_factor/fmin 

209 else: 

210 tpad = 0. 

211 

212 if target_deltat is not None: 

213 tpad += target_deltat * 10. 

214 

215 if tinc is None: 

216 if ((tmax or p.tmax) - (tmin or p.tmin)) \ 

217 / p.get_deltatmin() > 100000000.: 

218 die('use --tinc=huge to really produce such large output files ' 

219 'or use --tinc=INC to split into smaller files.') 

220 

221 kwargs = dict(tmin=tmin, tmax=tmax, tinc=tinc, tpad=tpad, style='batch') 

222 

223 if options.traversal == 'channel-by-channel': 

224 it = p.chopper_grouped(gather=lambda tr: tr.nslc_id, **kwargs) 

225 

226 elif options.traversal == 'station-by-station': 

227 it = p.chopper_grouped(gather=lambda tr: tr.nslc_id[:2], **kwargs) 

228 

229 else: 

230 it = p.chopper(**kwargs) 

231 

232 abort = [] 

233 

234 def got_sigint(signum, frame): 

235 abort.append(True) 

236 

237 old = signal.signal(signal.SIGINT, got_sigint) 

238 

239 save_kwargs = {} 

240 if options.output_format == 'mseed': 

241 save_kwargs['record_length'] = options.record_length 

242 save_kwargs['steim'] = options.steim 

243 

244 def process_traces(batch): 

245 traces = batch.traces 

246 if traces: 

247 twmin = batch.tmin 

248 twmax = batch.tmax 

249 logger.info('processing %s - %s, %i traces' % 

250 (tts(twmin), tts(twmax), len(traces))) 

251 

252 if options.sample_snap: 

253 for tr in traces: 

254 tr.snap(interpolate=options.sample_snap == 'interpolate') 

255 

256 if target_deltat is not None: 

257 out_traces = [] 

258 for tr in traces: 

259 try: 

260 tr.downsample_to( 

261 target_deltat, 

262 snap=True, 

263 demean=False, 

264 allow_upsample_max=4) 

265 

266 if options.output_data_type == 'same': 

267 tr.ydata = tr.ydata.astype(tr.ydata.dtype) 

268 

269 out_traces.append(tr) 

270 

271 except (trace.TraceTooShort, trace.NoData): 

272 pass 

273 

274 traces = out_traces 

275 

276 if options.output_quantity: 

277 out_traces = [] 

278 tfade = tfade_factor / fmin 

279 ftap = (fmin / ffade_factor, fmin, fmax, ffade_factor * fmax) 

280 for tr in traces: 

281 try: 

282 response = sx.get_pyrocko_response( 

283 tr.nslc_id, 

284 timespan=(tr.tmin, tr.tmax), 

285 fake_input_units=output_units[ 

286 options.output_quantity]) 

287 

288 rest_tr = tr.transfer( 

289 tfade, ftap, response, invert=True, demean=True) 

290 

291 out_traces.append(rest_tr) 

292 

293 except stationxml.NoResponseInformation as e: 

294 logger.warning( 

295 'Cannot restitute: %s (no response)' % str(e)) 

296 

297 except stationxml.MultipleResponseInformation as e: 

298 logger.warning( 

299 'Cannot restitute: %s (multiple responses found)' 

300 % str(e)) 

301 

302 except (trace.TraceTooShort, trace.NoData): 

303 logger.warning( 

304 'Trace too short: %s' % '.'.join(tr.nslc_id)) 

305 

306 traces = out_traces 

307 

308 if options.scale is not None: 

309 for tr in traces: 

310 tr.ydata = options.scale * tr.ydata 

311 

312 if options.output_data_type != 'same': 

313 for tr in traces: 

314 tr.ydata = tr.ydata.astype( 

315 name_to_dtype[options.output_data_type]) 

316 

317 if replacements: 

318 for tr in traces: 

319 r = {} 

320 for k, pat, repl in replacements: 

321 oldval = getattr(tr, k) 

322 newval, n = re.subn(pat, repl, oldval) 

323 if n: 

324 r[k] = newval 

325 

326 tr.set_codes(**r) 

327 

328 if output_path: 

329 otraces = [] 

330 for tr in traces: 

331 try: 

332 otr = tr.chop(twmin, twmax, inplace=False) 

333 otraces.append(otr) 

334 except trace.NoData: 

335 pass 

336 

337 try: 

338 io.save(otraces, output_path, format=options.output_format, 

339 overwrite=options.force, 

340 additional=dict( 

341 wmin_year=tts(twmin, format='%Y'), 

342 wmin_month=tts(twmin, format='%m'), 

343 wmin_day=tts(twmin, format='%d'), 

344 wmin_jday=tts(twmin, format='%j'), 

345 wmin=tts(twmin, format='%Y-%m-%d_%H-%M-%S'), 

346 wmax_year=tts(twmax, format='%Y'), 

347 wmax_month=tts(twmax, format='%m'), 

348 wmax_day=tts(twmax, format='%d'), 

349 wmax_jday=tts(twmax, format='%j'), 

350 wmax=tts(twmax, format='%Y-%m-%d_%H-%M-%S')), 

351 **save_kwargs) 

352 except io.FileSaveError as e: 

353 die(str(e)) 

354 

355 for batch in it: 

356 process_traces(batch) 

357 if abort: 

358 break 

359 

360 signal.signal(signal.SIGINT, old) 

361 

362 if abort: 

363 die('interrupted.') 

364 

365 

366def main(args=None): 

367 ''' 

368 CLI entry point for Pyrocko's ``jackseis`` app. 

369 ''' 

370 if args is None: 

371 args = sys.argv[1:] 

372 

373 parser = OptionParser( 

374 usage=usage, 

375 description=description, 

376 option_class=JackseisOptions, 

377 formatter=util.BetterHelpFormatter()) 

378 

379 parser.add_option( 

380 '--format', 

381 dest='format', 

382 default='detect', 

383 choices=io.allowed_formats('load'), 

384 help='assume input files are of given FORMAT. Choices: %s' % 

385 io.allowed_formats('load', 'cli_help', 'detect')) 

386 

387 parser.add_option( 

388 '--pattern', 

389 dest='regex', 

390 metavar='REGEX', 

391 help='only include files whose paths match REGEX') 

392 

393 parser.add_option( 

394 '--stationxml', 

395 dest='station_xml_fns', 

396 action='append', 

397 default=[], 

398 metavar='STATIONXML', 

399 help='read station metadata from file STATIONXML') 

400 

401 parser.add_option( 

402 '--event', '--events', 

403 dest='event_fns', 

404 action='append', 

405 default=[], 

406 metavar='EVENT', 

407 help='read event information from file EVENT') 

408 

409 parser.add_option( 

410 '--cache', 

411 dest='cache_dir', 

412 default=config.config().cache_dir, 

413 metavar='DIR', 

414 help='use directory DIR to cache trace metadata ' 

415 "(default='%default')") 

416 

417 parser.add_option( 

418 '--quiet', 

419 dest='quiet', 

420 action='store_true', 

421 default=False, 

422 help='disable output of progress information') 

423 

424 parser.add_option( 

425 '--debug', 

426 dest='debug', 

427 action='store_true', 

428 default=False, 

429 help='print debugging information to stderr') 

430 

431 parser.add_option( 

432 '--tmin', 

433 dest='tmin', 

434 help='start time as "%s"' % tfmt) 

435 

436 parser.add_option( 

437 '--tmax', 

438 dest='tmax', 

439 help='end time as "%s"' % tfmt) 

440 

441 parser.add_option( 

442 '--tinc', 

443 dest='tinc', 

444 help='set time length of output files [s] or "auto" to automatically ' 

445 'choose an appropriate time increment or "huge" to allow to use ' 

446 'a lot of system memory to merge input traces into huge output ' 

447 'files.') 

448 

449 parser.add_option( 

450 '--downsample', 

451 dest='downsample', 

452 metavar='RATE', 

453 help='downsample to RATE [Hz]') 

454 

455 parser.add_option( 

456 '--scale', 

457 dest='scale', 

458 type=float, 

459 metavar='FACTOR', 

460 help='scale seismograms with given FACTOR. Changes data type to ' 

461 'float64. Use --output-data-type to change again after scaling.') 

462 

463 parser.add_option( 

464 '--output', 

465 dest='output_path', 

466 metavar='TEMPLATE', 

467 help='set output path to TEMPLATE. Available placeholders ' 

468 'are %n: network, %s: station, %l: location, %c: channel, ' 

469 '%b: begin time, %e: end time, %j: julian day of year. The ' 

470 'following additional placeholders use the window begin and end ' 

471 'times rather than trace begin and end times (to suppress ' 

472 'producing many small files for gappy traces), %(wmin_year)s, ' 

473 '%(wmin_month)s, %(wmin_day)s, %(wmin)s, %(wmin_jday)s, ' 

474 '%(wmax_year)s, %(wmax_month)s, %(wmax_day)s, %(wmax)s, ' 

475 "%(wmax_jday)s. Example: --output='data/%s/trace-%s-%c.mseed'") 

476 

477 parser.add_option( 

478 '--output-dir', 

479 metavar='TEMPLATE', 

480 dest='output_dir', 

481 help='set output directory to TEMPLATE (see --output for details) ' 

482 'and automatically choose filenames. ' 

483 'Produces files like TEMPLATE/NET-STA-LOC-CHA_BEGINTIME.FORMAT') 

484 

485 parser.add_option( 

486 '--output-format', 

487 dest='output_format', 

488 default='mseed', 

489 choices=io.allowed_formats('save'), 

490 help='set output file format. Choices: %s' % 

491 io.allowed_formats('save', 'cli_help', 'mseed')) 

492 

493 parser.add_option( 

494 '--force', 

495 dest='force', 

496 action='store_true', 

497 default=False, 

498 help='force overwriting of existing files') 

499 

500 parser.add_option( 

501 '--no-snap', 

502 dest='snap', 

503 action='store_false', 

504 default=True, 

505 help='do not start output files at even multiples of file length') 

506 

507 parser.add_option( 

508 '--traversal', 

509 dest='traversal', 

510 choices=('station-by-station', 'channel-by-channel', 'chronological'), 

511 default='station-by-station', 

512 help='set traversal order for traces processing. ' 

513 "Choices are 'station-by-station' [default], " 

514 "'channel-by-channel', and 'chronological'. Chronological " 

515 'traversal uses more processing memory but makes it possible to ' 

516 'join multiple stations into single output files') 

517 

518 parser.add_option( 

519 '--rename-network', 

520 action='append', 

521 default=[], 

522 dest='rename_network', 

523 metavar='/PATTERN/REPLACEMENT/', 

524 help='update network code, can be given more than once') 

525 

526 parser.add_option( 

527 '--rename-station', 

528 action='append', 

529 default=[], 

530 dest='rename_station', 

531 metavar='/PATTERN/REPLACEMENT/', 

532 help='update station code, can be given more than once') 

533 

534 parser.add_option( 

535 '--rename-location', 

536 action='append', 

537 default=[], 

538 dest='rename_location', 

539 metavar='/PATTERN/REPLACEMENT/', 

540 help='update location code, can be given more than once') 

541 

542 parser.add_option( 

543 '--rename-channel', 

544 action='append', 

545 default=[], 

546 dest='rename_channel', 

547 metavar='/PATTERN/REPLACEMENT/', 

548 help='update channel code, can be given more than once') 

549 

550 parser.add_option( 

551 '--output-data-type', 

552 dest='output_data_type', 

553 choices=('same', 'int32', 'int64', 'float32', 'float64'), 

554 default='same', 

555 metavar='DTYPE', 

556 help='set data type. Choices: same [default], int32, ' 

557 'int64, float32, float64. The output file format must support ' 

558 'the given type.') 

559 

560 parser.add_option( 

561 '--output-steim', 

562 dest='steim', 

563 type='steim', 

564 default=2, 

565 metavar='STEIM_COMPRESSION', 

566 help='set the mseed STEIM compression. Choices: 1 or 2. ' 

567 'Default is STEIM-2, which can compress full range int32. ' 

568 'NOTE: STEIM-2 is limited to 30 bit dynamic range.') 

569 

570 parser.add_option( 

571 '--output-record-length', 

572 dest='record_length', 

573 type='record_length', 

574 default=4096, 

575 metavar='RECORD_LENGTH', 

576 help='set the mseed record length in bytes. Choices: %s. ' 

577 'Default is 4096 bytes, which is commonly used for archiving.' 

578 % ', '.join(str(b) for b in io.mseed.VALID_RECORD_LENGTHS)) 

579 

580 quantity_choices = ('acceleration', 'velocity', 'displacement') 

581 parser.add_option( 

582 '--output-quantity', 

583 dest='output_quantity', 

584 choices=quantity_choices, 

585 help='deconvolve instrument transfer function. Choices: %s' 

586 % ', '.join(quantity_choices)) 

587 

588 parser.add_option( 

589 '--restitution-frequency-band', 

590 default='0.001,100.0', 

591 dest='str_fmin_fmax', 

592 metavar='FMIN,FMAX', 

593 help='frequency band for instrument deconvolution as FMIN,FMAX in Hz. ' 

594 'Default: "%default"') 

595 

596 sample_snap_choices = ('shift', 'interpolate') 

597 parser.add_option( 

598 '--sample-snap', 

599 dest='sample_snap', 

600 choices=sample_snap_choices, 

601 help='shift/interpolate traces so that samples are at even multiples ' 

602 'of sampling rate. Choices: %s' % ', '.join(sample_snap_choices)) 

603 

604 (options, args) = parser.parse_args(args) 

605 

606 if len(args) == 0: 

607 parser.print_help() 

608 sys.exit(1) 

609 

610 if options.debug: 

611 util.setup_logging(program_name, 'debug') 

612 elif options.quiet: 

613 util.setup_logging(program_name, 'warning') 

614 else: 

615 util.setup_logging(program_name, 'info') 

616 

617 def get_pile(): 

618 return pile.make_pile( 

619 paths=args, 

620 selector=None, 

621 regex=options.regex, 

622 fileformat=options.format, 

623 cachedirname=options.cache_dir, 

624 show_progress=not options.quiet) 

625 

626 return process(get_pile, options) 

627 

628 

629if __name__ == '__main__': 

630 main()