1# http://pyrocko.org - GPLv3 

2# 

3# The Pyrocko Developers, 21st Century 

4# ---|P------/S----------~Lg---------- 

5 

6import sys 

7import re 

8import os 

9import logging 

10import signal 

11import math 

12from copy import copy 

13from optparse import OptionParser, Option, OptionValueError 

14 

15import numpy as num 

16 

17from pyrocko import util, config, pile, model, io, trace 

18from pyrocko.io import stationxml 

19 

# Shorthand for building output file paths.
pjoin = os.path.join

logger = logging.getLogger('pyrocko.apps.jackseis')

program_name = 'jackseis'

usage = program_name + ' <inputs> ... [options]'

description = '''A simple tool to manipulate waveform archive data.'''

# Human-readable time format shown in --tmin/--tmax help text and errors.
tfmt = 'YYYY-mm-dd HH:MM:SS[.xxx]'
# Shorthands for pyrocko's time <-> string conversion helpers; stt fills
# up incomplete time strings (see util.str_to_time_fillup).
tts = util.time_to_str
stt = util.str_to_time_fillup

33 

34 

def die(message):
    '''Terminate the program, reporting *message* as an error.'''
    text = '%s: error: %s' % (program_name, message)
    sys.exit(text)

37 

38 

# Mapping of --output-data-type choice names to numpy dtypes.
name_to_dtype = {
    'int32': num.int32,
    'int64': num.int64,
    'float32': num.float32,
    'float64': num.float64}


# Units requested from the StationXML response lookup for each
# --output-quantity choice (fake input units for restitution).
output_units = {
    'acceleration': 'M/S**2',
    'velocity': 'M/S',
    'displacement': 'M'}

50 

51 

def str_to_seconds(s):
    '''
    Convert a duration string to seconds.

    Supported unit suffixes: ``s`` (seconds), ``m`` (minutes), ``h``
    (hours), ``d`` (days). Without a suffix the value is interpreted as
    seconds.
    '''
    factors = {'s': 1., 'm': 60., 'h': 3600., 'd': 3600.*24.}
    suffix = s[-1:]
    if suffix in factors:
        return float(s[:-1]) * factors[suffix]

    return float(s)

63 

64 

def nice_seconds_floor(s):
    '''
    Round a duration down to a 'nice' interval.

    Returns the largest value from a fixed set of convenient intervals
    (1 s up to 48 h) not exceeding *s*. Durations below 1 s or above the
    largest nice interval are returned unchanged.
    '''
    nice_values = [
        1., 10., 60., 600., 3600., 3.*3600., 12*3600., 24*3600., 48*3600.]

    # Walk (candidate-below, boundary) pairs; the first boundary above s
    # determines the result.
    for below, boundary in zip([s] + nice_values, nice_values):
        if s < boundary:
            return below

    return s

75 

76 

def check_record_length(option, opt, value):
    '''
    optparse type checker for the mseed record length option.

    :param option: the ``Option`` instance (unused).
    :param opt: the option string as seen on the command line (unused).
    :param value: the raw option value string.
    :returns: the validated record length as ``int``.
    :raises OptionValueError: if *value* is not one of the record
        lengths supported by the mseed writer.
    '''
    try:
        reclen = int(value)
        if reclen in io.mseed.VALID_RECORD_LENGTHS:
            return reclen
    except Exception:
        pass

    # Report the raw option value: ``reclen`` is unbound when the int()
    # conversion itself failed (was a NameError before).
    raise OptionValueError(
        'invalid record length %s. (choose from %s)'
        % (value, ', '.join(str(b) for b in io.mseed.VALID_RECORD_LENGTHS)))

87 

88 

def check_steim(option, opt, value):
    '''
    optparse type checker for the mseed STEIM compression option.

    :param option: the ``Option`` instance (unused).
    :param opt: the option string as seen on the command line (unused).
    :param value: the raw option value string.
    :returns: the validated compression level (1 or 2) as ``int``.
    :raises OptionValueError: if *value* is neither 1 nor 2.
    '''
    try:
        compression = int(value)
        if compression in (1, 2):
            return compression
    except Exception:
        pass

    # Report the raw option value: ``compression`` is unbound when the
    # int() conversion itself failed (was a NameError before).
    raise OptionValueError(
        'invalid STEIM compression %s. (choose from 1, 2)' % value)

98 

99 

class JackseisOptions(Option):
    # Extends optparse's Option with the two custom option value types
    # used by the jackseis command line; the checkers validate and
    # convert the raw strings (see check_record_length / check_steim).
    TYPES = Option.TYPES + ('record_length', 'steim')
    TYPE_CHECKER = copy(Option.TYPE_CHECKER)
    TYPE_CHECKER['record_length'] = check_record_length
    TYPE_CHECKER['steim'] = check_steim

105 

106 

def process(get_pile, options):
    '''
    Run the jackseis processing chain on a waveform data selection.

    :param get_pile: callable returning the input data as a pile object
        (deferred so that pile construction happens after option
        validation).
    :param options: parsed command line options (see :py:func:`main`).

    Terminates the program via :py:func:`die` on invalid option values
    or an empty data selection. Temporarily installs a SIGINT handler so
    a running conversion can be interrupted cleanly between batches.
    '''
    tinc = None
    if options.tinc is not None:
        # Fixed: the special values must be checked on the option string
        # itself; the local ``tinc`` is still None at this point, so
        # '--tinc=auto' / '--tinc=huge' previously died in
        # str_to_seconds().
        if options.tinc in ('huge', 'auto'):
            tinc = options.tinc
        else:
            try:
                tinc = str_to_seconds(options.tinc)
            except Exception:
                die('invalid argument to --tinc')

    tmin = None
    if options.tmin is not None:
        try:
            tmin = stt(options.tmin)
        except Exception:
            # Fixed: the '%' formatting operator was trapped inside the
            # string literal, so tfmt was never interpolated (compare
            # the --tmax branch below).
            die('invalid argument to --tmin. '
                'Expected format is "%s"' % tfmt)

    tmax = None
    if options.tmax is not None:
        try:
            tmax = stt(options.tmax)
        except Exception:
            die('invalid argument to --tmax. '
                'Expected format is "%s"' % tfmt)

    target_deltat = None
    if options.downsample is not None:
        try:
            target_deltat = 1.0 / float(options.downsample)
        except Exception:
            die('invalid argument to --downsample')

    # Collect (attribute, pattern, replacement) triples from the
    # --rename-* options.
    replacements = []
    for k, rename_k, in [
            ('network', options.rename_network),
            ('station', options.rename_station),
            ('location', options.rename_location),
            ('channel', options.rename_channel)]:

        for patrep in rename_k:
            m = re.match(r'/([^/]+)/([^/]*)/', patrep)
            if not m:
                die('invalid argument to --rename-%s. '
                    'Expected format is /PATTERN/REPLACEMENT/' % k)

            replacements.append((k, m.group(1), m.group(2)))

    sx = None
    if options.station_xml_fns:
        sxs = []
        for station_xml_fn in options.station_xml_fns:
            sxs.append(stationxml.load_xml(filename=station_xml_fn))

        sx = stationxml.primitive_merge(sxs)

    # Load event files early so that malformed files are reported before
    # any data processing starts.
    # NOTE(review): ``events`` is not referenced further below - confirm
    # whether downstream use was intended.
    events = []
    for event_fn in options.event_fns:
        events.extend(model.load_events(event_fn))

    p = get_pile()

    if p.tmin is None:
        die('data selection is empty')

    if tinc == 'auto':
        tinc = nice_seconds_floor(p.get_deltatmin() * 1000000.)

    if options.snap:
        if tmin is None:
            tmin = p.tmin

        # 'huge' is resolved to None further below; it must not be used
        # in arithmetic here.
        if tinc is not None and tinc != 'huge':
            tmin = int(math.floor(tmin / tinc)) * tinc

    output_path = options.output_path
    output_dir = options.output_dir

    if output_path and not output_dir and os.path.isdir(output_path):
        output_dir = output_path  # compat. with old behaviour

    if output_dir and not output_path:
        output_path = 'trace_%(network)s-%(station)s-' \
            '%(location)s-%(channel)s_%(wmin)s.' + \
            options.output_format

    if output_dir and output_path:
        output_path = pjoin(output_dir, output_path)

    if not output_path:
        die('--output not given')

    fmin, fmax = map(float, options.str_fmin_fmax.split(','))
    tfade_factor = 2.0
    ffade_factor = 1.5
    if options.output_quantity:
        # Extra padding needed for the restitution taper.
        tpad = 2*tfade_factor/fmin
    else:
        tpad = 0.

    if target_deltat is not None:
        # Extra padding for downsampling filter edge effects.
        tpad += target_deltat * 10.

    if tinc is None:
        if ((tmax or p.tmax) - (tmin or p.tmin)) \
                / p.get_deltatmin() > 100000000.:
            die('use --tinc=huge to really produce such large output files '
                'or use --tinc=INC to split into smaller files.')

    if tinc == 'huge':
        # Fixed: resolve the sentinel after the size check above, so the
        # chopper receives a valid increment (None = no time splitting).
        tinc = None

    kwargs = dict(tmin=tmin, tmax=tmax, tinc=tinc, tpad=tpad, style='batch')

    if options.traversal == 'channel-by-channel':
        it = p.chopper_grouped(gather=lambda tr: tr.nslc_id, **kwargs)

    elif options.traversal == 'station-by-station':
        it = p.chopper_grouped(gather=lambda tr: tr.nslc_id[:2], **kwargs)

    else:
        it = p.chopper(**kwargs)

    abort = []

    def got_sigint(signum, frame):
        # Remember the interrupt; the batch loop below checks the flag
        # and exits cleanly after the current batch.
        abort.append(True)

    old = signal.signal(signal.SIGINT, got_sigint)

    save_kwargs = {}
    if options.output_format == 'mseed':
        save_kwargs['record_length'] = options.record_length
        save_kwargs['steim'] = options.steim

    def process_traces(batch):
        # Apply snap / downsample / restitution / scaling / dtype /
        # renaming to one batch of traces and write the result.
        traces = batch.traces
        if traces:
            twmin = batch.tmin
            twmax = batch.tmax
            logger.info('processing %s - %s, %i traces' %
                        (tts(twmin), tts(twmax), len(traces)))

            if options.sample_snap:
                for tr in traces:
                    tr.snap(interpolate=options.sample_snap == 'interpolate')

            if target_deltat is not None:
                out_traces = []
                for tr in traces:
                    try:
                        want_dtype = tr.ydata.dtype
                        tr.downsample_to(
                            target_deltat,
                            snap=True,
                            demean=False,
                            allow_upsample_max=4)

                        if options.output_data_type == 'same':
                            # Fixed: restore the pre-downsampling dtype;
                            # casting to tr.ydata.dtype was a no-op so
                            # 'same' silently produced float output.
                            tr.ydata = tr.ydata.astype(want_dtype)

                        out_traces.append(tr)

                    except (trace.TraceTooShort, trace.NoData):
                        pass

                traces = out_traces

            if options.output_quantity:
                out_traces = []
                tfade = tfade_factor / fmin
                ftap = (fmin / ffade_factor, fmin, fmax, ffade_factor * fmax)
                for tr in traces:
                    try:
                        response = sx.get_pyrocko_response(
                            tr.nslc_id,
                            timespan=(tr.tmin, tr.tmax),
                            fake_input_units=output_units[
                                options.output_quantity])

                        rest_tr = tr.transfer(
                            tfade, ftap, response, invert=True, demean=True)

                        out_traces.append(rest_tr)

                    except stationxml.NoResponseInformation as e:
                        logger.warning(
                            'Cannot restitute: %s (no response)' % str(e))

                    except stationxml.MultipleResponseInformation as e:
                        logger.warning(
                            'Cannot restitute: %s (multiple responses found)'
                            % str(e))

                    except (trace.TraceTooShort, trace.NoData):
                        logger.warning(
                            'Trace too short: %s' % '.'.join(tr.nslc_id))

                traces = out_traces

            if options.scale is not None:
                # numpy promotes to float64 here; --output-data-type can
                # be used to convert back afterwards.
                for tr in traces:
                    tr.ydata = options.scale * tr.ydata

            if options.output_data_type != 'same':
                for tr in traces:
                    tr.ydata = tr.ydata.astype(
                        name_to_dtype[options.output_data_type])

            if replacements:
                for tr in traces:
                    r = {}
                    for k, pat, repl in replacements:
                        oldval = getattr(tr, k)
                        newval, n = re.subn(pat, repl, oldval)
                        if n:
                            r[k] = newval

                    tr.set_codes(**r)

            if output_path:
                # Chop away the padding added for downsampling /
                # restitution before saving.
                otraces = []
                for tr in traces:
                    try:
                        otr = tr.chop(twmin, twmax, inplace=False)
                        otraces.append(otr)
                    except trace.NoData:
                        pass

                try:
                    io.save(otraces, output_path, format=options.output_format,
                            overwrite=options.force,
                            additional=dict(
                                wmin_year=tts(twmin, format='%Y'),
                                wmin_month=tts(twmin, format='%m'),
                                wmin_day=tts(twmin, format='%d'),
                                wmin_jday=tts(twmin, format='%j'),
                                wmin=tts(twmin, format='%Y-%m-%d_%H-%M-%S'),
                                wmax_year=tts(twmax, format='%Y'),
                                wmax_month=tts(twmax, format='%m'),
                                wmax_day=tts(twmax, format='%d'),
                                wmax_jday=tts(twmax, format='%j'),
                                wmax=tts(twmax, format='%Y-%m-%d_%H-%M-%S')),
                            **save_kwargs)
                except io.FileSaveError as e:
                    die(str(e))

    for batch in it:
        process_traces(batch)
        if abort:
            break

    # Restore the previous SIGINT handler.
    signal.signal(signal.SIGINT, old)

    if abort:
        die('interrupted.')

360 

361 

def main(args=None):
    '''
    Command line interface entry point for jackseis.

    :param args: list of command line arguments; defaults to
        ``sys.argv[1:]`` when ``None``.

    Parses options, configures logging, and hands a lazily constructed
    pile of the input files to :py:func:`process`.
    '''
    if args is None:
        args = sys.argv[1:]

    parser = OptionParser(
        usage=usage,
        description=description,
        option_class=JackseisOptions,
        formatter=util.BetterHelpFormatter())

    # --- input selection options ---

    parser.add_option(
        '--format',
        dest='format',
        default='detect',
        choices=io.allowed_formats('load'),
        help='assume input files are of given FORMAT. Choices: %s' %
             io.allowed_formats('load', 'cli_help', 'detect'))

    parser.add_option(
        '--pattern',
        dest='regex',
        metavar='REGEX',
        help='only include files whose paths match REGEX')

    parser.add_option(
        '--stationxml',
        dest='station_xml_fns',
        action='append',
        default=[],
        metavar='STATIONXML',
        help='read station metadata from file STATIONXML')

    parser.add_option(
        '--event', '--events',
        dest='event_fns',
        action='append',
        default=[],
        metavar='EVENT',
        help='read event information from file EVENT')

    parser.add_option(
        '--cache',
        dest='cache_dir',
        default=config.config().cache_dir,
        metavar='DIR',
        help='use directory DIR to cache trace metadata '
             "(default='%default')")

    # --- logging verbosity ---

    parser.add_option(
        '--quiet',
        dest='quiet',
        action='store_true',
        default=False,
        help='disable output of progress information')

    parser.add_option(
        '--debug',
        dest='debug',
        action='store_true',
        default=False,
        help='print debugging information to stderr')

    # --- time window and resampling options ---

    parser.add_option(
        '--tmin',
        dest='tmin',
        help='start time as "%s"' % tfmt)

    parser.add_option(
        '--tmax',
        dest='tmax',
        help='end time as "%s"' % tfmt)

    parser.add_option(
        '--tinc',
        dest='tinc',
        help='set time length of output files [s] or "auto" to automatically '
             'choose an appropriate time increment or "huge" to allow to use '
             'a lot of system memory to merge input traces into huge output '
             'files.')

    parser.add_option(
        '--downsample',
        dest='downsample',
        metavar='RATE',
        help='downsample to RATE [Hz]')

    parser.add_option(
        '--scale',
        dest='scale',
        type=float,
        metavar='FACTOR',
        help='scale seismograms with given FACTOR. Changes data type to '
             'float64. Use --output-data-type to change again after scaling.')

    # --- output options ---

    parser.add_option(
        '--output',
        dest='output_path',
        metavar='TEMPLATE',
        help='set output path to TEMPLATE. Available placeholders '
             'are %n: network, %s: station, %l: location, %c: channel, '
             '%b: begin time, %e: end time, %j: julian day of year. The '
             'following additional placeholders use the window begin and end '
             'times rather than trace begin and end times (to suppress '
             'producing many small files for gappy traces), %(wmin_year)s, '
             '%(wmin_month)s, %(wmin_day)s, %(wmin)s, %(wmin_jday)s, '
             '%(wmax_year)s, %(wmax_month)s, %(wmax_day)s, %(wmax)s, '
             "%(wmax_jday)s. Example: --output='data/%s/trace-%s-%c.mseed'")

    parser.add_option(
        '--output-dir',
        metavar='TEMPLATE',
        dest='output_dir',
        help='set output directory to TEMPLATE (see --output for details) '
             'and automatically choose filenames. '
             'Produces files like TEMPLATE/NET-STA-LOC-CHA_BEGINTIME.FORMAT')

    parser.add_option(
        '--output-format',
        dest='output_format',
        default='mseed',
        choices=io.allowed_formats('save'),
        help='set output file format. Choices: %s' %
             io.allowed_formats('save', 'cli_help', 'mseed'))

    parser.add_option(
        '--force',
        dest='force',
        action='store_true',
        default=False,
        help='force overwriting of existing files')

    parser.add_option(
        '--no-snap',
        dest='snap',
        action='store_false',
        default=True,
        help='do not start output files at even multiples of file length')

    parser.add_option(
        '--traversal',
        dest='traversal',
        choices=('station-by-station', 'channel-by-channel', 'chronological'),
        default='station-by-station',
        help='set traversal order for traces processing. '
             "Choices are 'station-by-station' [default], "
             "'channel-by-channel', and 'chronological'. Chronological "
             'traversal uses more processing memory but makes it possible to '
             'join multiple stations into single output files')

    # --- trace code renaming (each may be repeated) ---

    parser.add_option(
        '--rename-network',
        action='append',
        default=[],
        dest='rename_network',
        metavar='/PATTERN/REPLACEMENT/',
        help='update network code, can be given more than once')

    parser.add_option(
        '--rename-station',
        action='append',
        default=[],
        dest='rename_station',
        metavar='/PATTERN/REPLACEMENT/',
        help='update station code, can be given more than once')

    parser.add_option(
        '--rename-location',
        action='append',
        default=[],
        dest='rename_location',
        metavar='/PATTERN/REPLACEMENT/',
        help='update location code, can be given more than once')

    parser.add_option(
        '--rename-channel',
        action='append',
        default=[],
        dest='rename_channel',
        metavar='/PATTERN/REPLACEMENT/',
        help='update channel code, can be given more than once')

    # --- data type, mseed encoding and restitution options ---

    parser.add_option(
        '--output-data-type',
        dest='output_data_type',
        choices=('same', 'int32', 'int64', 'float32', 'float64'),
        default='same',
        metavar='DTYPE',
        help='set data type. Choices: same [default], int32, '
             'int64, float32, float64. The output file format must support '
             'the given type.')

    parser.add_option(
        '--output-steim',
        dest='steim',
        type='steim',
        default=2,
        metavar='STEIM_COMPRESSION',
        help='set the mseed STEIM compression. Choices: 1 or 2. '
             'Default is STEIM-2, which can compress full range int32. '
             'NOTE: STEIM-2 is limited to 30 bit dynamic range.')

    parser.add_option(
        '--output-record-length',
        dest='record_length',
        type='record_length',
        default=4096,
        metavar='RECORD_LENGTH',
        help='set the mseed record length in bytes. Choices: %s. '
             'Default is 4096 bytes, which is commonly used for archiving.'
             % ', '.join(str(b) for b in io.mseed.VALID_RECORD_LENGTHS))

    quantity_choices = ('acceleration', 'velocity', 'displacement')
    parser.add_option(
        '--output-quantity',
        dest='output_quantity',
        choices=quantity_choices,
        help='deconvolve instrument transfer function. Choices: %s'
             % ', '.join(quantity_choices))

    parser.add_option(
        '--restitution-frequency-band',
        default='0.001,100.0',
        dest='str_fmin_fmax',
        metavar='FMIN,FMAX',
        help='frequency band for instrument deconvolution as FMIN,FMAX in Hz. '
             'Default: "%default"')

    sample_snap_choices = ('shift', 'interpolate')
    parser.add_option(
        '--sample-snap',
        dest='sample_snap',
        choices=sample_snap_choices,
        help='shift/interpolate traces so that samples are at even multiples '
             'of sampling rate. Choices: %s' % ', '.join(sample_snap_choices))

    (options, args) = parser.parse_args(args)

    # Positional arguments are the input files/directories; without any,
    # show usage and exit with an error status.
    if len(args) == 0:
        parser.print_help()
        sys.exit(1)

    # --debug wins over --quiet; default verbosity is 'info'.
    if options.debug:
        util.setup_logging(program_name, 'debug')
    elif options.quiet:
        util.setup_logging(program_name, 'warning')
    else:
        util.setup_logging(program_name, 'info')

    def get_pile():
        # Deferred pile construction: scanning/caching of input files
        # only happens after process() has validated the options.
        return pile.make_pile(
            paths=args,
            selector=None,
            regex=options.regex,
            fileformat=options.format,
            cachedirname=options.cache_dir,
            show_progress=not options.quiet)

    return process(get_pile, options)

620 

621 

# Allow running this module directly as a script.
if __name__ == '__main__':
    main()