1# http://pyrocko.org - GPLv3 

2# 

3# The Pyrocko Developers, 21st Century 

4# ---|P------/S----------~Lg---------- 

5 

6import sys 

7import re 

8import os 

9import logging 

10import signal 

11import math 

12from copy import copy 

13from optparse import OptionParser, Option, OptionValueError 

14 

15import numpy as num 

16 

17from pyrocko import util, config, pile, model, io, trace 

18from pyrocko.io import stationxml 

19 

# Shortcut for building filesystem paths.
pjoin = os.path.join

logger = logging.getLogger('pyrocko.apps.jackseis')

program_name = 'jackseis'

# Usage summary shown in the option parser's help output.
usage = program_name + ' <inputs> ... [options]'

description = '''A simple tool to manipulate waveform archive data.'''

# Human-readable time format, referenced in option help texts and error
# messages.
tfmt = 'YYYY-mm-dd HH:MM:SS[.xxx]'
# Shorthands for pyrocko's time conversion helpers:
#   tts: float timestamp -> string
#   stt: (possibly partial) time string -> float timestamp
tts = util.time_to_str
stt = util.str_to_time_fillup

33 

34 

def die(message):
    '''Abort the program, printing *message* prefixed with the tool name.'''
    sys.exit('%s: error: %s' % (program_name, message))

37 

38 

# Mapping from --output-data-type choice names to numpy dtypes.
name_to_dtype = dict(
    int32=num.int32,
    int64=num.int64,
    float32=num.float32,
    float64=num.float64)


# SEED-style unit strings corresponding to each --output-quantity choice,
# used as fake input units for response deconvolution.
output_units = dict(
    acceleration='M/S**2',
    velocity='M/S',
    displacement='M')

50 

51 

def str_to_seconds(s):
    '''
    Convert a duration string to seconds.

    An optional single-letter suffix selects the unit: ``s`` (seconds),
    ``m`` (minutes), ``h`` (hours) or ``d`` (days). Without a suffix the
    value is taken as seconds.
    '''
    factors = {'s': 1., 'm': 60., 'h': 3600., 'd': 24.*3600.}
    if s and s[-1] in factors:
        return float(s[:-1]) * factors[s[-1]]

    return float(s)

63 

64 

def nice_seconds_floor(s):
    '''
    Round a duration [s] down to the largest "nice" interval not above it.

    Values below the smallest candidate, or above the largest, are
    returned unchanged.
    '''
    candidates = [
        1., 10., 60., 600., 3600., 3.*3600., 12*3600., 24*3600., 48*3600.]

    floored = s
    for candidate in candidates:
        if s < candidate:
            return floored

        floored = candidate

    return s

75 

76 

def check_record_length(option, opt, value):
    '''
    optparse type checker for the custom 'record_length' option type.

    Returns the record length as int if it is one of the values allowed
    for mseed output, raises :py:exc:`OptionValueError` otherwise.
    '''
    try:
        reclen = int(value)
        if reclen in io.mseed.VALID_RECORD_LENGTHS:
            return reclen
    except (TypeError, ValueError):
        pass

    # Report the raw option value: the previous code referenced ``reclen``
    # here, which is unbound when int() fails, causing a NameError instead
    # of the intended error message.
    raise OptionValueError(
        'invalid record length %s. (choose from %s)'
        % (value, ', '.join(str(b) for b in io.mseed.VALID_RECORD_LENGTHS)))

87 

88 

def check_steim(option, opt, value):
    '''
    optparse type checker for the custom 'steim' option type.

    Returns the STEIM compression level as int (1 or 2), raises
    :py:exc:`OptionValueError` otherwise.
    '''
    try:
        compression = int(value)
        if compression in (1, 2):
            return compression
    except (TypeError, ValueError):
        pass

    # Report the raw option value: the previous code referenced
    # ``compression`` here, which is unbound when int() fails, causing a
    # NameError instead of the intended error message.
    raise OptionValueError(
        'invalid STEIM compression %s. (choose from 1, 2)' % value)

98 

99 

class JackseisOptions(Option):
    '''
    optparse Option subclass adding the validated option types
    'record_length' and 'steim' used by the mseed output options.
    '''
    TYPES = Option.TYPES + ('record_length', 'steim')
    TYPE_CHECKER = dict(Option.TYPE_CHECKER)
    TYPE_CHECKER['record_length'] = check_record_length
    TYPE_CHECKER['steim'] = check_steim

105 

106 

def process(get_pile, options):
    '''
    Convert, filter and resave waveform archive data.

    :param get_pile: callable returning a pile with the input data
        selection; called lazily, after option validation.
    :param options: parsed command line options (see :py:func:`main`).

    Exits the program via :py:func:`die` on invalid options, an empty data
    selection or output errors.
    '''

    # Time increment per output file: None (single window), 'huge', or a
    # number of seconds.
    tinc = None
    if options.tinc is not None:
        # Fixed: previously the local ``tinc`` (always None here) was
        # tested instead of ``options.tinc``, so 'huge'/'auto' were never
        # recognized and always died in str_to_seconds().
        if options.tinc in ('huge', 'auto'):
            tinc = options.tinc
        else:
            try:
                tinc = str_to_seconds(options.tinc)
            except Exception:
                die('invalid argument to --tinc')

    tmin = None
    if options.tmin is not None:
        try:
            tmin = stt(options.tmin)
        except Exception:
            # Fixed: the ``% tfmt`` was inside the string literal, so the
            # time format was never interpolated into the message.
            die('invalid argument to --tmin. '
                'Expected format is "%s"' % tfmt)

    tmax = None
    if options.tmax is not None:
        try:
            tmax = stt(options.tmax)
        except Exception:
            die('invalid argument to --tmax. '
                'Expected format is "%s"' % tfmt)

    target_deltat = None
    if options.downsample is not None:
        try:
            target_deltat = 1.0 / float(options.downsample)
        except Exception:
            die('invalid argument to --downsample')

    # Collect (attribute, pattern, replacement) triples for trace renaming.
    replacements = []
    for k, rename_k in [
            ('network', options.rename_network),
            ('station', options.rename_station),
            ('location', options.rename_location),
            ('channel', options.rename_channel)]:

        for patrep in rename_k:
            m = re.match(r'/([^/]+)/([^/]*)/', patrep)
            if not m:
                die('invalid argument to --rename-%s. '
                    'Expected format is /PATTERN/REPLACEMENT/' % k)

            replacements.append((k, m.group(1), m.group(2)))

    # Merge all given StationXML files into one response database.
    sx = None
    if options.station_xml_fns:
        sxs = []
        for station_xml_fn in options.station_xml_fns:
            sxs.append(stationxml.load_xml(filename=station_xml_fn))

        sx = stationxml.primitive_merge(sxs)

    # Fail early with a clear message instead of crashing with an
    # AttributeError in the restitution step below.
    if options.output_quantity and sx is None:
        die('--output-quantity requires instrument response information. '
            'Use --stationxml to provide it.')

    # Events are loaded for validation; currently unused downstream.
    events = []
    for event_fn in options.event_fns:
        events.extend(model.load_events(event_fn))

    p = get_pile()

    if p.tmin is None:
        die('data selection is empty')

    if tinc == 'auto':
        # Aim for roughly 1e6 samples per output file, rounded down to a
        # "nice" time interval.
        tinc = nice_seconds_floor(p.get_deltatmin() * 1000000.)

    if options.snap:
        if tmin is None:
            tmin = p.tmin

        if tinc is not None:
            # Align the window start at an even multiple of the file length.
            tmin = int(math.floor(tmin / tinc)) * tinc

    output_path = options.output_path
    output_dir = options.output_dir

    if output_path and not output_dir and os.path.isdir(output_path):
        output_dir = output_path  # compat. with old behaviour

    if output_dir and not output_path:
        output_path = 'trace_%(network)s-%(station)s-' \
            '%(location)s-%(channel)s_%(wmin)s.' + \
            options.output_format

    if output_dir and output_path:
        output_path = pjoin(output_dir, output_path)

    if not output_path:
        die('--output not given')

    fmin, fmax = map(float, options.str_fmin_fmax.split(','))
    tfade_factor = 2.0
    ffade_factor = 1.5
    if options.output_quantity:
        # Extra padding so restitution tapers fall outside the output
        # window.
        tpad = 2*tfade_factor/fmin
    else:
        tpad = 0.

    if target_deltat is not None:
        # Padding for the downsampling filter's edge effects.
        tpad += target_deltat * 10.

    if tinc is None:
        if ((tmax or p.tmax) - (tmin or p.tmin)) \
                / p.get_deltatmin() > 100000000.:
            die('use --tinc=huge to really produce such large output files '
                'or use --tinc=INC to split into smaller files.')

    kwargs = dict(tmin=tmin, tmax=tmax, tinc=tinc, tpad=tpad, style='batch')

    if options.traversal == 'channel-by-channel':
        it = p.chopper_grouped(gather=lambda tr: tr.nslc_id, **kwargs)

    elif options.traversal == 'station-by-station':
        it = p.chopper_grouped(gather=lambda tr: tr.nslc_id[:2], **kwargs)

    else:
        it = p.chopper(**kwargs)

    # Graceful interruption: SIGINT finishes the current batch, then stops.
    abort = []

    def got_sigint(signum, frame):
        abort.append(True)

    old = signal.signal(signal.SIGINT, got_sigint)

    save_kwargs = {}
    if options.output_format == 'mseed':
        save_kwargs['record_length'] = options.record_length
        save_kwargs['steim'] = options.steim

    def process_traces(batch):
        '''Downsample, restitute, scale, retype, rename and save one batch.'''
        traces = batch.traces
        if not traces:
            return

        twmin = batch.tmin
        twmax = batch.tmax
        logger.info('processing %s - %s, %i traces' %
                    (tts(twmin), tts(twmax), len(traces)))

        if options.sample_snap:
            for tr in traces:
                tr.snap(interpolate=options.sample_snap == 'interpolate')

        if target_deltat is not None:
            out_traces = []
            for tr in traces:
                try:
                    original_dtype = tr.ydata.dtype
                    tr.downsample_to(
                        target_deltat, snap=True, demean=False)

                    # Downsampling filters in floating point; with
                    # --output-data-type=same, cast back to the input
                    # dtype. Fixed: the previous
                    # ``astype(tr.ydata.dtype)`` read the dtype *after*
                    # downsampling and was a no-op.
                    if options.output_data_type == 'same':
                        tr.ydata = tr.ydata.astype(original_dtype)

                    out_traces.append(tr)

                except (trace.TraceTooShort, trace.NoData):
                    pass

            traces = out_traces

        if options.output_quantity:
            out_traces = []
            tfade = tfade_factor / fmin
            ftap = (fmin / ffade_factor, fmin, fmax, ffade_factor * fmax)
            for tr in traces:
                try:
                    response = sx.get_pyrocko_response(
                        tr.nslc_id,
                        timespan=(tr.tmin, tr.tmax),
                        fake_input_units=output_units[
                            options.output_quantity])

                    rest_tr = tr.transfer(
                        tfade, ftap, response, invert=True, demean=True)

                    out_traces.append(rest_tr)

                except stationxml.NoResponseInformation as e:
                    logger.warning(
                        'Cannot restitute: %s (no response)' % str(e))

                except stationxml.MultipleResponseInformation as e:
                    logger.warning(
                        'Cannot restitute: %s (multiple responses found)'
                        % str(e))

                except (trace.TraceTooShort, trace.NoData):
                    logger.warning(
                        'Trace too short: %s' % '.'.join(tr.nslc_id))

            traces = out_traces

        if options.scale is not None:
            # NOTE: multiplying with a Python float promotes integer data
            # to float64 (as documented in the --scale help text).
            for tr in traces:
                tr.ydata = options.scale * tr.ydata

        if options.output_data_type != 'same':
            for tr in traces:
                tr.ydata = tr.ydata.astype(
                    name_to_dtype[options.output_data_type])

        if replacements:
            for tr in traces:
                r = {}
                for k, pat, repl in replacements:
                    oldval = getattr(tr, k)
                    newval, n = re.subn(pat, repl, oldval)
                    if n:
                        r[k] = newval

                tr.set_codes(**r)

        if output_path:
            # Cut away the padding before saving.
            otraces = []
            for tr in traces:
                try:
                    otr = tr.chop(twmin, twmax, inplace=False)
                    otraces.append(otr)
                except trace.NoData:
                    pass

            try:
                io.save(otraces, output_path, format=options.output_format,
                        overwrite=options.force,
                        additional=dict(
                            wmin_year=tts(twmin, format='%Y'),
                            wmin_month=tts(twmin, format='%m'),
                            wmin_day=tts(twmin, format='%d'),
                            wmin_jday=tts(twmin, format='%j'),
                            wmin=tts(twmin, format='%Y-%m-%d_%H-%M-%S'),
                            wmax_year=tts(twmax, format='%Y'),
                            wmax_month=tts(twmax, format='%m'),
                            wmax_day=tts(twmax, format='%d'),
                            wmax_jday=tts(twmax, format='%j'),
                            wmax=tts(twmax, format='%Y-%m-%d_%H-%M-%S')),
                        **save_kwargs)
            except io.FileSaveError as e:
                die(str(e))

    try:
        for batch in it:
            process_traces(batch)
            if abort:
                break
    finally:
        # Always restore the previous SIGINT handler, even if a batch
        # raised.
        signal.signal(signal.SIGINT, old)

    if abort:
        die('interrupted.')

357 

358 

def main(args=None):
    '''
    Command line entry point of the jackseis tool.

    :param args: list of command line arguments; defaults to
        ``sys.argv[1:]``.

    Builds the option parser, configures logging and hands over to
    :py:func:`process`.
    '''
    if args is None:
        args = sys.argv[1:]

    # JackseisOptions provides the validated custom option types
    # 'record_length' and 'steim'.
    parser = OptionParser(
        usage=usage,
        description=description,
        option_class=JackseisOptions,
        formatter=util.BetterHelpFormatter())

    # --- input selection and metadata options ---

    parser.add_option(
        '--format',
        dest='format',
        default='detect',
        choices=io.allowed_formats('load'),
        help='assume input files are of given FORMAT. Choices: %s' %
        io.allowed_formats('load', 'cli_help', 'detect'))

    parser.add_option(
        '--pattern',
        dest='regex',
        metavar='REGEX',
        help='only include files whose paths match REGEX')

    parser.add_option(
        '--stationxml',
        dest='station_xml_fns',
        action='append',
        default=[],
        metavar='STATIONXML',
        help='read station metadata from file STATIONXML')

    parser.add_option(
        '--event', '--events',
        dest='event_fns',
        action='append',
        default=[],
        metavar='EVENT',
        help='read event information from file EVENT')

    parser.add_option(
        '--cache',
        dest='cache_dir',
        default=config.config().cache_dir,
        metavar='DIR',
        help='use directory DIR to cache trace metadata '
             "(default='%default')")

    # --- logging verbosity ---

    parser.add_option(
        '--quiet',
        dest='quiet',
        action='store_true',
        default=False,
        help='disable output of progress information')

    parser.add_option(
        '--debug',
        dest='debug',
        action='store_true',
        default=False,
        help='print debugging information to stderr')

    # --- time window and processing options ---

    parser.add_option(
        '--tmin',
        dest='tmin',
        help='start time as "%s"' % tfmt)

    parser.add_option(
        '--tmax',
        dest='tmax',
        help='end time as "%s"' % tfmt)

    parser.add_option(
        '--tinc',
        dest='tinc',
        help='set time length of output files [s] or "auto" to automatically '
             'choose an appropriate time increment or "huge" to allow to use '
             'a lot of system memory to merge input traces into huge output '
             'files.')

    parser.add_option(
        '--downsample',
        dest='downsample',
        metavar='RATE',
        help='downsample to RATE [Hz]')

    parser.add_option(
        '--scale',
        dest='scale',
        type=float,
        metavar='FACTOR',
        help='scale seismograms with given FACTOR. Changes data type to '
             'float64. Use --output-data-type to change again after scaling.')

    # --- output options ---

    parser.add_option(
        '--output',
        dest='output_path',
        metavar='TEMPLATE',
        help='set output path to TEMPLATE. Available placeholders '
             'are %n: network, %s: station, %l: location, %c: channel, '
             '%b: begin time, %e: end time, %j: julian day of year. The '
             'following additional placeholders use the window begin and end '
             'times rather than trace begin and end times (to suppress '
             'producing many small files for gappy traces), %(wmin_year)s, '
             '%(wmin_month)s, %(wmin_day)s, %(wmin)s, %(wmin_jday)s, '
             '%(wmax_year)s, %(wmax_month)s, %(wmax_day)s, %(wmax)s, '
             "%(wmax_jday)s. Example: --output='data/%s/trace-%s-%c.mseed'")

    parser.add_option(
        '--output-dir',
        metavar='TEMPLATE',
        dest='output_dir',
        help='set output directory to TEMPLATE (see --output for details) '
             'and automatically choose filenames. '
             'Produces files like TEMPLATE/NET-STA-LOC-CHA_BEGINTIME.FORMAT')

    parser.add_option(
        '--output-format',
        dest='output_format',
        default='mseed',
        choices=io.allowed_formats('save'),
        help='set output file format. Choices: %s' %
        io.allowed_formats('save', 'cli_help', 'mseed'))

    parser.add_option(
        '--force',
        dest='force',
        action='store_true',
        default=False,
        help='force overwriting of existing files')

    parser.add_option(
        '--no-snap',
        dest='snap',
        action='store_false',
        default=True,
        help='do not start output files at even multiples of file length')

    parser.add_option(
        '--traversal',
        dest='traversal',
        choices=('station-by-station', 'channel-by-channel', 'chronological'),
        default='station-by-station',
        help='set traversal order for traces processing. '
             "Choices are 'station-by-station' [default], "
             "'channel-by-channel', and 'chronological'. Chronological "
             'traversal uses more processing memory but makes it possible to '
             'join multiple stations into single output files')

    # --- trace renaming options (repeatable) ---

    parser.add_option(
        '--rename-network',
        action='append',
        default=[],
        dest='rename_network',
        metavar='/PATTERN/REPLACEMENT/',
        help='update network code, can be given more than once')

    parser.add_option(
        '--rename-station',
        action='append',
        default=[],
        dest='rename_station',
        metavar='/PATTERN/REPLACEMENT/',
        help='update station code, can be given more than once')

    parser.add_option(
        '--rename-location',
        action='append',
        default=[],
        dest='rename_location',
        metavar='/PATTERN/REPLACEMENT/',
        help='update location code, can be given more than once')

    parser.add_option(
        '--rename-channel',
        action='append',
        default=[],
        dest='rename_channel',
        metavar='/PATTERN/REPLACEMENT/',
        help='update channel code, can be given more than once')

    # --- data type, compression and restitution options ---

    parser.add_option(
        '--output-data-type',
        dest='output_data_type',
        choices=('same', 'int32', 'int64', 'float32', 'float64'),
        default='same',
        metavar='DTYPE',
        help='set data type. Choices: same [default], int32, '
             'int64, float32, float64. The output file format must support '
             'the given type.')

    parser.add_option(
        '--output-steim',
        dest='steim',
        type='steim',
        default=2,
        metavar='STEIM_COMPRESSION',
        help='set the mseed STEIM compression. Choices: 1 or 2. '
             'Default is STEIM-2, which can compress full range int32. '
             'NOTE: STEIM-2 is limited to 30 bit dynamic range.')

    parser.add_option(
        '--output-record-length',
        dest='record_length',
        type='record_length',
        default=4096,
        metavar='RECORD_LENGTH',
        help='set the mseed record length in bytes. Choices: %s. '
             'Default is 4096 bytes, which is commonly used for archiving.'
             % ', '.join(str(b) for b in io.mseed.VALID_RECORD_LENGTHS))

    quantity_choices = ('acceleration', 'velocity', 'displacement')
    parser.add_option(
        '--output-quantity',
        dest='output_quantity',
        choices=quantity_choices,
        help='deconvolve instrument transfer function. Choices: %s'
             % ', '.join(quantity_choices))

    parser.add_option(
        '--restitution-frequency-band',
        default='0.001,100.0',
        dest='str_fmin_fmax',
        metavar='FMIN,FMAX',
        help='frequency band for instrument deconvolution as FMIN,FMAX in Hz. '
             'Default: "%default"')

    sample_snap_choices = ('shift', 'interpolate')
    parser.add_option(
        '--sample-snap',
        dest='sample_snap',
        choices=sample_snap_choices,
        help='shift/interpolate traces so that samples are at even multiples '
             'of sampling rate. Choices: %s' % ', '.join(sample_snap_choices))

    (options, args) = parser.parse_args(args)

    # Without any input paths there is nothing to do; show help and exit.
    if len(args) == 0:
        parser.print_help()
        sys.exit(1)

    if options.debug:
        util.setup_logging(program_name, 'debug')
    elif options.quiet:
        util.setup_logging(program_name, 'warning')
    else:
        util.setup_logging(program_name, 'info')

    # Deferred pile construction: process() validates options first, so
    # the (potentially slow) scanning of input files only happens once the
    # command line is known to be sane.
    def get_pile():
        return pile.make_pile(
            paths=args,
            selector=None,
            regex=options.regex,
            fileformat=options.format,
            cachedirname=options.cache_dir,
            show_progress=not options.quiet)

    return process(get_pile, options)

617 

618 

# Allow running this module directly as a script.
if __name__ == '__main__':
    main()