1#!/usr/bin/env python 

2# http://pyrocko.org - GPLv3 

3# 

4# The Pyrocko Developers, 21st Century 

5# ---|P------/S----------~Lg---------- 

6from __future__ import absolute_import, division 

7 

8import sys 

9import re 

10import os 

11import logging 

12import signal 

13import math 

14from copy import copy 

15from optparse import OptionParser, Option, OptionValueError 

16 

17import numpy as num 

18 

19from pyrocko import util, config, pile, model, io, trace 

20from pyrocko.io import stationxml 

21 

# Shortcut for joining filesystem path components.
pjoin = os.path.join

# Module-level logger; verbosity is configured in main() via
# util.setup_logging().
logger = logging.getLogger('pyrocko.apps.jackseis')

program_name = 'jackseis'

usage = program_name + ' <inputs> ... [options]'

description = '''A simple tool to manipulate waveform archive data.'''

# Human-readable time format, shown in the --tmin/--tmax help texts.
tfmt = 'YYYY-mm-dd HH:MM:SS[.xxx]'

# Shortcuts for time <-> string conversion helpers.
tts = util.time_to_str
stt = util.str_to_time

35 

36 

def die(message):
    '''Terminate the program, printing an error message.

    Exits with a non-zero status via :py:func:`sys.exit`, prefixing
    *message* with the program name.
    '''
    sys.exit('{0}: error: {1}'.format(program_name, message))

39 

40 

# Mapping of --output-data-type choices to numpy dtypes.
name_to_dtype = {
    'int32': num.int32,
    'int64': num.int64,
    'float32': num.float32,
    'float64': num.float64}


# Mapping of --output-quantity choices to unit strings, passed as
# fake_input_units when selecting instrument responses for deconvolution.
output_units = {
    'acceleration': 'M/S**2',
    'velocity': 'M/S',
    'displacement': 'M'}

52 

53 

def str_to_seconds(s):
    '''Parse a time span string into seconds.

    Accepts a plain float, or a float with a unit suffix: ``s`` (seconds),
    ``m`` (minutes), ``h`` (hours) or ``d`` (days).

    :raises ValueError: if the numeric part cannot be parsed
    '''
    unit_factors = {'s': 1., 'm': 60., 'h': 3600., 'd': 24.*3600.}
    factor = unit_factors.get(s[-1:])
    if factor is None:
        return float(s)

    return float(s[:-1]) * factor

65 

66 

def nice_seconds_floor(s):
    '''Round a duration [s] down to a "nice" value.

    Returns the largest entry from a fixed list of convenient durations
    (1 s ... 48 h) which is less than or equal to *s*. Values below 1 s or
    at/above 48 h are returned unchanged.
    '''
    nice = [1., 10., 60., 600., 3600., 3.*3600., 12*3600., 24*3600., 48*3600.]
    if s >= nice[-1]:
        return s

    smaller = [x for x in nice if x <= s]
    return smaller[-1] if smaller else s

77 

78 

def check_record_length(option, opt, value):
    '''optparse type checker for the mseed record length.

    Returns *value* as int if it is one of
    ``io.mseed.VALID_RECORD_LENGTHS``, otherwise raises
    :py:exc:`optparse.OptionValueError`.
    '''
    try:
        reclen = int(value)
        if reclen in io.mseed.VALID_RECORD_LENGTHS:
            return reclen
    except (TypeError, ValueError):
        pass

    # Report the raw input value: referencing the local ``reclen`` here
    # raised UnboundLocalError when int() failed to parse the value.
    raise OptionValueError(
        'invalid record length %s. (choose from %s)'
        % (value, ', '.join(str(b) for b in io.mseed.VALID_RECORD_LENGTHS)))

89 

90 

def check_steim(option, opt, value):
    '''optparse type checker for the mseed STEIM compression level.

    Returns *value* as int if it is 1 or 2, otherwise raises
    :py:exc:`optparse.OptionValueError`.
    '''
    try:
        compression = int(value)
        if compression in (1, 2):
            return compression
    except (TypeError, ValueError):
        pass

    # Report the raw input value: referencing the local ``compression`` here
    # raised UnboundLocalError when int() failed to parse the value.
    raise OptionValueError(
        'invalid STEIM compression %s. (choose from 1, 2)' % value)

100 

101 

class JackseisOptions(Option):
    '''optparse ``Option`` subclass registering the custom option value
    types ``record_length`` and ``steim``.'''

    TYPES = Option.TYPES + ('record_length', 'steim')
    TYPE_CHECKER = copy(Option.TYPE_CHECKER)
    TYPE_CHECKER.update({
        'record_length': check_record_length,
        'steim': check_steim})

107 

108 

def main(args=None):
    '''Entry point of the jackseis command line tool.

    Parses command line options, reads the input waveform archives and
    writes converted/processed traces to the requested output files.

    :param args: command line arguments to parse; defaults to
        ``sys.argv[1:]``
    '''
    if args is None:
        args = sys.argv[1:]

    parser = OptionParser(
        usage=usage,
        description=description,
        option_class=JackseisOptions,
        formatter=util.BetterHelpFormatter())

    parser.add_option(
        '--format',
        dest='format',
        default='detect',
        choices=io.allowed_formats('load'),
        help='assume input files are of given FORMAT. Choices: %s' %
             io.allowed_formats('load', 'cli_help', 'detect'))

    parser.add_option(
        '--pattern',
        dest='regex',
        metavar='REGEX',
        help='only include files whose paths match REGEX')

    parser.add_option(
        '--stationxml',
        dest='station_xml_fns',
        action='append',
        default=[],
        metavar='STATIONXML',
        help='read station metadata from file STATIONXML')

    parser.add_option(
        '--event', '--events',
        dest='event_fns',
        action='append',
        default=[],
        metavar='EVENT',
        help='read event information from file EVENT')

    parser.add_option(
        '--cache',
        dest='cache_dir',
        default=config.config().cache_dir,
        metavar='DIR',
        help='use directory DIR to cache trace metadata '
             '(default=\'%default\')')

    parser.add_option(
        '--quiet',
        dest='quiet',
        action='store_true',
        default=False,
        help='disable output of progress information')

    parser.add_option(
        '--debug',
        dest='debug',
        action='store_true',
        default=False,
        help='print debugging information to stderr')

    parser.add_option(
        '--tmin',
        dest='tmin',
        help='start time as "%s"' % tfmt)

    parser.add_option(
        '--tmax',
        dest='tmax',
        help='end time as "%s"' % tfmt)

    parser.add_option(
        '--tinc',
        dest='tinc',
        help='set time length of output files [s] or "auto" to automatically '
             'choose an appropriate time increment or "huge" to allow to use '
             'a lot of system memory to merge input traces into huge output '
             'files.')

    parser.add_option(
        '--downsample',
        dest='downsample',
        metavar='RATE',
        help='downsample to RATE [Hz]')

    parser.add_option(
        '--scale',
        dest='scale',
        type=float,
        metavar='FACTOR',
        help='scale seismograms with given FACTOR. Changes data type to '
             'float64. Use --output-data-type to change again after scaling.')

    parser.add_option(
        '--output',
        dest='output_path',
        metavar='TEMPLATE',
        help='set output path to TEMPLATE. Available placeholders '
             'are %n: network, %s: station, %l: location, %c: channel, '
             '%b: begin time, %e: end time, %j: julian day of year. The '
             'following additional placeholders use the window begin and end '
             'times rather than trace begin and end times (to suppress '
             'producing many small files for gappy traces), %(wmin_year)s, '
             '%(wmin_month)s, %(wmin_day)s, %(wmin)s, %(wmin_jday)s, '
             '%(wmax_year)s, %(wmax_month)s, %(wmax_day)s, %(wmax)s, '
             '%(wmax_jday)s. Example: --output=\'data/%s/trace-%s-%c.mseed\'')

    parser.add_option(
        '--output-dir',
        metavar='TEMPLATE',
        dest='output_dir',
        help='set output directory to TEMPLATE (see --output for details) '
             'and automatically choose filenames. '
             'Produces files like TEMPLATE/NET-STA-LOC-CHA_BEGINTIME.FORMAT')

    parser.add_option(
        '--output-format',
        dest='output_format',
        default='mseed',
        choices=io.allowed_formats('save'),
        help='set output file format. Choices: %s' %
             io.allowed_formats('save', 'cli_help', 'mseed'))

    parser.add_option(
        '--force',
        dest='force',
        action='store_true',
        default=False,
        help='force overwriting of existing files')

    parser.add_option(
        '--no-snap',
        dest='snap',
        action='store_false',
        default=True,
        help='do not start output files at even multiples of file length')

    parser.add_option(
        '--traversal',
        dest='traversal',
        choices=('station-by-station', 'channel-by-channel', 'chronological'),
        default='station-by-station',
        help='set traversal order for traces processing. '
             'Choices are \'station-by-station\' [default], '
             '\'channel-by-channel\', and \'chronological\'. Chronological '
             'traversal uses more processing memory but makes it possible to '
             'join multiple stations into single output files')

    parser.add_option(
        '--rename-network',
        action='append',
        default=[],
        dest='rename_network',
        metavar='/PATTERN/REPLACEMENT/',
        help='update network code, can be given more than once')

    parser.add_option(
        '--rename-station',
        action='append',
        default=[],
        dest='rename_station',
        metavar='/PATTERN/REPLACEMENT/',
        help='update station code, can be given more than once')

    parser.add_option(
        '--rename-location',
        action='append',
        default=[],
        dest='rename_location',
        metavar='/PATTERN/REPLACEMENT/',
        help='update location code, can be given more than once')

    parser.add_option(
        '--rename-channel',
        action='append',
        default=[],
        dest='rename_channel',
        metavar='/PATTERN/REPLACEMENT/',
        help='update channel code, can be given more than once')

    parser.add_option(
        '--output-data-type',
        dest='output_data_type',
        choices=('same', 'int32', 'int64', 'float32', 'float64'),
        default='same',
        metavar='DTYPE',
        help='set data type. Choices: same [default], int32, '
             'int64, float32, float64. The output file format must support '
             'the given type.')

    parser.add_option(
        '--output-steim',
        dest='steim',
        type='steim',
        default=2,
        metavar='STEIM_COMPRESSION',
        help='set the mseed STEIM compression. Choices: 1 or 2. '
             'Default is STEIM-2, which can compress full range int32. '
             'NOTE: STEIM-2 is limited to 30 bit dynamic range.')

    parser.add_option(
        '--output-record-length',
        dest='record_length',
        type='record_length',
        default=4096,
        metavar='RECORD_LENGTH',
        help='set the mseed record length in bytes. Choices: %s. '
             'Default is 4096 bytes, which is commonly used for archiving.'
             % ', '.join(str(b) for b in io.mseed.VALID_RECORD_LENGTHS))

    quantity_choices = ('acceleration', 'velocity', 'displacement')
    parser.add_option(
        '--output-quantity',
        dest='output_quantity',
        choices=quantity_choices,
        help='deconvolve instrument transfer function. Choices: %s'
             % ', '.join(quantity_choices))

    parser.add_option(
        '--restitution-frequency-band',
        default='0.001,100.0',
        dest='str_fmin_fmax',
        metavar='FMIN,FMAX',
        help='frequency band for instrument deconvolution as FMIN,FMAX in Hz. '
             'Default: "%default"')

    sample_snap_choices = ('shift', 'interpolate')
    parser.add_option(
        '--sample-snap',
        dest='sample_snap',
        choices=sample_snap_choices,
        help='shift/interpolate traces so that samples are at even multiples '
             'of sampling rate. Choices: %s' % ', '.join(sample_snap_choices))

    (options, args) = parser.parse_args(args)

    if len(args) == 0:
        parser.print_help()
        sys.exit(1)

    if options.debug:
        util.setup_logging(program_name, 'debug')
    elif options.quiet:
        util.setup_logging(program_name, 'warning')
    else:
        util.setup_logging(program_name, 'info')

    tinc = None
    if options.tinc is not None:
        # Fix: compare the option string, not the local ``tinc`` which is
        # still None here -- previously 'huge'/'auto' were never recognized
        # and fell through to str_to_seconds(), which dies on them.
        if options.tinc in ('huge', 'auto'):
            tinc = options.tinc
        else:
            try:
                tinc = str_to_seconds(options.tinc)
            except Exception:
                die('invalid argument to --tinc')

    tmin = None
    if options.tmin is not None:
        try:
            tmin = stt(options.tmin)
        except Exception:
            # Fix: include the expected time format in the message; it was
            # left empty, unlike the equivalent --tmax error below.
            die('invalid argument to --tmin. '
                'Expected format is "%s"' % tfmt)

    tmax = None
    if options.tmax is not None:
        try:
            tmax = stt(options.tmax)
        except Exception:
            die('invalid argument to --tmax. '
                'Expected format is "%s"' % tfmt)

    target_deltat = None
    if options.downsample is not None:
        try:
            target_deltat = 1.0 / float(options.downsample)
        except Exception:
            die('invalid argument to --downsample')

    # Collect (attribute, pattern, replacement) triples from the
    # --rename-* options.
    replacements = []
    for k, rename_k in [
            ('network', options.rename_network),
            ('station', options.rename_station),
            ('location', options.rename_location),
            ('channel', options.rename_channel)]:

        for patrep in rename_k:
            m = re.match(r'/([^/]+)/([^/]*)/', patrep)
            if not m:
                die('invalid argument to --rename-%s. '
                    'Expected format is /PATTERN/REPLACEMENT/' % k)

            replacements.append((k, m.group(1), m.group(2)))

    sx = None
    if options.station_xml_fns:
        sxs = []
        for station_xml_fn in options.station_xml_fns:
            sxs.append(stationxml.load_xml(filename=station_xml_fn))

        sx = stationxml.primitive_merge(sxs)

    if options.output_quantity and sx is None:
        # Restitution needs response metadata; fail early with a clear
        # message instead of crashing with AttributeError in the loop below.
        die('--output-quantity requires --stationxml')

    # NOTE(review): ``events`` is not referenced further below; loading
    # only validates the given event files.
    events = []
    for event_fn in options.event_fns:
        events.extend(model.load_events(event_fn))

    p = pile.make_pile(
        paths=args,
        selector=None,
        regex=options.regex,
        fileformat=options.format,
        cachedirname=options.cache_dir,
        show_progress=not options.quiet)

    if p.tmin is None:
        die('data selection is empty')

    if tinc == 'auto':
        tinc = nice_seconds_floor(p.get_deltatmin() * 1000000.)

    if options.snap:
        if tmin is None:
            tmin = p.tmin

        # 'huge' is resolved to None further below; it cannot take part in
        # the snapping arithmetic.
        if tinc is not None and tinc != 'huge':
            tmin = int(math.floor(tmin / tinc)) * tinc

    output_path = options.output_path
    output_dir = options.output_dir

    if output_path and not output_dir and os.path.isdir(output_path):
        output_dir = output_path  # compat. with old behaviour

    if output_dir and not output_path:
        output_path = 'trace_%(network)s-%(station)s-' \
                      '%(location)s-%(channel)s_%(wmin)s.' + \
                      options.output_format

    if output_dir and output_path:
        output_path = pjoin(output_dir, output_path)

    if not output_path:
        die('--output not given')

    fmin, fmax = map(float, options.str_fmin_fmax.split(','))
    tfade_factor = 2.0
    ffade_factor = 1.5
    if options.output_quantity:
        # Extra padding so that restitution taper fades fall outside the
        # output window.
        tpad = 2*tfade_factor/fmin
    else:
        tpad = 0.

    if target_deltat is not None:
        tpad += target_deltat * 10.

    if tinc is None:
        if ((tmax or p.tmax) - (tmin or p.tmin)) \
                / p.get_deltatmin() > 100000000.:
            die('use --tinc=huge to really produce such large output files '
                'or use --tinc=INC to split into smaller files.')

    elif tinc == 'huge':
        # Fix: --tinc=huge means "no time subdivision", deliberately
        # bypassing the large-output safety check above.
        tinc = None

    kwargs = dict(tmin=tmin, tmax=tmax, tinc=tinc, tpad=tpad, style='batch')

    if options.traversal == 'channel-by-channel':
        it = p.chopper_grouped(gather=lambda tr: tr.nslc_id, **kwargs)

    elif options.traversal == 'station-by-station':
        it = p.chopper_grouped(gather=lambda tr: tr.nslc_id[:2], **kwargs)

    else:
        it = p.chopper(**kwargs)

    abort = []

    def got_sigint(signum, frame):
        # Only set a flag; the main loop checks it after each window so
        # that a partially processed window is still written out.
        abort.append(True)

    old = signal.signal(signal.SIGINT, got_sigint)

    save_kwargs = {}
    if options.output_format == 'mseed':
        save_kwargs['record_length'] = options.record_length
        save_kwargs['steim'] = options.steim

    for batch in it:
        traces = batch.traces
        if traces:
            twmin = batch.tmin
            twmax = batch.tmax
            logger.info('processing %s - %s, %i traces' %
                        (tts(twmin), tts(twmax), len(traces)))

            if options.sample_snap:
                for tr in traces:
                    tr.snap(interpolate=options.sample_snap == 'interpolate')

            if target_deltat is not None:
                out_traces = []
                for tr in traces:
                    try:
                        tr.downsample_to(
                            target_deltat, snap=True, demean=False)

                        if options.output_data_type == 'same':
                            # astype() with the identical dtype still makes
                            # a copy, detaching the data from any shared
                            # buffer.
                            tr.ydata = tr.ydata.astype(tr.ydata.dtype)

                        out_traces.append(tr)

                    except (trace.TraceTooShort, trace.NoData):
                        pass

                traces = out_traces

            if options.output_quantity:
                out_traces = []
                tfade = tfade_factor / fmin
                ftap = (fmin / ffade_factor, fmin, fmax, ffade_factor * fmax)
                for tr in traces:
                    try:
                        response = sx.get_pyrocko_response(
                            tr.nslc_id,
                            timespan=(tr.tmin, tr.tmax),
                            fake_input_units=output_units[
                                options.output_quantity])

                        rest_tr = tr.transfer(
                            tfade, ftap, response, invert=True, demean=True)

                        out_traces.append(rest_tr)

                    except stationxml.NoResponseInformation as e:
                        # logger.warn is a deprecated alias of warning.
                        logger.warning(
                            'Cannot restitute: %s (no response)' % str(e))

                    except stationxml.MultipleResponseInformation as e:
                        logger.warning(
                            'Cannot restitute: %s (multiple responses found)'
                            % str(e))

                    except (trace.TraceTooShort, trace.NoData):
                        logger.warning(
                            'Trace too short: %s' % '.'.join(tr.nslc_id))

                traces = out_traces

            if options.scale is not None:
                for tr in traces:
                    tr.ydata = options.scale * tr.ydata

            if options.output_data_type != 'same':
                for tr in traces:
                    tr.ydata = tr.ydata.astype(
                        name_to_dtype[options.output_data_type])

            if replacements:
                for tr in traces:
                    r = {}
                    for k, pat, repl in replacements:
                        oldval = getattr(tr, k)
                        newval, n = re.subn(pat, repl, oldval)
                        if n:
                            r[k] = newval

                    tr.set_codes(**r)

            if output_path:
                # Chop to the window (without padding) before saving.
                otraces = []
                for tr in traces:
                    try:
                        otr = tr.chop(twmin, twmax, inplace=False)
                        otraces.append(otr)
                    except trace.NoData:
                        pass

                try:
                    io.save(otraces, output_path,
                            format=options.output_format,
                            overwrite=options.force,
                            additional=dict(
                                wmin_year=tts(twmin, format='%Y'),
                                wmin_month=tts(twmin, format='%m'),
                                wmin_day=tts(twmin, format='%d'),
                                wmin_jday=tts(twmin, format='%j'),
                                wmin=tts(twmin, format='%Y-%m-%d_%H-%M-%S'),
                                wmax_year=tts(twmax, format='%Y'),
                                wmax_month=tts(twmax, format='%m'),
                                wmax_day=tts(twmax, format='%d'),
                                wmax_jday=tts(twmax, format='%j'),
                                wmax=tts(twmax, format='%Y-%m-%d_%H-%M-%S')),
                            **save_kwargs)
                except io.FileSaveError as e:
                    die(str(e))

        if abort:
            break

    # Restore the previous SIGINT handler before leaving.
    signal.signal(signal.SIGINT, old)

    if abort:
        die('interrupted.')

610 

611 

# Script entry point: forward command line arguments (without the program
# name) to main().
if __name__ == '__main__':
    main(sys.argv[1:])