1#!/usr/bin/env python 

2# http://pyrocko.org - GPLv3 

3# 

4# The Pyrocko Developers, 21st Century 

5# ---|P------/S----------~Lg---------- 

6 

7import sys 

8import re 

9import os 

10import logging 

11import signal 

12import math 

13from copy import copy 

14from optparse import OptionParser, Option, OptionValueError 

15 

16import numpy as num 

17 

18from pyrocko import util, config, pile, model, io, trace 

19from pyrocko.io import stationxml 

20 

# Shorthand used when composing output file paths.
pjoin = os.path.join

logger = logging.getLogger('pyrocko.apps.jackseis')

program_name = 'jackseis'

usage = program_name + ' <inputs> ... [options]'

description = '''A simple tool to manipulate waveform archive data.'''

# Human-readable description of the expected time string format; used in
# option help texts and error messages.
tfmt = 'YYYY-mm-dd HH:MM:SS[.xxx]'
# Shorthands for the pyrocko.util time conversion helpers.
tts = util.time_to_str
stt = util.str_to_time_fillup

34 

35 

def die(message):
    '''
    Abort the program, reporting *message* as a program-level error.
    '''
    msg = '%s: error: %s' % (program_name, message)
    sys.exit(msg)

38 

39 

# Mapping of --output-data-type choice strings to numpy dtypes.
name_to_dtype = {
    'int32': num.int32,
    'int64': num.int64,
    'float32': num.float32,
    'float64': num.float64}


# Mapping of --output-quantity choice strings to the StationXML input
# units passed to the response lookup during restitution.
output_units = {
    'acceleration': 'M/S**2',
    'velocity': 'M/S',
    'displacement': 'M'}

51 

52 

def str_to_seconds(s):
    '''
    Convert a duration string to seconds.

    Accepts an optional single-character unit suffix: ``s`` (seconds),
    ``m`` (minutes), ``h`` (hours) or ``d`` (days). Without a suffix the
    value is interpreted as seconds.

    :param s: duration string, e.g. ``'10'``, ``'5m'``, ``'2h'``, ``'1d'``
    :returns: duration in seconds as ``float``
    '''
    scales = {
        's': 1.,
        'm': 60.,
        'h': 3600.,
        'd': 3600.*24.}

    unit = s[-1:]
    if unit in scales:
        return float(s[:-1]) * scales[unit]

    return float(s)

64 

65 

def nice_seconds_floor(s):
    '''
    Round a duration down to a "nice" time interval.

    Nice values range from one second up to two days. Values below one
    second or at/above two days are returned unchanged.

    :param s: duration in seconds
    :returns: largest nice value not exceeding *s*, or *s* itself when
        out of range
    '''
    steps = (1., 10., 60., 600., 3600., 3.*3600., 12*3600., 24*3600.,
             48*3600.)

    # Out of range on either side: pass the value through unchanged.
    if s < steps[0] or s >= steps[-1]:
        return s

    return max(step for step in steps if step <= s)

76 

77 

def check_record_length(option, opt, value):
    '''
    optparse type checker for the custom ``record_length`` option type.

    :param option: the ``Option`` instance being checked
    :param opt: the option string seen on the command line
    :param value: the raw option value string
    :returns: the record length as ``int`` if it is one of
        ``io.mseed.VALID_RECORD_LENGTHS``
    :raises optparse.OptionValueError: for any other value
    '''
    try:
        reclen = int(value)
        if reclen in io.mseed.VALID_RECORD_LENGTHS:
            return reclen
    except Exception:
        pass

    # Report the raw input value here: if int() failed above, 'reclen'
    # was never bound and referencing it would raise NameError instead
    # of the intended OptionValueError.
    raise OptionValueError(
        'invalid record length %s. (choose from %s)'
        % (value, ', '.join(str(b) for b in io.mseed.VALID_RECORD_LENGTHS)))

88 

89 

def check_steim(option, opt, value):
    '''
    optparse type checker for the custom ``steim`` option type.

    :param option: the ``Option`` instance being checked
    :param opt: the option string seen on the command line
    :param value: the raw option value string
    :returns: the STEIM compression level as ``int`` (1 or 2)
    :raises optparse.OptionValueError: for any other value
    '''
    try:
        compression = int(value)
        if compression in (1, 2):
            return compression
    except Exception:
        pass

    # Report the raw input value here: if int() failed above,
    # 'compression' was never bound and referencing it would raise
    # NameError instead of the intended OptionValueError.
    raise OptionValueError(
        'invalid STEIM compression %s. (choose from 1, 2)' % value)

99 

100 

class JackseisOptions(Option):
    '''
    ``optparse.Option`` subclass registering the custom option value
    types ``record_length`` and ``steim``.
    '''
    TYPES = Option.TYPES + ('record_length', 'steim')
    TYPE_CHECKER = dict(
        Option.TYPE_CHECKER,
        record_length=check_record_length,
        steim=check_steim)

106 

107 

def process(get_pile, options):
    '''
    Apply the jackseis processing chain to a pile of waveform data.

    :param get_pile: callable returning the :py:class:`pyrocko.pile.Pile`
        holding the input data selection
    :param options: options object as returned by ``parser.parse_args()``
        in :py:func:`main`

    Terminates the program via :py:func:`die` on invalid options, an
    empty data selection, or file saving errors. Installs a temporary
    SIGINT handler so that an interrupted run stops cleanly after the
    current batch has been written.
    '''

    # --- Parse and validate time-related options. ---

    tinc = None
    if options.tinc is not None:
        # Bugfix: test the option string, not the local variable 'tinc'
        # (which is still None here) - otherwise the special keywords
        # 'huge' and 'auto' would be rejected by str_to_seconds() below.
        if options.tinc in ('huge', 'auto'):
            tinc = options.tinc
        else:
            try:
                tinc = str_to_seconds(options.tinc)
            except Exception:
                die('invalid argument to --tinc')

    tmin = None
    if options.tmin is not None:
        try:
            tmin = stt(options.tmin)
        except Exception:
            # Bugfix: the '% tfmt' was part of the string literal, so the
            # placeholder was never expanded in the error message.
            die('invalid argument to --tmin. '
                'Expected format is "%s"' % tfmt)

    tmax = None
    if options.tmax is not None:
        try:
            tmax = stt(options.tmax)
        except Exception:
            die('invalid argument to --tmax. '
                'Expected format is "%s"' % tfmt)

    target_deltat = None
    if options.downsample is not None:
        try:
            target_deltat = 1.0 / float(options.downsample)
        except Exception:
            die('invalid argument to --downsample')

    # --- Compile the --rename-* replacement rules. ---

    replacements = []
    for k, rename_k, in [
            ('network', options.rename_network),
            ('station', options.rename_station),
            ('location', options.rename_location),
            ('channel', options.rename_channel)]:

        for patrep in rename_k:
            m = re.match(r'/([^/]+)/([^/]*)/', patrep)
            if not m:
                die('invalid argument to --rename-%s. '
                    'Expected format is /PATTERN/REPLACEMENT/' % k)

            replacements.append((k, m.group(1), m.group(2)))

    # --- Load station metadata (needed for restitution). ---

    sx = None
    if options.station_xml_fns:
        sxs = []
        for station_xml_fn in options.station_xml_fns:
            sxs.append(stationxml.load_xml(filename=station_xml_fn))

        sx = stationxml.primitive_merge(sxs)

    # NOTE(review): events are loaded (and thereby validated) but not
    # used anywhere below - confirm whether this is intentional.
    events = []
    for event_fn in options.event_fns:
        events.extend(model.load_events(event_fn))

    p = get_pile()

    if p.tmin is None:
        die('data selection is empty')

    if tinc == 'auto':
        tinc = nice_seconds_floor(p.get_deltatmin() * 1000000.)

    if options.snap:
        if tmin is None:
            tmin = p.tmin

        if tinc is not None:
            # Snap the start time to an even multiple of the file length.
            tmin = int(math.floor(tmin / tinc)) * tinc

    # --- Determine the output path template. ---

    output_path = options.output_path
    output_dir = options.output_dir

    if output_path and not output_dir and os.path.isdir(output_path):
        output_dir = output_path  # compat. with old behaviour

    if output_dir and not output_path:
        output_path = 'trace_%(network)s-%(station)s-' \
            '%(location)s-%(channel)s_%(wmin)s.' + \
            options.output_format

    if output_dir and output_path:
        output_path = pjoin(output_dir, output_path)

    if not output_path:
        die('--output not given')

    # --- Set up restitution and padding parameters. ---

    fmin, fmax = map(float, options.str_fmin_fmax.split(','))
    tfade_factor = 2.0
    ffade_factor = 1.5
    if options.output_quantity:
        # Extra padding so the restitution taper does not eat into the
        # output time window.
        tpad = 2*tfade_factor/fmin
    else:
        tpad = 0.

    if target_deltat is not None:
        # Downsampling filters need some extra samples at the edges.
        tpad += target_deltat * 10.

    if tinc is None:
        if ((tmax or p.tmax) - (tmin or p.tmin)) \
                / p.get_deltatmin() > 100000000.:
            die('use --tinc=huge to really produce such large output files '
                'or use --tinc=INC to split into smaller files.')

    kwargs = dict(tmin=tmin, tmax=tmax, tinc=tinc, tpad=tpad, style='batch')

    # --- Choose the traversal order. ---

    if options.traversal == 'channel-by-channel':
        it = p.chopper_grouped(gather=lambda tr: tr.nslc_id, **kwargs)

    elif options.traversal == 'station-by-station':
        it = p.chopper_grouped(gather=lambda tr: tr.nslc_id[:2], **kwargs)

    else:
        it = p.chopper(**kwargs)

    abort = []

    def got_sigint(signum, frame):
        # Only record the interrupt; the batch loop below checks the flag
        # after each batch so that no partially written file is left.
        abort.append(True)

    old = signal.signal(signal.SIGINT, got_sigint)

    save_kwargs = {}
    if options.output_format == 'mseed':
        save_kwargs['record_length'] = options.record_length
        save_kwargs['steim'] = options.steim

    def process_traces(batch):
        '''Process one batch of traces and save the results.'''
        traces = batch.traces
        if traces:
            twmin = batch.tmin
            twmax = batch.tmax
            logger.info('processing %s - %s, %i traces' %
                        (tts(twmin), tts(twmax), len(traces)))

            if options.sample_snap:
                for tr in traces:
                    tr.snap(interpolate=options.sample_snap == 'interpolate')

            if target_deltat is not None:
                out_traces = []
                for tr in traces:
                    try:
                        tr.downsample_to(
                            target_deltat,
                            snap=True,
                            demean=False,
                            allow_upsample_max=4)

                        if options.output_data_type == 'same':
                            # NOTE(review): this astype() is a no-op (it
                            # casts to the trace's current dtype). Possibly
                            # intended to restore the pre-downsampling
                            # dtype - confirm before changing.
                            tr.ydata = tr.ydata.astype(tr.ydata.dtype)

                        out_traces.append(tr)

                    except (trace.TraceTooShort, trace.NoData):
                        pass

                traces = out_traces

            if options.output_quantity:
                out_traces = []
                tfade = tfade_factor / fmin
                ftap = (fmin / ffade_factor, fmin, fmax, ffade_factor * fmax)
                for tr in traces:
                    try:
                        response = sx.get_pyrocko_response(
                            tr.nslc_id,
                            timespan=(tr.tmin, tr.tmax),
                            fake_input_units=output_units[
                                options.output_quantity])

                        rest_tr = tr.transfer(
                            tfade, ftap, response, invert=True, demean=True)

                        out_traces.append(rest_tr)

                    except stationxml.NoResponseInformation as e:
                        logger.warning(
                            'Cannot restitute: %s (no response)' % str(e))

                    except stationxml.MultipleResponseInformation as e:
                        logger.warning(
                            'Cannot restitute: %s (multiple responses found)'
                            % str(e))

                    except (trace.TraceTooShort, trace.NoData):
                        logger.warning(
                            'Trace too short: %s' % '.'.join(tr.nslc_id))

                traces = out_traces

            if options.scale is not None:
                for tr in traces:
                    tr.ydata = options.scale * tr.ydata

            if options.output_data_type != 'same':
                for tr in traces:
                    tr.ydata = tr.ydata.astype(
                        name_to_dtype[options.output_data_type])

            if replacements:
                for tr in traces:
                    r = {}
                    for k, pat, repl in replacements:
                        oldval = getattr(tr, k)
                        newval, n = re.subn(pat, repl, oldval)
                        if n:
                            r[k] = newval

                    tr.set_codes(**r)

            if output_path:
                otraces = []
                for tr in traces:
                    try:
                        # Chop away the tpad margins before writing.
                        otr = tr.chop(twmin, twmax, inplace=False)
                        otraces.append(otr)
                    except trace.NoData:
                        pass

                try:
                    io.save(otraces, output_path, format=options.output_format,
                            overwrite=options.force,
                            additional=dict(
                                wmin_year=tts(twmin, format='%Y'),
                                wmin_month=tts(twmin, format='%m'),
                                wmin_day=tts(twmin, format='%d'),
                                wmin_jday=tts(twmin, format='%j'),
                                wmin=tts(twmin, format='%Y-%m-%d_%H-%M-%S'),
                                wmax_year=tts(twmax, format='%Y'),
                                wmax_month=tts(twmax, format='%m'),
                                wmax_day=tts(twmax, format='%d'),
                                wmax_jday=tts(twmax, format='%j'),
                                wmax=tts(twmax, format='%Y-%m-%d_%H-%M-%S')),
                            **save_kwargs)
                except io.FileSaveError as e:
                    die(str(e))

    for batch in it:
        process_traces(batch)
        if abort:
            break

    # Restore the previously installed SIGINT handler.
    signal.signal(signal.SIGINT, old)

    if abort:
        die('interrupted.')

361 

362 

def main(args=None):
    '''
    Command line entry point of the jackseis tool.

    :param args: command line arguments to parse; defaults to
        ``sys.argv[1:]`` when ``None``
    :returns: the return value of :py:func:`process`
    '''
    if args is None:
        args = sys.argv[1:]

    # JackseisOptions registers the custom 'record_length' and 'steim'
    # option value types used further down.
    parser = OptionParser(
        usage=usage,
        description=description,
        option_class=JackseisOptions,
        formatter=util.BetterHelpFormatter())

    parser.add_option(
        '--format',
        dest='format',
        default='detect',
        choices=io.allowed_formats('load'),
        help='assume input files are of given FORMAT. Choices: %s' %
        io.allowed_formats('load', 'cli_help', 'detect'))

    parser.add_option(
        '--pattern',
        dest='regex',
        metavar='REGEX',
        help='only include files whose paths match REGEX')

    parser.add_option(
        '--stationxml',
        dest='station_xml_fns',
        action='append',
        default=[],
        metavar='STATIONXML',
        help='read station metadata from file STATIONXML')

    parser.add_option(
        '--event', '--events',
        dest='event_fns',
        action='append',
        default=[],
        metavar='EVENT',
        help='read event information from file EVENT')

    parser.add_option(
        '--cache',
        dest='cache_dir',
        default=config.config().cache_dir,
        metavar='DIR',
        help='use directory DIR to cache trace metadata '
        "(default='%default')")

    parser.add_option(
        '--quiet',
        dest='quiet',
        action='store_true',
        default=False,
        help='disable output of progress information')

    parser.add_option(
        '--debug',
        dest='debug',
        action='store_true',
        default=False,
        help='print debugging information to stderr')

    parser.add_option(
        '--tmin',
        dest='tmin',
        help='start time as "%s"' % tfmt)

    parser.add_option(
        '--tmax',
        dest='tmax',
        help='end time as "%s"' % tfmt)

    parser.add_option(
        '--tinc',
        dest='tinc',
        help='set time length of output files [s] or "auto" to automatically '
             'choose an appropriate time increment or "huge" to allow to use '
             'a lot of system memory to merge input traces into huge output '
             'files.')

    parser.add_option(
        '--downsample',
        dest='downsample',
        metavar='RATE',
        help='downsample to RATE [Hz]')

    parser.add_option(
        '--scale',
        dest='scale',
        type=float,
        metavar='FACTOR',
        help='scale seismograms with given FACTOR. Changes data type to '
             'float64. Use --output-data-type to change again after scaling.')

    parser.add_option(
        '--output',
        dest='output_path',
        metavar='TEMPLATE',
        help='set output path to TEMPLATE. Available placeholders '
             'are %n: network, %s: station, %l: location, %c: channel, '
             '%b: begin time, %e: end time, %j: julian day of year. The '
             'following additional placeholders use the window begin and end '
             'times rather than trace begin and end times (to suppress '
             'producing many small files for gappy traces), %(wmin_year)s, '
             '%(wmin_month)s, %(wmin_day)s, %(wmin)s, %(wmin_jday)s, '
             '%(wmax_year)s, %(wmax_month)s, %(wmax_day)s, %(wmax)s, '
             "%(wmax_jday)s. Example: --output='data/%s/trace-%s-%c.mseed'")

    parser.add_option(
        '--output-dir',
        metavar='TEMPLATE',
        dest='output_dir',
        help='set output directory to TEMPLATE (see --output for details) '
             'and automatically choose filenames. '
             'Produces files like TEMPLATE/NET-STA-LOC-CHA_BEGINTIME.FORMAT')

    parser.add_option(
        '--output-format',
        dest='output_format',
        default='mseed',
        choices=io.allowed_formats('save'),
        help='set output file format. Choices: %s' %
        io.allowed_formats('save', 'cli_help', 'mseed'))

    parser.add_option(
        '--force',
        dest='force',
        action='store_true',
        default=False,
        help='force overwriting of existing files')

    parser.add_option(
        '--no-snap',
        dest='snap',
        action='store_false',
        default=True,
        help='do not start output files at even multiples of file length')

    parser.add_option(
        '--traversal',
        dest='traversal',
        choices=('station-by-station', 'channel-by-channel', 'chronological'),
        default='station-by-station',
        help='set traversal order for traces processing. '
             "Choices are 'station-by-station' [default], "
             "'channel-by-channel', and 'chronological'. Chronological "
             'traversal uses more processing memory but makes it possible to '
             'join multiple stations into single output files')

    parser.add_option(
        '--rename-network',
        action='append',
        default=[],
        dest='rename_network',
        metavar='/PATTERN/REPLACEMENT/',
        help='update network code, can be given more than once')

    parser.add_option(
        '--rename-station',
        action='append',
        default=[],
        dest='rename_station',
        metavar='/PATTERN/REPLACEMENT/',
        help='update station code, can be given more than once')

    parser.add_option(
        '--rename-location',
        action='append',
        default=[],
        dest='rename_location',
        metavar='/PATTERN/REPLACEMENT/',
        help='update location code, can be given more than once')

    parser.add_option(
        '--rename-channel',
        action='append',
        default=[],
        dest='rename_channel',
        metavar='/PATTERN/REPLACEMENT/',
        help='update channel code, can be given more than once')

    parser.add_option(
        '--output-data-type',
        dest='output_data_type',
        choices=('same', 'int32', 'int64', 'float32', 'float64'),
        default='same',
        metavar='DTYPE',
        help='set data type. Choices: same [default], int32, '
             'int64, float32, float64. The output file format must support '
             'the given type.')

    parser.add_option(
        '--output-steim',
        dest='steim',
        type='steim',
        default=2,
        metavar='STEIM_COMPRESSION',
        help='set the mseed STEIM compression. Choices: 1 or 2. '
             'Default is STEIM-2, which can compress full range int32. '
             'NOTE: STEIM-2 is limited to 30 bit dynamic range.')

    parser.add_option(
        '--output-record-length',
        dest='record_length',
        type='record_length',
        default=4096,
        metavar='RECORD_LENGTH',
        help='set the mseed record length in bytes. Choices: %s. '
             'Default is 4096 bytes, which is commonly used for archiving.'
             % ', '.join(str(b) for b in io.mseed.VALID_RECORD_LENGTHS))

    quantity_choices = ('acceleration', 'velocity', 'displacement')
    parser.add_option(
        '--output-quantity',
        dest='output_quantity',
        choices=quantity_choices,
        help='deconvolve instrument transfer function. Choices: %s'
             % ', '.join(quantity_choices))

    parser.add_option(
        '--restitution-frequency-band',
        default='0.001,100.0',
        dest='str_fmin_fmax',
        metavar='FMIN,FMAX',
        help='frequency band for instrument deconvolution as FMIN,FMAX in Hz. '
             'Default: "%default"')

    sample_snap_choices = ('shift', 'interpolate')
    parser.add_option(
        '--sample-snap',
        dest='sample_snap',
        choices=sample_snap_choices,
        help='shift/interpolate traces so that samples are at even multiples '
             'of sampling rate. Choices: %s' % ', '.join(sample_snap_choices))

    (options, args) = parser.parse_args(args)

    # Without any input paths there is nothing to do - print the help and
    # exit with an error status.
    if len(args) == 0:
        parser.print_help()
        sys.exit(1)

    # Configure logging verbosity; --debug takes precedence over --quiet.
    if options.debug:
        util.setup_logging(program_name, 'debug')
    elif options.quiet:
        util.setup_logging(program_name, 'warning')
    else:
        util.setup_logging(program_name, 'info')

    def get_pile():
        # Deferred pile construction: process() decides when the input
        # files are actually scanned.
        return pile.make_pile(
            paths=args,
            selector=None,
            regex=options.regex,
            fileformat=options.format,
            cachedirname=options.cache_dir,
            show_progress=not options.quiet)

    return process(get_pile, options)

621 

622 

# Allow running this module directly as a script.
if __name__ == '__main__':
    main()