1#!/usr/bin/env python 

2# http://pyrocko.org - GPLv3 

3# 

4# The Pyrocko Developers, 21st Century 

5# ---|P------/S----------~Lg---------- 

6 

7import sys 

8import re 

9import os 

10import logging 

11import signal 

12import math 

13from copy import copy 

14from optparse import OptionParser, Option, OptionValueError 

15 

16import numpy as num 

17 

18from pyrocko import util, config, pile, model, io, trace 

19from pyrocko.io import stationxml 

20 

# Shortcut for building filesystem paths.
pjoin = os.path.join

logger = logging.getLogger('pyrocko.apps.jackseis')

program_name = 'jackseis'

usage = program_name + ' <inputs> ... [options]'

description = '''A simple tool to manipulate waveform archive data.'''

# Human-readable time format shown in the --tmin/--tmax help texts and
# error messages.
tfmt = 'YYYY-mm-dd HH:MM:SS[.xxx]'
# Shortcuts for the time conversion helpers used throughout this module.
tts = util.time_to_str
stt = util.str_to_time_fillup

34 

35 

def die(message):
    '''Abort the program, printing *message* prefixed with the program name.'''
    text = '%s: error: %s' % (program_name, message)
    sys.exit(text)

38 

39 

# Mapping of --output-data-type choices to numpy dtypes.
name_to_dtype = {
    'int32': num.int32,
    'int64': num.int64,
    'float32': num.float32,
    'float64': num.float64}


# Mapping of --output-quantity choices to unit names, passed as fake
# input units when looking up instrument responses for restitution.
output_units = {
    'acceleration': 'M/S**2',
    'velocity': 'M/S',
    'displacement': 'M'}

51 

52 

def str_to_seconds(s):
    '''
    Convert a duration string to seconds.

    Accepts a plain number or a number with one of the unit suffixes
    's' (seconds), 'm' (minutes), 'h' (hours) or 'd' (days).
    Raises :py:exc:`ValueError` for anything else.
    '''
    unit_factors = {'s': 1., 'm': 60., 'h': 3600., 'd': 3600.*24.}
    suffix = s[-1:]
    if suffix in unit_factors:
        return float(s[:-1]) * unit_factors[suffix]

    return float(s)

64 

65 

def nice_seconds_floor(s):
    '''
    Round a duration [s] down to the nearest "nice" interval.

    Returns *s* unchanged when it is below the smallest nice value or
    at/above the largest one; otherwise returns the largest nice value
    not exceeding *s*.
    '''
    nice = [1., 10., 60., 600., 3600., 3.*3600., 12*3600., 24*3600., 48*3600.]

    if s < nice[0] or s >= nice[-1]:
        return s

    return [x for x in nice if x <= s][-1]

76 

77 

def check_record_length(option, opt, value):
    '''
    optparse type checker for mseed record lengths.

    Returns the value as :py:class:`int` if it is a valid mseed record
    length, otherwise raises :py:exc:`optparse.OptionValueError`.
    '''
    try:
        reclen = int(value)
        if reclen in io.mseed.VALID_RECORD_LENGTHS:
            return reclen
    except Exception:
        pass

    # Report the raw input string: ``reclen`` is unbound when int()
    # failed, which previously caused a NameError here.
    raise OptionValueError(
        'invalid record length %s. (choose from %s)'
        % (value, ', '.join(str(b) for b in io.mseed.VALID_RECORD_LENGTHS)))

88 

89 

def check_steim(option, opt, value):
    '''
    optparse type checker for the mseed STEIM compression level.

    Returns the value as :py:class:`int` if it is 1 or 2, otherwise
    raises :py:exc:`optparse.OptionValueError`.
    '''
    try:
        compression = int(value)
        if compression in (1, 2):
            return compression
    except Exception:
        pass

    # Report the raw input string: ``compression`` is unbound when
    # int() failed, which previously caused a NameError here.
    raise OptionValueError(
        'invalid STEIM compression %s. (choose from 1, 2)' % value)

99 

100 

class JackseisOptions(Option):
    '''
    Option subclass registering custom option value types.

    Adds the ``record_length`` and ``steim`` types so that values for
    ``--output-record-length`` and ``--output-steim`` are validated
    while the command line is parsed.
    '''
    TYPES = Option.TYPES + ('record_length', 'steim')
    TYPE_CHECKER = copy(Option.TYPE_CHECKER)
    TYPE_CHECKER['record_length'] = check_record_length
    TYPE_CHECKER['steim'] = check_steim

106 

107 

def process(get_pile, options):
    '''
    Run the jackseis conversion pipeline.

    :param get_pile: callable returning the input data selection as a
        pile object (deferred so that option validation happens before
        the potentially slow directory scan)
    :param options: parsed command line options as produced by
        :py:func:`main`

    Traverses the input in time windows, applying (in order) sample
    snapping, downsampling, restitution, scaling, data type conversion
    and channel code renaming, then saves each window according to the
    ``--output*`` options. Calls :py:func:`die` on invalid arguments or
    save errors. A temporary SIGINT handler lets an interrupt finish
    the current batch cleanly before aborting.
    '''
    tinc = None
    if options.tinc is not None:
        # Bug fix: this previously tested the local ``tinc`` (always None
        # at this point) instead of the option string, so --tinc=auto and
        # --tinc=huge were rejected as invalid arguments.
        if options.tinc in ('huge', 'auto'):
            tinc = options.tinc
        else:
            try:
                tinc = str_to_seconds(options.tinc)
            except Exception:
                die('invalid argument to --tinc')

    tmin = None
    if options.tmin is not None:
        try:
            tmin = stt(options.tmin)
        except Exception:
            # Bug fix: the format operator was inside the string literal
            # ('... "%s" % tfmt'), so the message printed the raw template
            # instead of the expected time format.
            die('invalid argument to --tmin. '
                'Expected format is "%s"' % tfmt)

    tmax = None
    if options.tmax is not None:
        try:
            tmax = stt(options.tmax)
        except Exception:
            die('invalid argument to --tmax. '
                'Expected format is "%s"' % tfmt)

    target_deltat = None
    if options.downsample is not None:
        try:
            target_deltat = 1.0 / float(options.downsample)
        except Exception:
            die('invalid argument to --downsample')

    # Collect (attribute, pattern, replacement) triples from the
    # --rename-* options; applied to each trace below.
    replacements = []
    for k, rename_k, in [
            ('network', options.rename_network),
            ('station', options.rename_station),
            ('location', options.rename_location),
            ('channel', options.rename_channel)]:

        for patrep in rename_k:
            m = re.match(r'/([^/]+)/([^/]*)/', patrep)
            if not m:
                die('invalid argument to --rename-%s. '
                    'Expected format is /PATTERN/REPLACEMENT/' % k)

            replacements.append((k, m.group(1), m.group(2)))

    # Merge all given StationXML files into one object for response
    # lookups during restitution.
    sx = None
    if options.station_xml_fns:
        sxs = []
        for station_xml_fn in options.station_xml_fns:
            sxs.append(stationxml.load_xml(filename=station_xml_fn))

        sx = stationxml.primitive_merge(sxs)

    events = []
    for event_fn in options.event_fns:
        events.extend(model.load_events(event_fn))

    p = get_pile()

    if p.tmin is None:
        die('data selection is empty')

    if tinc == 'auto':
        # Choose a nice window length of roughly one million samples.
        tinc = nice_seconds_floor(p.get_deltatmin() * 1000000.)

    if options.snap:
        if tmin is None:
            tmin = p.tmin

        # Align window starts to even multiples of the window length.
        if tinc is not None:
            tmin = int(math.floor(tmin / tinc)) * tinc

    output_path = options.output_path
    output_dir = options.output_dir

    if output_path and not output_dir and os.path.isdir(output_path):
        output_dir = output_path  # compat. with old behaviour

    if output_dir and not output_path:
        output_path = 'trace_%(network)s-%(station)s-' \
            '%(location)s-%(channel)s_%(wmin)s.' + \
            options.output_format

    if output_dir and output_path:
        output_path = pjoin(output_dir, output_path)

    if not output_path:
        die('--output not given')

    fmin, fmax = map(float, options.str_fmin_fmax.split(','))
    tfade_factor = 2.0
    ffade_factor = 1.5
    if options.output_quantity:
        # Extra padding so the restitution taper does not eat into the
        # output window.
        tpad = 2*tfade_factor/fmin
    else:
        tpad = 0.

    if target_deltat is not None:
        tpad += target_deltat * 10.

    if tinc is None:
        if ((tmax or p.tmax) - (tmin or p.tmin)) \
                / p.get_deltatmin() > 100000000.:
            die('use --tinc=huge to really produce such large output files '
                'or use --tinc=INC to split into smaller files.')

    kwargs = dict(tmin=tmin, tmax=tmax, tinc=tinc, tpad=tpad, style='batch')

    if options.traversal == 'channel-by-channel':
        it = p.chopper_grouped(gather=lambda tr: tr.nslc_id, **kwargs)

    elif options.traversal == 'station-by-station':
        it = p.chopper_grouped(gather=lambda tr: tr.nslc_id[:2], **kwargs)

    else:
        it = p.chopper(**kwargs)

    abort = []

    def got_sigint(signum, frame):
        # Only flag the abort; the batch loop below checks the flag
        # after each completed batch.
        abort.append(True)

    old = signal.signal(signal.SIGINT, got_sigint)

    save_kwargs = {}
    if options.output_format == 'mseed':
        save_kwargs['record_length'] = options.record_length
        save_kwargs['steim'] = options.steim

    def process_traces(batch):
        # Apply the full processing chain to one batch of traces and
        # save the result.
        traces = batch.traces
        if traces:
            twmin = batch.tmin
            twmax = batch.tmax
            logger.info('processing %s - %s, %i traces' %
                        (tts(twmin), tts(twmax), len(traces)))

            if options.sample_snap:
                for tr in traces:
                    tr.snap(interpolate=options.sample_snap == 'interpolate')

            if target_deltat is not None:
                out_traces = []
                for tr in traces:
                    try:
                        tr.downsample_to(
                            target_deltat, snap=True, demean=False)

                        if options.output_data_type == 'same':
                            # Keep the trace's own dtype (downsampling
                            # may have changed it internally).
                            tr.ydata = tr.ydata.astype(tr.ydata.dtype)

                        out_traces.append(tr)

                    except (trace.TraceTooShort, trace.NoData):
                        # Traces too short to downsample are dropped.
                        pass

                traces = out_traces

            if options.output_quantity:
                out_traces = []
                tfade = tfade_factor / fmin
                ftap = (fmin / ffade_factor, fmin, fmax, ffade_factor * fmax)
                for tr in traces:
                    try:
                        response = sx.get_pyrocko_response(
                            tr.nslc_id,
                            timespan=(tr.tmin, tr.tmax),
                            fake_input_units=output_units[
                                options.output_quantity])

                        rest_tr = tr.transfer(
                            tfade, ftap, response, invert=True, demean=True)

                        out_traces.append(rest_tr)

                    except stationxml.NoResponseInformation as e:
                        logger.warning(
                            'Cannot restitute: %s (no response)' % str(e))

                    except stationxml.MultipleResponseInformation as e:
                        logger.warning(
                            'Cannot restitute: %s (multiple responses found)'
                            % str(e))

                    except (trace.TraceTooShort, trace.NoData):
                        logger.warning(
                            'Trace too short: %s' % '.'.join(tr.nslc_id))

                traces = out_traces

            if options.scale is not None:
                # Note: multiplication by a float promotes ydata to
                # float64; --output-data-type can convert back below.
                for tr in traces:
                    tr.ydata = options.scale * tr.ydata

            if options.output_data_type != 'same':
                for tr in traces:
                    tr.ydata = tr.ydata.astype(
                        name_to_dtype[options.output_data_type])

            if replacements:
                for tr in traces:
                    r = {}
                    for k, pat, repl in replacements:
                        oldval = getattr(tr, k)
                        newval, n = re.subn(pat, repl, oldval)
                        if n:
                            r[k] = newval

                    tr.set_codes(**r)

            if output_path:
                otraces = []
                for tr in traces:
                    try:
                        # Cut away the processing padding (tpad).
                        otr = tr.chop(twmin, twmax, inplace=False)
                        otraces.append(otr)
                    except trace.NoData:
                        pass

                try:
                    io.save(otraces, output_path, format=options.output_format,
                            overwrite=options.force,
                            additional=dict(
                                wmin_year=tts(twmin, format='%Y'),
                                wmin_month=tts(twmin, format='%m'),
                                wmin_day=tts(twmin, format='%d'),
                                wmin_jday=tts(twmin, format='%j'),
                                wmin=tts(twmin, format='%Y-%m-%d_%H-%M-%S'),
                                wmax_year=tts(twmax, format='%Y'),
                                wmax_month=tts(twmax, format='%m'),
                                wmax_day=tts(twmax, format='%d'),
                                wmax_jday=tts(twmax, format='%j'),
                                wmax=tts(twmax, format='%Y-%m-%d_%H-%M-%S')),
                            **save_kwargs)
                except io.FileSaveError as e:
                    die(str(e))

    for batch in it:
        process_traces(batch)
        if abort:
            break

    # Restore the previous SIGINT handler before leaving.
    signal.signal(signal.SIGINT, old)

    if abort:
        die('interrupted.')

358 

359 

def main(args=None):
    '''
    Command line interface of the jackseis tool.

    :param args: list of command line arguments to parse; defaults to
        ``sys.argv[1:]``
    '''
    if args is None:
        args = sys.argv[1:]

    # Custom Option subclass adds validated 'record_length' and 'steim'
    # option value types.
    parser = OptionParser(
        usage=usage,
        description=description,
        option_class=JackseisOptions,
        formatter=util.BetterHelpFormatter())

    parser.add_option(
        '--format',
        dest='format',
        default='detect',
        choices=io.allowed_formats('load'),
        help='assume input files are of given FORMAT. Choices: %s' %
             io.allowed_formats('load', 'cli_help', 'detect'))

    parser.add_option(
        '--pattern',
        dest='regex',
        metavar='REGEX',
        help='only include files whose paths match REGEX')

    parser.add_option(
        '--stationxml',
        dest='station_xml_fns',
        action='append',
        default=[],
        metavar='STATIONXML',
        help='read station metadata from file STATIONXML')

    parser.add_option(
        '--event', '--events',
        dest='event_fns',
        action='append',
        default=[],
        metavar='EVENT',
        help='read event information from file EVENT')

    parser.add_option(
        '--cache',
        dest='cache_dir',
        default=config.config().cache_dir,
        metavar='DIR',
        help='use directory DIR to cache trace metadata '
             "(default='%default')")

    parser.add_option(
        '--quiet',
        dest='quiet',
        action='store_true',
        default=False,
        help='disable output of progress information')

    parser.add_option(
        '--debug',
        dest='debug',
        action='store_true',
        default=False,
        help='print debugging information to stderr')

    parser.add_option(
        '--tmin',
        dest='tmin',
        help='start time as "%s"' % tfmt)

    parser.add_option(
        '--tmax',
        dest='tmax',
        help='end time as "%s"' % tfmt)

    parser.add_option(
        '--tinc',
        dest='tinc',
        help='set time length of output files [s] or "auto" to automatically '
             'choose an appropriate time increment or "huge" to allow to use '
             'a lot of system memory to merge input traces into huge output '
             'files.')

    parser.add_option(
        '--downsample',
        dest='downsample',
        metavar='RATE',
        help='downsample to RATE [Hz]')

    parser.add_option(
        '--scale',
        dest='scale',
        type=float,
        metavar='FACTOR',
        help='scale seismograms with given FACTOR. Changes data type to '
             'float64. Use --output-data-type to change again after scaling.')

    parser.add_option(
        '--output',
        dest='output_path',
        metavar='TEMPLATE',
        help='set output path to TEMPLATE. Available placeholders '
             'are %n: network, %s: station, %l: location, %c: channel, '
             '%b: begin time, %e: end time, %j: julian day of year. The '
             'following additional placeholders use the window begin and end '
             'times rather than trace begin and end times (to suppress '
             'producing many small files for gappy traces), %(wmin_year)s, '
             '%(wmin_month)s, %(wmin_day)s, %(wmin)s, %(wmin_jday)s, '
             '%(wmax_year)s, %(wmax_month)s, %(wmax_day)s, %(wmax)s, '
             "%(wmax_jday)s. Example: --output='data/%s/trace-%s-%c.mseed'")

    parser.add_option(
        '--output-dir',
        metavar='TEMPLATE',
        dest='output_dir',
        help='set output directory to TEMPLATE (see --output for details) '
             'and automatically choose filenames. '
             'Produces files like TEMPLATE/NET-STA-LOC-CHA_BEGINTIME.FORMAT')

    parser.add_option(
        '--output-format',
        dest='output_format',
        default='mseed',
        choices=io.allowed_formats('save'),
        help='set output file format. Choices: %s' %
             io.allowed_formats('save', 'cli_help', 'mseed'))

    parser.add_option(
        '--force',
        dest='force',
        action='store_true',
        default=False,
        help='force overwriting of existing files')

    parser.add_option(
        '--no-snap',
        dest='snap',
        action='store_false',
        default=True,
        help='do not start output files at even multiples of file length')

    parser.add_option(
        '--traversal',
        dest='traversal',
        choices=('station-by-station', 'channel-by-channel', 'chronological'),
        default='station-by-station',
        help='set traversal order for traces processing. '
             "Choices are 'station-by-station' [default], "
             "'channel-by-channel', and 'chronological'. Chronological "
             'traversal uses more processing memory but makes it possible to '
             'join multiple stations into single output files')

    parser.add_option(
        '--rename-network',
        action='append',
        default=[],
        dest='rename_network',
        metavar='/PATTERN/REPLACEMENT/',
        help='update network code, can be given more than once')

    parser.add_option(
        '--rename-station',
        action='append',
        default=[],
        dest='rename_station',
        metavar='/PATTERN/REPLACEMENT/',
        help='update station code, can be given more than once')

    parser.add_option(
        '--rename-location',
        action='append',
        default=[],
        dest='rename_location',
        metavar='/PATTERN/REPLACEMENT/',
        help='update location code, can be given more than once')

    parser.add_option(
        '--rename-channel',
        action='append',
        default=[],
        dest='rename_channel',
        metavar='/PATTERN/REPLACEMENT/',
        help='update channel code, can be given more than once')

    parser.add_option(
        '--output-data-type',
        dest='output_data_type',
        choices=('same', 'int32', 'int64', 'float32', 'float64'),
        default='same',
        metavar='DTYPE',
        help='set data type. Choices: same [default], int32, '
             'int64, float32, float64. The output file format must support '
             'the given type.')

    parser.add_option(
        '--output-steim',
        dest='steim',
        type='steim',
        default=2,
        metavar='STEIM_COMPRESSION',
        help='set the mseed STEIM compression. Choices: 1 or 2. '
             'Default is STEIM-2, which can compress full range int32. '
             'NOTE: STEIM-2 is limited to 30 bit dynamic range.')

    parser.add_option(
        '--output-record-length',
        dest='record_length',
        type='record_length',
        default=4096,
        metavar='RECORD_LENGTH',
        help='set the mseed record length in bytes. Choices: %s. '
             'Default is 4096 bytes, which is commonly used for archiving.'
             % ', '.join(str(b) for b in io.mseed.VALID_RECORD_LENGTHS))

    quantity_choices = ('acceleration', 'velocity', 'displacement')
    parser.add_option(
        '--output-quantity',
        dest='output_quantity',
        choices=quantity_choices,
        help='deconvolve instrument transfer function. Choices: %s'
             % ', '.join(quantity_choices))

    parser.add_option(
        '--restitution-frequency-band',
        default='0.001,100.0',
        dest='str_fmin_fmax',
        metavar='FMIN,FMAX',
        help='frequency band for instrument deconvolution as FMIN,FMAX in Hz. '
             'Default: "%default"')

    sample_snap_choices = ('shift', 'interpolate')
    parser.add_option(
        '--sample-snap',
        dest='sample_snap',
        choices=sample_snap_choices,
        help='shift/interpolate traces so that samples are at even multiples '
             'of sampling rate. Choices: %s' % ', '.join(sample_snap_choices))

    (options, args) = parser.parse_args(args)

    # Remaining positional arguments are the input paths; without any,
    # just show the help and exit with an error status.
    if len(args) == 0:
        parser.print_help()
        sys.exit(1)

    # --debug takes precedence over --quiet.
    if options.debug:
        util.setup_logging(program_name, 'debug')
    elif options.quiet:
        util.setup_logging(program_name, 'warning')
    else:
        util.setup_logging(program_name, 'info')

    # Defer pile creation so that option validation in process() runs
    # before the potentially slow input directory scan.
    def get_pile():
        return pile.make_pile(
            paths=args,
            selector=None,
            regex=options.regex,
            fileformat=options.format,
            cachedirname=options.cache_dir,
            show_progress=not options.quiet)

    return process(get_pile, options)

618 

619 

# Allow running this module directly as a script.
if __name__ == '__main__':
    main()