1#!/usr/bin/env python 

2# http://pyrocko.org - GPLv3 

3# 

4# The Pyrocko Developers, 21st Century 

5# ---|P------/S----------~Lg---------- 

6from __future__ import absolute_import, division 

7 

8import sys 

9import re 

10import os 

11import logging 

12import signal 

13import math 

14from copy import copy 

15from optparse import OptionParser, Option, OptionValueError 

16 

17import numpy as num 

18 

19from pyrocko import util, config, pile, model, io, trace 

20from pyrocko.io import stationxml 

21 

# Shorthand for building output paths from a directory and a template.
pjoin = os.path.join

logger = logging.getLogger('pyrocko.apps.jackseis')

program_name = 'jackseis'

usage = program_name + ' <inputs> ... [options]'

description = '''A simple tool to manipulate waveform archive data.'''

# Human-readable time format, shown in --help texts; parsed by `stt` below.
tfmt = 'YYYY-mm-dd HH:MM:SS[.xxx]'
# Short aliases for time conversion helpers used throughout this module.
tts = util.time_to_str
stt = util.str_to_time_fillup

35 

36 

def die(message):
    '''
    Abort the program, printing ``message`` prefixed with the program name.
    '''
    error_line = '%s: error: %s' % (program_name, message)
    sys.exit(error_line)

39 

40 

# Map --output-data-type choice names to numpy dtypes.
name_to_dtype = {
    'int32': num.int32,
    'int64': num.int64,
    'float32': num.float32,
    'float64': num.float64}


# Map --output-quantity choices to the StationXML input units requested
# during response deconvolution.
output_units = {
    'acceleration': 'M/S**2',
    'velocity': 'M/S',
    'displacement': 'M'}

52 

53 

def str_to_seconds(s):
    '''
    Convert a duration string to seconds.

    Accepts an optional unit suffix: ``s`` (seconds), ``m`` (minutes),
    ``h`` (hours) or ``d`` (days). Without a suffix the value is taken
    as seconds. Raises ``ValueError`` on unparsable input.
    '''
    factors = {'s': 1., 'm': 60., 'h': 3600., 'd': 24. * 3600.}
    suffix = s[-1:]
    if suffix in factors:
        return float(s[:-1]) * factors[suffix]

    return float(s)

65 

66 

def nice_seconds_floor(s):
    '''
    Round a duration [s] down to a "nice" time interval.

    Returns the largest entry of a fixed list of nice intervals
    (1 s ... 48 h) not exceeding ``s``. Values below the smallest or at or
    above the largest nice interval are returned unchanged.
    '''
    nice = [1., 10., 60., 600., 3600., 3.*3600., 12*3600., 24*3600., 48*3600.]
    if s < nice[0] or s >= nice[-1]:
        return s

    for candidate in reversed(nice):
        if candidate <= s:
            return candidate

77 

78 

def check_record_length(option, opt, value):
    '''
    optparse type checker for the mseed record length option.

    Returns the record length as ``int`` if it is one of
    ``io.mseed.VALID_RECORD_LENGTHS``, otherwise raises
    :py:exc:`optparse.OptionValueError`.
    '''
    try:
        reclen = int(value)
        if reclen in io.mseed.VALID_RECORD_LENGTHS:
            return reclen
    except Exception:
        pass
    # Interpolate the raw option value here: `reclen` is unbound when
    # int(value) raised, which previously caused a NameError.
    raise OptionValueError(
        'invalid record length %s. (choose from %s)'
        % (value, ', '.join(str(b) for b in io.mseed.VALID_RECORD_LENGTHS)))

89 

90 

def check_steim(option, opt, value):
    '''
    optparse type checker for the mseed STEIM compression option.

    Returns the compression level as ``int`` if it is 1 or 2, otherwise
    raises :py:exc:`optparse.OptionValueError`.
    '''
    try:
        compression = int(value)
        if compression in (1, 2):
            return compression
    except Exception:
        pass
    # Interpolate the raw option value here: `compression` is unbound when
    # int(value) raised, which previously caused a NameError.
    raise OptionValueError(
        'invalid STEIM compression %s. (choose from 1, 2)' % value)

100 

101 

class JackseisOptions(Option):
    '''
    ``optparse.Option`` subclass adding the custom option value types
    ``record_length`` and ``steim``, validated by
    :py:func:`check_record_length` and :py:func:`check_steim`.
    '''
    TYPES = Option.TYPES + ('record_length', 'steim')
    TYPE_CHECKER = copy(Option.TYPE_CHECKER)
    TYPE_CHECKER['record_length'] = check_record_length
    TYPE_CHECKER['steim'] = check_steim

107 

108 

def process(get_pile, options):
    '''
    Run the jackseis waveform processing chain.

    :param get_pile: callable returning a :py:class:`pyrocko.pile.Pile`
        with the input data selection
    :param options: parsed command line options (see :py:func:`main`)

    Reads traces window by window from the pile, optionally snaps samples,
    downsamples, restitutes, retypes and renames them, and saves the results
    to files derived from the ``--output``/``--output-dir`` templates.
    Terminates the program via :py:func:`die` on fatal errors.
    '''

    tinc = None
    if options.tinc is not None:
        # Check the option string itself (the local `tinc` is still None
        # here); 'huge' and 'auto' are resolved further down.
        if options.tinc in ('huge', 'auto'):
            tinc = options.tinc
        else:
            try:
                tinc = str_to_seconds(options.tinc)
            except Exception:
                die('invalid argument to --tinc')

    tmin = None
    if options.tmin is not None:
        try:
            tmin = stt(options.tmin)
        except Exception:
            die('invalid argument to --tmin. '
                'Expected format is "%s"' % tfmt)

    tmax = None
    if options.tmax is not None:
        try:
            tmax = stt(options.tmax)
        except Exception:
            die('invalid argument to --tmax. '
                'Expected format is "%s"' % tfmt)

    target_deltat = None
    if options.downsample is not None:
        try:
            target_deltat = 1.0 / float(options.downsample)
        except Exception:
            die('invalid argument to --downsample')

    # Collect (attribute, pattern, replacement) triples from the
    # --rename-* options.
    replacements = []
    for k, rename_k in [
            ('network', options.rename_network),
            ('station', options.rename_station),
            ('location', options.rename_location),
            ('channel', options.rename_channel)]:

        for patrep in rename_k:
            m = re.match(r'/([^/]+)/([^/]*)/', patrep)
            if not m:
                die('invalid argument to --rename-%s. '
                    'Expected format is /PATTERN/REPLACEMENT/' % k)

            replacements.append((k, m.group(1), m.group(2)))

    sx = None
    if options.station_xml_fns:
        sxs = []
        for station_xml_fn in options.station_xml_fns:
            sxs.append(stationxml.load_xml(filename=station_xml_fn))

        sx = stationxml.primitive_merge(sxs)

    # Event files are loaded to validate them; their contents are not
    # otherwise used in this function.
    events = []
    for event_fn in options.event_fns:
        events.extend(model.load_events(event_fn))

    p = get_pile()

    if p.tmin is None:
        die('data selection is empty')

    if tinc == 'auto':
        # Aim for a "nice" window length of roughly one million samples.
        tinc = nice_seconds_floor(p.get_deltatmin() * 1000000.)

    if options.snap:
        if tmin is None:
            tmin = p.tmin

        # Align window starts to even multiples of the window length
        # ('huge' has no numeric length to snap to).
        if tinc is not None and tinc != 'huge':
            tmin = int(math.floor(tmin / tinc)) * tinc

    output_path = options.output_path
    output_dir = options.output_dir

    if output_path and not output_dir and os.path.isdir(output_path):
        output_dir = output_path  # compat. with old behaviour

    if output_dir and not output_path:
        output_path = 'trace_%(network)s-%(station)s-' \
                      '%(location)s-%(channel)s_%(wmin)s.' + \
                      options.output_format

    if output_dir and output_path:
        output_path = pjoin(output_dir, output_path)

    if not output_path:
        die('--output not given')

    fmin, fmax = map(float, options.str_fmin_fmax.split(','))
    tfade_factor = 2.0
    ffade_factor = 1.5
    if options.output_quantity:
        # Extra padding so the restitution tapers fall outside the
        # retained window.
        tpad = 2*tfade_factor/fmin
    else:
        tpad = 0.

    if target_deltat is not None:
        # Padding for the downsampling filter response.
        tpad += target_deltat * 10.

    if tinc is None:
        if ((tmax or p.tmax) - (tmin or p.tmin)) \
                / p.get_deltatmin() > 100000000.:
            die('use --tinc=huge to really produce such large output files '
                'or use --tinc=INC to split into smaller files.')

    elif tinc == 'huge':
        # User explicitly allowed unbounded output files: process all data
        # in a single window.
        tinc = None

    kwargs = dict(tmin=tmin, tmax=tmax, tinc=tinc, tpad=tpad, style='batch')

    if options.traversal == 'channel-by-channel':
        it = p.chopper_grouped(gather=lambda tr: tr.nslc_id, **kwargs)

    elif options.traversal == 'station-by-station':
        it = p.chopper_grouped(gather=lambda tr: tr.nslc_id[:2], **kwargs)

    else:
        it = p.chopper(**kwargs)

    # Graceful interruption: on SIGINT, finish the current batch, then stop.
    abort = []

    def got_sigint(signum, frame):
        abort.append(True)

    old = signal.signal(signal.SIGINT, got_sigint)

    save_kwargs = {}
    if options.output_format == 'mseed':
        save_kwargs['record_length'] = options.record_length
        save_kwargs['steim'] = options.steim

    def process_traces(batch):
        # Process and save one time window worth of traces.
        traces = batch.traces
        if traces:
            twmin = batch.tmin
            twmax = batch.tmax
            logger.info('processing %s - %s, %i traces' %
                        (tts(twmin), tts(twmax), len(traces)))

            if options.sample_snap:
                for tr in traces:
                    tr.snap(interpolate=options.sample_snap == 'interpolate')

            if target_deltat is not None:
                out_traces = []
                for tr in traces:
                    try:
                        if options.output_data_type == 'same':
                            # Remember the input dtype; downsampling
                            # converts the samples to float.
                            dtype_orig = tr.ydata.dtype

                        tr.downsample_to(
                            target_deltat, snap=True, demean=False)

                        if options.output_data_type == 'same':
                            # Cast back to the pre-downsampling dtype
                            # (previously a no-op astype to its own dtype).
                            tr.ydata = tr.ydata.astype(dtype_orig)

                        out_traces.append(tr)

                    except (trace.TraceTooShort, trace.NoData):
                        pass

                traces = out_traces

            if options.output_quantity:
                out_traces = []
                tfade = tfade_factor / fmin
                ftap = (fmin / ffade_factor, fmin, fmax, ffade_factor * fmax)
                for tr in traces:
                    try:
                        response = sx.get_pyrocko_response(
                            tr.nslc_id,
                            timespan=(tr.tmin, tr.tmax),
                            fake_input_units=output_units[
                                options.output_quantity])

                        rest_tr = tr.transfer(
                            tfade, ftap, response, invert=True, demean=True)

                        out_traces.append(rest_tr)

                    except stationxml.NoResponseInformation as e:
                        logger.warning(
                            'Cannot restitute: %s (no response)' % str(e))

                    except stationxml.MultipleResponseInformation as e:
                        logger.warning(
                            'Cannot restitute: %s (multiple responses found)'
                            % str(e))

                    except (trace.TraceTooShort, trace.NoData):
                        logger.warning(
                            'Trace too short: %s' % '.'.join(tr.nslc_id))

                traces = out_traces

            if options.output_data_type != 'same':
                for tr in traces:
                    tr.ydata = tr.ydata.astype(
                        name_to_dtype[options.output_data_type])

            if replacements:
                for tr in traces:
                    r = {}
                    for k, pat, repl in replacements:
                        oldval = getattr(tr, k)
                        newval, n = re.subn(pat, repl, oldval)
                        if n:
                            r[k] = newval

                    tr.set_codes(**r)

            if output_path:
                # Chop off the padding before saving.
                otraces = []
                for tr in traces:
                    try:
                        otr = tr.chop(twmin, twmax, inplace=False)
                        otraces.append(otr)
                    except trace.NoData:
                        pass

                try:
                    io.save(otraces, output_path,
                            format=options.output_format,
                            overwrite=options.force,
                            additional=dict(
                                wmin_year=tts(twmin, format='%Y'),
                                wmin_month=tts(twmin, format='%m'),
                                wmin_day=tts(twmin, format='%d'),
                                wmin_jday=tts(twmin, format='%j'),
                                wmin=tts(twmin, format='%Y-%m-%d_%H-%M-%S'),
                                wmax_year=tts(twmax, format='%Y'),
                                wmax_month=tts(twmax, format='%m'),
                                wmax_day=tts(twmax, format='%d'),
                                wmax_jday=tts(twmax, format='%j'),
                                wmax=tts(twmax, format='%Y-%m-%d_%H-%M-%S')),
                            **save_kwargs)
                except io.FileSaveError as e:
                    die(str(e))

    for batch in it:
        process_traces(batch)
        if abort:
            break

    signal.signal(signal.SIGINT, old)

    if abort:
        die('interrupted.')

355 

356 

def main(args=None):
    '''
    Command line entry point: parse arguments and run :py:func:`process`.

    :param args: command line argument list; defaults to ``sys.argv[1:]``
    '''
    if args is None:
        args = sys.argv[1:]

    parser = OptionParser(
        usage=usage,
        description=description,
        option_class=JackseisOptions,
        formatter=util.BetterHelpFormatter())

    # Input selection and metadata options.
    parser.add_option(
        '--format',
        dest='format',
        default='detect',
        choices=io.allowed_formats('load'),
        help='assume input files are of given FORMAT. Choices: %s' %
             io.allowed_formats('load', 'cli_help', 'detect'))

    parser.add_option(
        '--pattern',
        dest='regex',
        metavar='REGEX',
        help='only include files whose paths match REGEX')

    parser.add_option(
        '--stationxml',
        dest='station_xml_fns',
        action='append',
        default=[],
        metavar='STATIONXML',
        help='read station metadata from file STATIONXML')

    parser.add_option(
        '--event', '--events',
        dest='event_fns',
        action='append',
        default=[],
        metavar='EVENT',
        help='read event information from file EVENT')

    parser.add_option(
        '--cache',
        dest='cache_dir',
        default=config.config().cache_dir,
        metavar='DIR',
        help='use directory DIR to cache trace metadata '
             '(default=\'%default\')')

    # Verbosity options.
    parser.add_option(
        '--quiet',
        dest='quiet',
        action='store_true',
        default=False,
        help='disable output of progress information')

    parser.add_option(
        '--debug',
        dest='debug',
        action='store_true',
        default=False,
        help='print debugging information to stderr')

    # Time window and processing options.
    parser.add_option(
        '--tmin',
        dest='tmin',
        help='start time as "%s"' % tfmt)

    parser.add_option(
        '--tmax',
        dest='tmax',
        help='end time as "%s"' % tfmt)

    parser.add_option(
        '--tinc',
        dest='tinc',
        help='set time length of output files [s] or "auto" to automatically '
             'choose an appropriate time increment or "huge" to allow to use '
             'a lot of system memory to merge input traces into huge output '
             'files.')

    parser.add_option(
        '--downsample',
        dest='downsample',
        metavar='RATE',
        help='downsample to RATE [Hz]')

    # Output options.
    parser.add_option(
        '--output',
        dest='output_path',
        metavar='TEMPLATE',
        help='set output path to TEMPLATE. Available placeholders '
             'are %n: network, %s: station, %l: location, %c: channel, '
             '%b: begin time, %e: end time, %j: julian day of year. The '
             'following additional placeholders use the window begin and end '
             'times rather than trace begin and end times (to suppress '
             'producing many small files for gappy traces), %(wmin_year)s, '
             '%(wmin_month)s, %(wmin_day)s, %(wmin)s, %(wmin_jday)s, '
             '%(wmax_year)s, %(wmax_month)s, %(wmax_day)s, %(wmax)s, '
             '%(wmax_jday)s. Example: --output=\'data/%s/trace-%s-%c.mseed\'')

    parser.add_option(
        '--output-dir',
        metavar='TEMPLATE',
        dest='output_dir',
        help='set output directory to TEMPLATE (see --output for details) '
             'and automatically choose filenames. '
             'Produces files like TEMPLATE/NET-STA-LOC-CHA_BEGINTIME.FORMAT')

    parser.add_option(
        '--output-format',
        dest='output_format',
        default='mseed',
        choices=io.allowed_formats('save'),
        help='set output file format. Choices: %s' %
             io.allowed_formats('save', 'cli_help', 'mseed'))

    parser.add_option(
        '--force',
        dest='force',
        action='store_true',
        default=False,
        help='force overwriting of existing files')

    parser.add_option(
        '--no-snap',
        dest='snap',
        action='store_false',
        default=True,
        help='do not start output files at even multiples of file length')

    parser.add_option(
        '--traversal',
        dest='traversal',
        choices=('station-by-station', 'channel-by-channel', 'chronological'),
        default='station-by-station',
        help='set traversal order for traces processing. '
             'Choices are \'station-by-station\' [default], '
             '\'channel-by-channel\', and \'chronological\'. Chronological '
             'traversal uses more processing memory but makes it possible to '
             'join multiple stations into single output files')

    # Trace code renaming options (see process() for pattern handling).
    parser.add_option(
        '--rename-network',
        action='append',
        default=[],
        dest='rename_network',
        metavar='/PATTERN/REPLACEMENT/',
        help='update network code, can be given more than once')

    parser.add_option(
        '--rename-station',
        action='append',
        default=[],
        dest='rename_station',
        metavar='/PATTERN/REPLACEMENT/',
        help='update station code, can be given more than once')

    parser.add_option(
        '--rename-location',
        action='append',
        default=[],
        dest='rename_location',
        metavar='/PATTERN/REPLACEMENT/',
        help='update location code, can be given more than once')

    parser.add_option(
        '--rename-channel',
        action='append',
        default=[],
        dest='rename_channel',
        metavar='/PATTERN/REPLACEMENT/',
        help='update channel code, can be given more than once')

    parser.add_option(
        '--output-data-type',
        dest='output_data_type',
        choices=('same', 'int32', 'int64', 'float32', 'float64'),
        default='same',
        metavar='DTYPE',
        help='set data type. Choices: same [default], int32, '
             'int64, float32, float64. The output file format must support '
             'the given type.')

    # mseed specific output options (custom option value types, see
    # JackseisOptions).
    parser.add_option(
        '--output-steim',
        dest='steim',
        type='steim',
        default=2,
        metavar='STEIM_COMPRESSION',
        help='set the mseed STEIM compression. Choices: 1 or 2. '
             'Default is STEIM-2, which can compress full range int32. '
             'NOTE: STEIM-2 is limited to 30 bit dynamic range.')

    parser.add_option(
        '--output-record-length',
        dest='record_length',
        type='record_length',
        default=4096,
        metavar='RECORD_LENGTH',
        help='set the mseed record length in bytes. Choices: %s. '
             'Default is 4096 bytes, which is commonly used for archiving.'
             % ', '.join(str(b) for b in io.mseed.VALID_RECORD_LENGTHS))

    # Restitution options.
    quantity_choices = ('acceleration', 'velocity', 'displacement')
    parser.add_option(
        '--output-quantity',
        dest='output_quantity',
        choices=quantity_choices,
        help='deconvolve instrument transfer function. Choices: %s'
             % ', '.join(quantity_choices))

    parser.add_option(
        '--restitution-frequency-band',
        default='0.001,100.0',
        dest='str_fmin_fmax',
        metavar='FMIN,FMAX',
        help='frequency band for instrument deconvolution as FMIN,FMAX in Hz. '
             'Default: "%default"')

    sample_snap_choices = ('shift', 'interpolate')
    parser.add_option(
        '--sample-snap',
        dest='sample_snap',
        choices=sample_snap_choices,
        help='shift/interpolate traces so that samples are at even multiples '
             'of sampling rate. Choices: %s' % ', '.join(sample_snap_choices))

    (options, args) = parser.parse_args(args)

    # Without input paths there is nothing to do: show help and exit.
    if len(args) == 0:
        parser.print_help()
        sys.exit(1)

    if options.debug:
        util.setup_logging(program_name, 'debug')
    elif options.quiet:
        util.setup_logging(program_name, 'warning')
    else:
        util.setup_logging(program_name, 'info')

    def get_pile():
        # Deferred pile creation: process() triggers the (potentially slow)
        # directory scan only after option validation.
        return pile.make_pile(
            paths=args,
            selector=None,
            regex=options.regex,
            fileformat=options.format,
            cachedirname=options.cache_dir,
            show_progress=not options.quiet)

    return process(get_pile, options)

607 

608 

if __name__ == '__main__':
    # main() defaults to sys.argv[1:] when called without arguments.
    main()