#!/usr/bin/env python
# http://pyrocko.org - GPLv3
#
# The Pyrocko Developers, 21st Century
# ---|P------/S----------~Lg----------
from __future__ import absolute_import, division

import sys
import re
import os
import logging
import signal
import math
from copy import copy
from optparse import OptionParser, Option, OptionValueError

import numpy as num

from pyrocko import util, config, pile, model, io, trace
from pyrocko.io import stationxml

pjoin = os.path.join

logger = logging.getLogger('pyrocko.apps.jackseis')

program_name = 'jackseis'

usage = program_name + ' <inputs> ... [options]'

description = '''A simple tool to manipulate waveform archive data.'''
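
# Example invocations (illustrative only; the options used here are defined
# in main() below):
#
#   jackseis data/ --output-dir=converted --tinc=1d --downsample=20
#   jackseis data/ --stationxml=stations.xml --output-quantity=velocity \
#       --output='restituted/%n.%s.%l.%c_%(wmin)s.mseed'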

tfmt = 'YYYY-mm-dd HH:MM:SS[.xxx]'
tts = util.time_to_str
stt = util.str_to_time


def die(message):
    sys.exit('%s: error: %s' % (program_name, message))


name_to_dtype = {
    'int32': num.int32,
    'int64': num.int64,
    'float32': num.float32,
    'float64': num.float64}


output_units = {
    'acceleration': 'M/S**2',
    'velocity': 'M/S',
    'displacement': 'M'}
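
# Parse a duration string into seconds; accepts a plain number of seconds or
# a number with an 's', 'm', 'h' or 'd' suffix (seconds, minutes, hours,
# days).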

def str_to_seconds(s):
    if s.endswith('s'):
        return float(s[:-1])
    elif s.endswith('m'):
        return float(s[:-1])*60.
    elif s.endswith('h'):
        return float(s[:-1])*3600.
    elif s.endswith('d'):
        return float(s[:-1])*3600.*24.
    else:
        return float(s)
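
# Floor a time span to the largest "nice" value (1 s ... 48 h) not exceeding
# it; spans below 1 s or above 48 h are returned unchanged.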

def nice_seconds_floor(s):
    nice = [1., 10., 60., 600., 3600., 3.*3600., 12*3600., 24*3600., 48*3600.]
    p = s
    for x in nice:
        if s < x:
            return p

        p = x

    return s
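
# optparse type checkers backing the custom 'record_length' and 'steim'
# option types registered on JackseisOptions below.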

def check_record_length(option, opt, value):
    try:
        reclen = int(value)
        if reclen in io.mseed.VALID_RECORD_LENGTHS:
            return reclen
    except Exception:
        pass
    raise OptionValueError(
        'invalid record length %s. (choose from %s)'
        % (value, ', '.join(str(b) for b in io.mseed.VALID_RECORD_LENGTHS)))


def check_steim(option, opt, value):
    try:
        compression = int(value)
        if compression in (1, 2):
            return compression
    except Exception:
        pass
    raise OptionValueError(
        'invalid STEIM compression %s. (choose from 1, 2)' % value)


class JackseisOptions(Option):
    TYPES = Option.TYPES + ('record_length', 'steim')
    TYPE_CHECKER = copy(Option.TYPE_CHECKER)
    TYPE_CHECKER['record_length'] = check_record_length
    TYPE_CHECKER['steim'] = check_steim


def main(args=None):
    if args is None:
        args = sys.argv[1:]

    parser = OptionParser(
        usage=usage,
        description=description,
        option_class=JackseisOptions,
        formatter=util.BetterHelpFormatter())

    parser.add_option(
        '--format',
        dest='format',
        default='detect',
        choices=io.allowed_formats('load'),
        help='assume input files are of given FORMAT. Choices: %s' %
             io.allowed_formats('load', 'cli_help', 'detect'))

    parser.add_option(
        '--pattern',
        dest='regex',
        metavar='REGEX',
        help='only include files whose paths match REGEX')

    parser.add_option(
        '--stationxml',
        dest='station_xml_fns',
        action='append',
        default=[],
        metavar='STATIONXML',
        help='read station metadata from file STATIONXML')

    parser.add_option(
        '--event', '--events',
        dest='event_fns',
        action='append',
        default=[],
        metavar='EVENT',
        help='read event information from file EVENT')

    parser.add_option(
        '--cache',
        dest='cache_dir',
        default=config.config().cache_dir,
        metavar='DIR',
        help='use directory DIR to cache trace metadata '
             '(default=\'%default\')')

    parser.add_option(
        '--quiet',
        dest='quiet',
        action='store_true',
        default=False,
        help='disable output of progress information')

    parser.add_option(
        '--debug',
        dest='debug',
        action='store_true',
        default=False,
        help='print debugging information to stderr')

    parser.add_option(
        '--tmin',
        dest='tmin',
        help='start time as "%s"' % tfmt)

    parser.add_option(
        '--tmax',
        dest='tmax',
        help='end time as "%s"' % tfmt)

    parser.add_option(
        '--tinc',
        dest='tinc',
        help='set time length of output files [s] or "auto" to automatically '
             'choose an appropriate time increment or "huge" to allow using '
             'a lot of system memory to merge input traces into huge output '
             'files.')

    parser.add_option(
        '--downsample',
        dest='downsample',
        metavar='RATE',
        help='downsample to RATE [Hz]')

    parser.add_option(
        '--output',
        dest='output_path',
        metavar='TEMPLATE',
        help='set output path to TEMPLATE. Available placeholders '
             'are %n: network, %s: station, %l: location, %c: channel, '
             '%b: begin time, %e: end time, %j: julian day of year. The '
             'following additional placeholders use the window begin and end '
             'times rather than trace begin and end times (to suppress '
             'producing many small files for gappy traces), %(wmin_year)s, '
             '%(wmin_month)s, %(wmin_day)s, %(wmin)s, %(wmin_jday)s, '
             '%(wmax_year)s, %(wmax_month)s, %(wmax_day)s, %(wmax)s, '
             '%(wmax_jday)s. Example: --output=\'data/%s/trace-%s-%c.mseed\'')

    parser.add_option(
        '--output-dir',
        metavar='TEMPLATE',
        dest='output_dir',
        help='set output directory to TEMPLATE (see --output for details) '
             'and automatically choose filenames. '
             'Produces files like TEMPLATE/NET-STA-LOC-CHA_BEGINTIME.FORMAT')

    parser.add_option(
        '--output-format',
        dest='output_format',
        default='mseed',
        choices=io.allowed_formats('save'),
        help='set output file format. Choices: %s' %
             io.allowed_formats('save', 'cli_help', 'mseed'))

    parser.add_option(
        '--force',
        dest='force',
        action='store_true',
        default=False,
        help='force overwriting of existing files')

    parser.add_option(
        '--no-snap',
        dest='snap',
        action='store_false',
        default=True,
        help='do not start output files at even multiples of file length')

    parser.add_option(
        '--traversal',
        dest='traversal',
        choices=('station-by-station', 'channel-by-channel', 'chronological'),
        default='station-by-station',
        help='set traversal order for trace processing. '
             'Choices are \'station-by-station\' [default], '
             '\'channel-by-channel\', and \'chronological\'. Chronological '
             'traversal uses more processing memory but makes it possible to '
             'join multiple stations into single output files')

    parser.add_option(
        '--rename-network',
        action='append',
        default=[],
        dest='rename_network',
        metavar='/PATTERN/REPLACEMENT/',
        help='update network code, can be given more than once')

    parser.add_option(
        '--rename-station',
        action='append',
        default=[],
        dest='rename_station',
        metavar='/PATTERN/REPLACEMENT/',
        help='update station code, can be given more than once')

    parser.add_option(
        '--rename-location',
        action='append',
        default=[],
        dest='rename_location',
        metavar='/PATTERN/REPLACEMENT/',
        help='update location code, can be given more than once')

    parser.add_option(
        '--rename-channel',
        action='append',
        default=[],
        dest='rename_channel',
        metavar='/PATTERN/REPLACEMENT/',
        help='update channel code, can be given more than once')

    parser.add_option(
        '--output-data-type',
        dest='output_data_type',
        choices=('same', 'int32', 'int64', 'float32', 'float64'),
        default='same',
        metavar='DTYPE',
        help='set data type. Choices: same [default], int32, '
             'int64, float32, float64. The output file format must support '
             'the given type.')

    parser.add_option(
        '--output-steim',
        dest='steim',
        type='steim',
        default=2,
        metavar='STEIM_COMPRESSION',
        help='set the mseed STEIM compression. Choices: 1 or 2. '
             'Default is STEIM-2, which can compress full range int32. '
             'NOTE: STEIM-2 is limited to 30 bit dynamic range.')

    parser.add_option(
        '--output-record-length',
        dest='record_length',
        type='record_length',
        default=4096,
        metavar='RECORD_LENGTH',
        help='set the mseed record length in bytes. Choices: %s. '
             'Default is 4096 bytes, which is commonly used for archiving.'
             % ', '.join(str(b) for b in io.mseed.VALID_RECORD_LENGTHS))

    quantity_choices = ('acceleration', 'velocity', 'displacement')
    parser.add_option(
        '--output-quantity',
        dest='output_quantity',
        choices=quantity_choices,
        help='deconvolve instrument transfer function. Choices: %s'
             % ', '.join(quantity_choices))

    parser.add_option(
        '--restitution-frequency-band',
        default='0.001,100.0',
        dest='str_fmin_fmax',
        metavar='FMIN,FMAX',
        help='frequency band for instrument deconvolution as FMIN,FMAX in Hz. '
             'Default: "%default"')

    sample_snap_choices = ('shift', 'interpolate')
    parser.add_option(
        '--sample-snap',
        dest='sample_snap',
        choices=sample_snap_choices,
        help='shift/interpolate traces so that samples are at even multiples '
             'of sampling rate. Choices: %s' % ', '.join(sample_snap_choices))

    (options, args) = parser.parse_args(args)

    if len(args) == 0:
        parser.print_help()
        sys.exit(1)

    if options.debug:
        util.setup_logging(program_name, 'debug')
    elif options.quiet:
        util.setup_logging(program_name, 'warning')
    else:
        util.setup_logging(program_name, 'info')

    tinc = None
    if options.tinc is not None:
        if options.tinc in ('huge', 'auto'):
            tinc = options.tinc
        else:
            try:
                tinc = str_to_seconds(options.tinc)
            except Exception:
                die('invalid argument to --tinc')

    tmin = None
    if options.tmin is not None:
        try:
            tmin = stt(options.tmin)
        except Exception:
            die('invalid argument to --tmin. '
                'Expected format is "%s"' % tfmt)

    tmax = None
    if options.tmax is not None:
        try:
            tmax = stt(options.tmax)
        except Exception:
            die('invalid argument to --tmax. '
                'Expected format is "%s"' % tfmt)

    target_deltat = None
    if options.downsample is not None:
        try:
            target_deltat = 1.0 / float(options.downsample)
        except Exception:
            die('invalid argument to --downsample')
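
    # Collect --rename-* rules as (attribute, pattern, replacement) triples;
    # they are applied to the trace codes in the processing loop below.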

    replacements = []
    for k, rename_k, in [
            ('network', options.rename_network),
            ('station', options.rename_station),
            ('location', options.rename_location),
            ('channel', options.rename_channel)]:

        for patrep in rename_k:
            m = re.match(r'/([^/]+)/([^/]*)/', patrep)
            if not m:
                die('invalid argument to --rename-%s. '
                    'Expected format is /PATTERN/REPLACEMENT/' % k)

            replacements.append((k, m.group(1), m.group(2)))

    sx = None
    if options.station_xml_fns:
        sxs = []
        for station_xml_fn in options.station_xml_fns:
            sxs.append(stationxml.load_xml(filename=station_xml_fn))

        sx = stationxml.primitive_merge(sxs)

    events = []
    for event_fn in options.event_fns:
        events.extend(model.load_events(event_fn))
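
    # Index the input files into a pile, caching trace metadata in the cache
    # directory given by --cache.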

    p = pile.make_pile(
        paths=args,
        selector=None,
        regex=options.regex,
        fileformat=options.format,
        cachedirname=options.cache_dir,
        show_progress=not options.quiet)

    if p.tmin is None:
        die('data selection is empty')

    if tinc == 'auto':
        tinc = nice_seconds_floor(p.get_deltatmin() * 1000000.)

    if options.snap:
        if tmin is None:
            tmin = p.tmin

        if tinc is not None:
            tmin = int(math.floor(tmin / tinc)) * tinc
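
    # Resolve the output path template: giving --output-dir (or an existing
    # directory to --output) selects an automatic filename template.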

    output_path = options.output_path
    output_dir = options.output_dir

    if output_path and not output_dir and os.path.isdir(output_path):
        output_dir = output_path  # compat. with old behaviour

    if output_dir and not output_path:
        output_path = 'trace_%(network)s-%(station)s-' \
                      '%(location)s-%(channel)s_%(wmin)s.' + \
                      options.output_format

    if output_dir and output_path:
        output_path = pjoin(output_dir, output_path)

    if not output_path:
        die('--output not given')
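
    # Determine the required padding per window: restitution needs fade-in/out
    # time derived from FMIN, downsampling a few extra samples at the edges.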

    fmin, fmax = map(float, options.str_fmin_fmax.split(','))
    tfade_factor = 2.0
    ffade_factor = 1.5
    if options.output_quantity:
        tpad = 2*tfade_factor/fmin
    else:
        tpad = 0.

    if target_deltat is not None:
        tpad += target_deltat * 10.

    if tinc is None:
        if ((tmax or p.tmax) - (tmin or p.tmin)) \
                / p.get_deltatmin() > 100000000.:
            die('use --tinc=huge to really produce such large output files '
                'or use --tinc=INC to split into smaller files.')
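
    # Select the chopper according to the requested traversal order; grouped
    # traversal keeps all traces of a channel or station together per window.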

    kwargs = dict(tmin=tmin, tmax=tmax, tinc=tinc, tpad=tpad, style='batch')

    if options.traversal == 'channel-by-channel':
        it = p.chopper_grouped(gather=lambda tr: tr.nslc_id, **kwargs)

    elif options.traversal == 'station-by-station':
        it = p.chopper_grouped(gather=lambda tr: tr.nslc_id[:2], **kwargs)

    else:
        it = p.chopper(**kwargs)
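
    # Handle Ctrl-C gracefully: the SIGINT handler only sets a flag, so the
    # processing loop can finish and save the current batch before exiting.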

    abort = []

    def got_sigint(signum, frame):
        abort.append(True)

    old = signal.signal(signal.SIGINT, got_sigint)

    save_kwargs = {}
    if options.output_format == 'mseed':
        save_kwargs['record_length'] = options.record_length
        save_kwargs['steim'] = options.steim
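
    # Main processing loop: each batch covers one output time window (plus
    # padding); traces are conditioned and then chopped and saved per window.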

    for batch in it:
        traces = batch.traces
        if traces:
            twmin = batch.tmin
            twmax = batch.tmax
            logger.info('processing %s - %s, %i traces' %
                        (tts(twmin), tts(twmax), len(traces)))

            if options.sample_snap:
                for tr in traces:
                    tr.snap(interpolate=options.sample_snap == 'interpolate')

            if target_deltat is not None:
                out_traces = []
                for tr in traces:
                    try:
                        tr.downsample_to(
                            target_deltat, snap=True, demean=False)

                        if options.output_data_type == 'same':
                            tr.ydata = tr.ydata.astype(tr.ydata.dtype)

                        out_traces.append(tr)

                    except (trace.TraceTooShort, trace.NoData):
                        pass

                traces = out_traces
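
            # Restitution: deconvolve the instrument response (from the
            # StationXML metadata given with --stationxml) within the
            # configured frequency band; traces without a usable response are
            # skipped with a warning.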

            if options.output_quantity:
                out_traces = []
                tfade = tfade_factor / fmin
                ftap = (fmin / ffade_factor, fmin, fmax, ffade_factor * fmax)
                for tr in traces:
                    try:
                        response = sx.get_pyrocko_response(
                            tr.nslc_id,
                            timespan=(tr.tmin, tr.tmax),
                            fake_input_units=output_units[
                                options.output_quantity])

                        rest_tr = tr.transfer(
                            tfade, ftap, response, invert=True, demean=True)

                        out_traces.append(rest_tr)

                    except stationxml.NoResponseInformation as e:
                        logger.warn(
                            'Cannot restitute: %s (no response)' % str(e))

                    except stationxml.MultipleResponseInformation as e:
                        logger.warn(
                            'Cannot restitute: %s (multiple responses found)'
                            % str(e))

                    except (trace.TraceTooShort, trace.NoData):
                        logger.warn(
                            'Trace too short: %s' % '.'.join(tr.nslc_id))

                traces = out_traces

            if options.output_data_type != 'same':
                for tr in traces:
                    tr.ydata = tr.ydata.astype(
                        name_to_dtype[options.output_data_type])

            if replacements:
                for tr in traces:
                    r = {}
                    for k, pat, repl in replacements:
                        oldval = getattr(tr, k)
                        newval, n = re.subn(pat, repl, oldval)
                        if n:
                            r[k] = newval

                    tr.set_codes(**r)
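
            # Chop traces to the window proper (dropping the padding) and save
            # them, expanding the placeholders of the output path template.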

            if output_path:
                otraces = []
                for tr in traces:
                    try:
                        otr = tr.chop(twmin, twmax, inplace=False)
                        otraces.append(otr)
                    except trace.NoData:
                        pass

                try:
                    io.save(otraces, output_path, format=options.output_format,
                            overwrite=options.force,
                            additional=dict(
                                wmin_year=tts(twmin, format='%Y'),
                                wmin_month=tts(twmin, format='%m'),
                                wmin_day=tts(twmin, format='%d'),
                                wmin_jday=tts(twmin, format='%j'),
                                wmin=tts(twmin, format='%Y-%m-%d_%H-%M-%S'),
                                wmax_year=tts(twmax, format='%Y'),
                                wmax_month=tts(twmax, format='%m'),
                                wmax_day=tts(twmax, format='%d'),
                                wmax_jday=tts(twmax, format='%j'),
                                wmax=tts(twmax, format='%Y-%m-%d_%H-%M-%S')),
                            **save_kwargs)

                except io.FileSaveError as e:
                    die(str(e))

        if abort:
            break

    signal.signal(signal.SIGINT, old)

    if abort:
        die('interrupted.')


if __name__ == '__main__':
    main(sys.argv[1:])