1import glob 

2import copy 

3import os.path as op 

4import logging 

5import math 

6import numpy as num 

7 

8from collections import defaultdict 

9from pyrocko import util, pile, model, config, trace, \ 

10 marker as pmarker 

11from pyrocko.io.io_common import FileLoadError 

12from pyrocko.fdsn import enhanced_sacpz, station as fs 

13from pyrocko.guts import (Object, Tuple, String, Float, List, Bool, dump_all, 

14 load_all) 

15 

16from pyrocko import gf 

17 

18from .meta import Path, HasPaths, expand_template, GrondError 

19 

20from .synthetic_tests import SyntheticTest 

21 

# Prefix used for guts serialization of objects defined in this module.
guts_prefix = 'grond'

# Module-level logger for dataset loading and access messages.
logger = logging.getLogger('grond.dataset')

24 

25 

def quote_paths(paths):
    '''Return the given paths as a comma-separated, double-quoted string.'''
    quoted = ['"%s"' % p for p in paths]
    return ', '.join(quoted)

28 

29 

class InvalidObject(Exception):
    '''Raised when station/channel codes cannot be derived from an object.'''

32 

33 

class NotFound(Exception):
    '''Raised when a requested dataset item is not available.

    :param reason: human-readable description of what is missing
    :param codes: offending codes; a single codes tuple of strings or a
        list of such tuples (callers like ``get_station`` pass a list of
        candidate keys)
    :param time_range: optional ``(tmin, tmax)`` of the failed request
    '''

    def __init__(self, reason, codes=None, time_range=None):
        self.reason = reason
        self.time_range = time_range
        self.codes = codes

    def __str__(self):
        s = self.reason
        if self.codes:
            s += ' (%s)' % self._str_codes(self.codes)

        if self.time_range:
            s += ' (%s - %s)' % (
                util.time_to_str(self.time_range[0]),
                util.time_to_str(self.time_range[1]))

        return s

    @staticmethod
    def _str_codes(codes):
        # Bug fix: callers pass either one codes tuple of strings or a
        # list of such tuples; '.'.join over a list of tuples raised
        # TypeError while formatting the exception message.
        if isinstance(codes[0], (tuple, list)):
            return ', '.join('.'.join(c) for c in codes)

        return '.'.join(codes)

51 

52 

class DatasetError(GrondError):
    '''Raised for errors in dataset configuration or loading.'''

55 

56 

class StationCorrection(Object):
    '''Static time shift and amplitude factor for one station/channel.'''

    # (net, sta, loc, cha) codes the correction applies to.
    codes = Tuple.T(4, String.T())
    # Time shift [s]; subtracted from observed traces when corrections
    # are applied (see Dataset._get_waveform).
    delay = Float.T()
    # Amplitude factor; observed trace amplitudes are divided by it.
    factor = Float.T()

61 

62 

def load_station_corrections(filename):
    '''Load a list of :py:class:`StationCorrection` objects from a file.

    :param filename: path to the corrections file (guts YAML)
    :returns: list of StationCorrection objects
    :raises DatasetError: if the file contains foreign objects
    '''
    scs = load_all(filename=filename)
    for sc in scs:
        # Validate with an explicit exception rather than an assert, so
        # the check survives ``python -O`` and gives a helpful message.
        if not isinstance(sc, StationCorrection):
            raise DatasetError(
                'Invalid object in station corrections file "%s".' % filename)

    return scs

69 

70 

def dump_station_corrections(station_corrections, filename):
    '''Write station corrections to a file in guts YAML format.

    :param station_corrections: iterable of StationCorrection objects
    :param filename: output path
    '''
    return dump_all(station_corrections, filename=filename)

73 

74 

class Dataset(object):
    '''Container for all data belonging to the optimization of one event:
    stations, reference events, waveforms, responses, InSAR scenes and
    GNSS campaigns, plus access policies (black/whitelists, corrections,
    clippings).'''

    def __init__(self, event_name=None):
        # Reference events (catalog solutions) available to this dataset.
        self.events = []
        # Waveform archive access (lazy, file-backed).
        self.pile = pile.Pile()
        # Station lookup, keyed by (net, sta, loc) tuples.
        self.stations = {}
        # Instrument responses from SAC PZ files, grouped by codes.
        self.responses = defaultdict(list)
        # Instrument responses from StationXML documents.
        self.responses_stationxml = []
        # Clipping onset times (numpy arrays) per nsl/nslc key.
        self.clippings = {}
        # Station/channel codes to be excluded.
        self.blacklist = set()
        # Whitelist sets; None means "no whitelist configured".
        self.whitelist_nslc = None
        self.whitelist_nsl_xx = None
        self.whitelist = None
        # Station corrections keyed by nslc codes.
        self.station_corrections = {}
        self.station_factors = {}
        # Phase pick markers loaded from marker files.
        self.pick_markers = []
        self.apply_correction_delays = True
        self.apply_correction_factors = True
        self.apply_displaced_sampling_workaround = False
        self.extend_incomplete = False
        # Granularity at which clippings are checked: 'by_nsl' or 'by_nslc'.
        self.clip_handling = 'by_nsl'
        self.kite_scenes = []
        self.gnss_campaigns = []
        # Optional SyntheticTest object replacing real waveforms.
        self.synthetic_test = None
        # Cache for picks grouped by (nsl, phasename, eventname).
        self._picks = None
        # Cache for processed waveforms, keyed by codes plus parameters.
        self._cache = {}
        # Name of the main event of this dataset.
        self._event_name = event_name

102 

103 def empty_cache(self): 

104 self._cache = {} 

105 

106 def set_synthetic_test(self, synthetic_test): 

107 self.synthetic_test = synthetic_test 

108 

    def add_stations(
            self,
            stations=None,
            pyrocko_stations_filename=None,
            stationxml_filenames=None):
        '''Add station metadata from objects and/or files.

        :param stations: list of pyrocko station objects to add directly
        :param pyrocko_stations_filename: stations file in Pyrocko format
        :param stationxml_filenames: list of StationXML file paths;
            non-existing files are silently skipped
        '''

        if stations is not None:
            for station in stations:
                self.stations[station.nsl()] = station

        if pyrocko_stations_filename is not None:
            logger.debug(
                'Loading stations from file "%s"...' %
                pyrocko_stations_filename)

            for station in model.load_stations(pyrocko_stations_filename):
                self.stations[station.nsl()] = station

        if stationxml_filenames is not None and len(stationxml_filenames) > 0:

            for stationxml_filename in stationxml_filenames:
                if not op.exists(stationxml_filename):
                    continue

                logger.debug(
                    'Loading stations from StationXML file "%s"...' %
                    stationxml_filename)

                sx = fs.load_xml(filename=stationxml_filename)
                ev = self.get_event()
                # Station epochs are selected at the main event's time.
                stations = sx.get_pyrocko_stations(time=ev.time)
                if len(stations) == 0:
                    logger.warning(
                        'No stations found for time %s in file "%s".' % (
                            util.time_to_str(ev.time), stationxml_filename))

                for station in stations:
                    logger.debug('Adding station: %s.%s.%s' % station.nsl())
                    channels = station.get_channels()
                    if len(channels) == 1 and channels[0].name.endswith('Z'):
                        # Some providers ship vertical-only channel
                        # metadata; fabricate N and E channels so that
                        # horizontal projections can be set up later.
                        logger.warning(
                            'Station "%s" has vertical component'
                            ' information only, adding mocked channels.'
                            % station.nsl_string())
                        station.add_channel(
                            model.Channel(channels[0].name[:-1] + 'N'))
                        station.add_channel(
                            model.Channel(channels[0].name[:-1] + 'E'))

                    self.stations[station.nsl()] = station

159 

160 def add_events(self, events=None, filename=None): 

161 if events is not None: 

162 self.events.extend(events) 

163 

164 if filename is not None: 

165 logger.debug('Loading events from file "%s"...' % filename) 

166 try: 

167 events = model.load_events(filename) 

168 self.events.extend(events) 

169 logger.info( 

170 'Loading events from %s: %i events found.' % 

171 (filename, len(events))) 

172 except Exception as e: 

173 logger.warning('Could not load events from %s!', filename) 

174 raise e 

175 

    def add_waveforms(self, paths, regex=None, fileformat='detect',
                      show_progress=False):
        '''Scan paths for waveform files and add them to the pile.

        :param paths: list of files/directories to search
        :param regex: optional filename inclusion pattern
        :param fileformat: waveform file format, 'detect' to auto-detect
        :param show_progress: show progress bars while scanning
        '''
        # Scan results are cached on disk in pyrocko's global cache dir.
        cachedirname = config.config().cache_dir

        logger.debug('Selecting waveform files %s...' % quote_paths(paths))
        fns = util.select_files(paths, include=regex,
                                show_progress=show_progress)
        cache = pile.get_cache(cachedirname)
        logger.debug('Scanning waveform files %s...' % quote_paths(paths))
        self.pile.load_files(sorted(fns), cache=cache,
                             fileformat=fileformat,
                             show_progress=show_progress)

188 

    def add_responses(self, sacpz_dirname=None, stationxml_filenames=None):
        '''Add instrument response information.

        :param sacpz_dirname: directory with enhanced SAC PZ files
        :param stationxml_filenames: list of StationXML file paths;
            non-existing files are silently skipped
        '''
        if sacpz_dirname:
            logger.debug(
                'Loading SAC PZ responses from "%s"...' % sacpz_dirname)
            for x in enhanced_sacpz.iload_dirname(sacpz_dirname):
                self.responses[x.codes].append(x)

        if stationxml_filenames:
            for stationxml_filename in stationxml_filenames:
                if not op.exists(stationxml_filename):
                    continue

                logger.debug(
                    'Loading StationXML responses from "%s"...' %
                    stationxml_filename)

                self.responses_stationxml.append(
                    fs.load_xml(filename=stationxml_filename))

207 

208 def add_clippings(self, markers_filename): 

209 markers = pmarker.load_markers(markers_filename) 

210 clippings = {} 

211 for marker in markers: 

212 nslc = marker.one_nslc() 

213 nsl = nslc[:3] 

214 if nsl not in clippings: 

215 clippings[nsl] = [] 

216 

217 if nslc not in clippings: 

218 clippings[nslc] = [] 

219 

220 clippings[nsl].append(marker.tmin) 

221 clippings[nslc].append(marker.tmin) 

222 

223 for k, times in clippings.items(): 

224 atimes = num.array(times, dtype=float) 

225 if k not in self.clippings: 

226 self.clippings[k] = atimes 

227 else: 

228 self.clippings[k] = num.concatenate(self.clippings, atimes) 

229 

230 def add_blacklist(self, blacklist=[], filenames=None): 

231 logger.debug('Loading blacklisted stations...') 

232 if filenames: 

233 blacklist = list(blacklist) 

234 for filename in filenames: 

235 if op.exists(filename): 

236 with open(filename, 'r') as f: 

237 blacklist.extend( 

238 s.strip() for s in f.read().splitlines()) 

239 else: 

240 logger.warning('No such blacklist file: %s' % filename) 

241 

242 for x in blacklist: 

243 if isinstance(x, str): 

244 x = tuple(x.split('.')) 

245 self.blacklist.add(x) 

246 

247 def add_whitelist(self, whitelist=[], filenames=None): 

248 logger.debug('Loading whitelisted stations...') 

249 if filenames: 

250 whitelist = list(whitelist) 

251 for filename in filenames: 

252 with open(filename, 'r') as f: 

253 whitelist.extend(s.strip() for s in f.read().splitlines()) 

254 

255 if self.whitelist_nslc is None: 

256 self.whitelist_nslc = set() 

257 self.whitelist = set() 

258 self.whitelist_nsl_xx = set() 

259 

260 for x in whitelist: 

261 if isinstance(x, str): 

262 x = tuple(x.split('.')) 

263 if len(x) == 4: 

264 self.whitelist_nslc.add(x) 

265 self.whitelist_nsl_xx.add(x[:3]) 

266 else: 

267 self.whitelist.add(x) 

268 

269 def add_station_corrections(self, filename): 

270 self.station_corrections.update( 

271 (sc.codes, sc) for sc in load_station_corrections(filename)) 

272 

273 def add_picks(self, filename): 

274 self.pick_markers.extend( 

275 pmarker.load_markers(filename)) 

276 

277 self._picks = None 

278 

279 def add_gnss_campaigns(self, paths): 

280 paths = util.select_files( 

281 paths, 

282 include=r'\.yml|\.yaml', 

283 show_progress=False) 

284 

285 for path in paths: 

286 self.add_gnss_campaign(filename=path) 

287 

    def add_gnss_campaign(self, filename):
        '''Load a single GNSS campaign from a guts YAML file.

        :raises ImportError: if the installed pyrocko lacks the gnss model
        '''
        try:
            from pyrocko.model import gnss  # noqa
        except ImportError:
            raise ImportError('Module pyrocko.model.gnss not found,'
                              ' please upgrade pyrocko!')
        logger.debug('Loading GNSS campaign from "%s"...' % filename)

        campaign = load_all(filename=filename)
        # NOTE(review): only the first object in the file is used; an
        # empty file would raise IndexError here -- confirm files always
        # contain exactly one campaign.
        self.gnss_campaigns.append(campaign[0])

298 

299 def add_kite_scenes(self, paths): 

300 logger.info('Loading kite InSAR scenes...') 

301 paths = util.select_files( 

302 paths, 

303 include=r'\.npz', 

304 show_progress=False) 

305 

306 for path in paths: 

307 self.add_kite_scene(filename=path) 

308 

309 if not self.kite_scenes: 

310 logger.warning('Could not find any kite scenes at %s.' % 

311 quote_paths(self.kite_scene_paths)) 

312 

    def add_kite_scene(self, filename):
        '''Load a single kite InSAR scene from file.

        :raises ImportError: if the kite module is not installed
        :raises AttributeError: if a scene with the same scene_id exists
        '''
        try:
            from kite import Scene
        except ImportError:
            raise ImportError('Module kite could not be imported,'
                              ' please install from https://pyrocko.org.')
        logger.debug('Loading kite scene from "%s"...' % filename)

        scene = Scene.load(filename)
        scene._log.setLevel(logger.level)

        # Enforce unique scene ids: the lookup must fail (NotFound) for
        # the new scene to be accepted.
        try:
            self.get_kite_scene(scene.meta.scene_id)
        except NotFound:
            self.kite_scenes.append(scene)
        else:
            raise AttributeError('Kite scene_id not unique for "%s".'
                                 % filename)

331 

332 def is_blacklisted(self, obj): 

333 try: 

334 nslc = self.get_nslc(obj) 

335 if nslc in self.blacklist: 

336 return True 

337 

338 except InvalidObject: 

339 pass 

340 

341 nsl = self.get_nsl(obj) 

342 return ( 

343 nsl in self.blacklist or 

344 nsl[1:2] in self.blacklist or 

345 nsl[:2] in self.blacklist) 

346 

347 def is_whitelisted(self, obj): 

348 if self.whitelist_nslc is None: 

349 return True 

350 

351 nsl = self.get_nsl(obj) 

352 

353 if ( 

354 nsl in self.whitelist or 

355 nsl[1:2] in self.whitelist or 

356 nsl[:2] in self.whitelist): 

357 

358 return True 

359 

360 try: 

361 nslc = self.get_nslc(obj) 

362 if nslc in self.whitelist_nslc: 

363 return True 

364 

365 except InvalidObject: 

366 return nsl in self.whitelist_nsl_xx 

367 

368 def has_clipping(self, nsl_or_nslc, tmin, tmax): 

369 if nsl_or_nslc not in self.clippings: 

370 return False 

371 

372 atimes = self.clippings[nsl_or_nslc] 

373 return num.any(num.logical_and(tmin < atimes, atimes <= tmax)) 

374 

375 def get_nsl(self, obj): 

376 if isinstance(obj, trace.Trace): 

377 net, sta, loc, _ = obj.nslc_id 

378 elif isinstance(obj, model.Station): 

379 net, sta, loc = obj.nsl() 

380 elif isinstance(obj, gf.Target): 

381 net, sta, loc, _ = obj.codes 

382 elif isinstance(obj, tuple) and len(obj) in (3, 4): 

383 net, sta, loc = obj[:3] 

384 else: 

385 raise InvalidObject( 

386 'Cannot get nsl code from given object of type "%s".' 

387 % type(obj)) 

388 

389 return net, sta, loc 

390 

391 def get_nslc(self, obj): 

392 if isinstance(obj, trace.Trace): 

393 return obj.nslc_id 

394 elif isinstance(obj, gf.Target): 

395 return obj.codes 

396 elif isinstance(obj, tuple) and len(obj) == 4: 

397 return obj 

398 else: 

399 raise InvalidObject( 

400 'Cannot get nslc code from given object "%s"' % type(obj)) 

401 

402 def get_tmin_tmax(self, obj): 

403 if isinstance(obj, trace.Trace): 

404 return obj.tmin, obj.tmax 

405 else: 

406 raise InvalidObject( 

407 'Cannot get tmin and tmax from given object of type "%s"' % 

408 type(obj)) 

409 

410 def get_station(self, obj): 

411 if self.is_blacklisted(obj): 

412 raise NotFound('Station is blacklisted:', self.get_nsl(obj)) 

413 

414 if not self.is_whitelisted(obj): 

415 raise NotFound('Station is not on whitelist:', self.get_nsl(obj)) 

416 

417 if isinstance(obj, model.Station): 

418 return obj 

419 

420 net, sta, loc = self.get_nsl(obj) 

421 

422 keys = [(net, sta, loc), (net, sta, ''), ('', sta, '')] 

423 for k in keys: 

424 if k in self.stations: 

425 return self.stations[k] 

426 

427 raise NotFound('No station information:', keys) 

428 

429 def get_stations(self): 

430 return [self.stations[k] for k in sorted(self.stations) 

431 if not self.is_blacklisted(self.stations[k]) 

432 and self.is_whitelisted(self.stations[k])] 

433 

434 def get_kite_scenes(self): 

435 return self.kite_scenes 

436 

437 def get_kite_scene(self, scene_id=None): 

438 if scene_id is None: 

439 if len(self.kite_scenes) == 0: 

440 raise AttributeError('No kite displacements defined.') 

441 return self.kite_scenes[0] 

442 else: 

443 for scene in self.kite_scenes: 

444 if scene.meta.scene_id == scene_id: 

445 return scene 

446 raise NotFound('No kite scene with id "%s" defined.' % scene_id) 

447 

448 def get_gnss_campaigns(self): 

449 return self.gnss_campaigns 

450 

451 def get_gnss_campaign(self, name): 

452 for camp in self.gnss_campaigns: 

453 if camp.name == name: 

454 return camp 

455 raise NotFound('GNSS campaign %s not found!' % name) 

456 

    def get_response(self, obj, quantity='displacement'):
        '''Get the instrument response for the given object's channel.

        SAC PZ responses are searched first, then StationXML documents.
        For 'velocity'/'acceleration' the SAC PZ displacement response is
        combined with one/two differentiations.

        :raises NotFound: if zero or multiple matching responses exist
        '''
        if (self.responses is None or len(self.responses) == 0) \
                and (self.responses_stationxml is None
                     or len(self.responses_stationxml) == 0):

            raise NotFound('No response information available.')

        quantity_to_unit = {
            'displacement': 'M',
            'velocity': 'M/S',
            'acceleration': 'M/S**2'}

        if self.is_blacklisted(obj):
            raise NotFound('Response is blacklisted:', self.get_nslc(obj))

        if not self.is_whitelisted(obj):
            raise NotFound('Response is not on whitelist:', self.get_nslc(obj))

        net, sta, loc, cha = self.get_nslc(obj)
        tmin, tmax = self.get_tmin_tmax(obj)

        # Candidate lookup keys, from most to least specific.
        keys_x = [
            (net, sta, loc, cha), (net, sta, '', cha), ('', sta, '', cha)]

        # Deduplicate while preserving order (keys coincide when loc/net
        # are already empty).
        keys = []
        for k in keys_x:
            if k not in keys:
                keys.append(k)

        candidates = []
        for k in keys:
            if k in self.responses:
                for x in self.responses[k]:
                    # The response epoch must fully cover the request span.
                    if x.tmin < tmin and (x.tmax is None or tmax < x.tmax):
                        if quantity == 'displacement':
                            candidates.append(x.response)
                        elif quantity == 'velocity':
                            # Differentiate once:
                            # displacement -> velocity.
                            candidates.append(trace.MultiplyResponse([
                                x.response,
                                trace.DifferentiationResponse()]))
                        elif quantity == 'acceleration':
                            # Differentiate twice:
                            # displacement -> acceleration.
                            candidates.append(trace.MultiplyResponse([
                                x.response,
                                trace.DifferentiationResponse(2)]))
                        else:
                            assert False

        for sx in self.responses_stationxml:
            try:
                candidates.append(
                    sx.get_pyrocko_response(
                        (net, sta, loc, cha),
                        timespan=(tmin, tmax),
                        fake_input_units=quantity_to_unit[quantity]))

            except (fs.NoResponseInformation, fs.MultipleResponseInformation):
                pass

        if len(candidates) == 1:
            return candidates[0]

        elif len(candidates) == 0:
            raise NotFound('No response found:', (net, sta, loc, cha))
        else:
            raise NotFound('Multiple responses found:', (net, sta, loc, cha))

522 

    def get_waveform_raw(
            self, obj,
            tmin,
            tmax,
            tpad=0.,
            toffset_noise_extract=0.,
            want_incomplete=False,
            extend_incomplete=False):
        '''Get raw waveform traces for [tmin-tpad, tmax+tpad].

        :param toffset_noise_extract: shift applied to the extraction
            window so that real noise can be taken from outside the
            signal window (synthetic tests); traces are shifted back
        :param want_incomplete: accept gappy/partial data
        :param extend_incomplete: repair a single incomplete trace by
            repeating edge samples
        :raises NotFound: if the waveform is blacklisted, not
            whitelisted, clipped, missing or gappy
        '''

        net, sta, loc, cha = self.get_nslc(obj)

        if self.is_blacklisted((net, sta, loc, cha)):
            raise NotFound(
                'Waveform is blacklisted:', (net, sta, loc, cha))

        if not self.is_whitelisted((net, sta, loc, cha)):
            raise NotFound(
                'Waveform is not on whitelist:', (net, sta, loc, cha))

        # Clippings may be registered per station (nsl) or per channel
        # (nslc), depending on self.clip_handling.
        if self.clip_handling == 'by_nsl':
            if self.has_clipping((net, sta, loc), tmin, tmax):
                raise NotFound(
                    'Waveform clipped:', (net, sta, loc))

        elif self.clip_handling == 'by_nslc':
            if self.has_clipping((net, sta, loc, cha), tmin, tmax):
                raise NotFound(
                    'Waveform clipped:', (net, sta, loc, cha))

        trs = self.pile.all(
            tmin=tmin+toffset_noise_extract,
            tmax=tmax+toffset_noise_extract,
            tpad=tpad,
            trace_selector=lambda tr: tr.nslc_id == (net, sta, loc, cha),
            want_incomplete=want_incomplete or extend_incomplete)

        if self.apply_displaced_sampling_workaround:
            for tr in trs:
                tr.snap()

        # Undo the noise-extraction offset so returned traces are aligned
        # with the requested window.
        if toffset_noise_extract != 0.0:
            for tr in trs:
                tr.shift(-toffset_noise_extract)

        if extend_incomplete and len(trs) == 1:
            # NOTE(review): the extend bounds still include
            # toffset_noise_extract although the trace was already
            # shifted back above -- confirm this is intended.
            trs[0].extend(
                tmin + toffset_noise_extract - tpad,
                tmax + toffset_noise_extract + tpad,
                fillmethod='repeat')

        if not want_incomplete and len(trs) != 1:
            if len(trs) == 0:
                message = 'Waveform missing or incomplete.'
            else:
                message = 'Waveform has gaps.'

            raise NotFound(
                message,
                codes=(net, sta, loc, cha),
                time_range=(
                    tmin + toffset_noise_extract - tpad,
                    tmax + toffset_noise_extract + tpad))

        return trs

587 

    def get_waveform_restituted(
            self,
            obj, quantity='displacement',
            tmin=None, tmax=None, tpad=0.,
            tfade=0., freqlimits=None, deltat=None,
            toffset_noise_extract=0.,
            want_incomplete=False,
            extend_incomplete=False):
        '''Get raw traces and their instrument-corrected counterparts.

        Raw data is optionally downsampled, then deconvolved with the
        instrument response for the requested *quantity*.

        :returns: tuple ``(trs_restituted, trs_raw)``
        :raises NotFound: if data or response is unavailable, or if the
            deconvolution is singular
        '''

        trs_raw = self.get_waveform_raw(
            obj, tmin=tmin, tmax=tmax, tpad=tpad+tfade,
            toffset_noise_extract=toffset_noise_extract,
            want_incomplete=want_incomplete,
            extend_incomplete=extend_incomplete)

        trs_restituted = []
        for tr in trs_raw:
            if deltat is not None:
                tr.downsample_to(deltat, snap=True, allow_upsample_max=5)
                # Force the exact target sampling interval to avoid
                # floating-point drift after downsampling.
                tr.deltat = deltat

            resp = self.get_response(tr, quantity=quantity)
            try:
                trs_restituted.append(
                    tr.transfer(
                        tfade=tfade, freqlimits=freqlimits,
                        transfer_function=resp, invert=True, demean=True))

            except trace.InfiniteResponse:
                raise NotFound(
                    'Instrument response deconvolution failed '
                    '(divide by zero)', tr.nslc_id)

        return trs_restituted, trs_raw

622 

    def _get_projections(
            self, station, backazimuth, source, target, tmin, tmax):
        '''Determine channel projections for a station.

        :returns: list of ``(matrix, in_channels, out_channels)``
            projections to ENZ and, when a backazimuth is available, RTZ
        :raises NotFound: if no projection can be determined
        '''

        # fill in missing channel information (happens when station file
        # does not contain any channel information)
        if not station.get_channels():
            station = copy.deepcopy(station)

            nsl = station.nsl()
            # Guess the available channels from the waveform headers at
            # hand (no data is loaded).
            trs = self.pile.all(
                tmin=tmin, tmax=tmax,
                trace_selector=lambda tr: tr.nslc_id[:3] == nsl,
                load_data=False)

            channels = list(set(tr.channel for tr in trs))
            station.set_channels_by_name(*channels)

        projections = []
        projections.extend(station.guess_projections_to_enu(
            out_channels=('E', 'N', 'Z')))

        # An explicit source/target geometry overrides a given backazimuth.
        if source is not None and target is not None:
            backazimuth = source.azibazi_to(target)[1]

        if backazimuth is not None:
            projections.extend(station.guess_projections_to_rtu(
                out_channels=('R', 'T', 'Z'),
                backazimuth=backazimuth))

        if not projections:
            raise NotFound(
                'Cannot determine projection of data components:',
                station.nsl())

        return projections

658 

659 def _get_waveform( 

660 self, 

661 obj, quantity='displacement', 

662 tmin=None, tmax=None, tpad=0., 

663 tfade=0., freqlimits=None, deltat=None, cache=None, 

664 backazimuth=None, 

665 source=None, 

666 target=None, 

667 debug=False): 

668 

669 assert not debug or (debug and cache is None) 

670 

671 if cache is True: 

672 cache = self._cache 

673 

674 _, _, _, channel = self.get_nslc(obj) 

675 station = self.get_station(self.get_nsl(obj)) 

676 

677 nslc = station.nsl() + (channel,) 

678 

679 if self.is_blacklisted(nslc): 

680 raise NotFound( 

681 'Waveform is blacklisted:', nslc) 

682 

683 if not self.is_whitelisted(nslc): 

684 raise NotFound( 

685 'Waveform is not on whitelist:', nslc) 

686 

687 assert tmin is not None 

688 assert tmax is not None 

689 

690 tmin = float(tmin) 

691 tmax = float(tmax) 

692 

693 nslc = tuple(nslc) 

694 

695 cache_k = nslc + ( 

696 tmin, tmax, tuple(freqlimits), tfade, deltat, tpad, quantity) 

697 if cache is not None and (nslc + cache_k) in cache: 

698 obj = cache[nslc + cache_k] 

699 if isinstance(obj, Exception): 

700 raise obj 

701 elif obj is None: 

702 raise NotFound('Waveform not found!', nslc) 

703 else: 

704 return obj 

705 

706 syn_test = self.synthetic_test 

707 toffset_noise_extract = 0.0 

708 if syn_test: 

709 if not syn_test.respect_data_availability: 

710 if syn_test.real_noise_scale != 0.0: 

711 raise DatasetError( 

712 'respect_data_availability=False and ' 

713 'addition of real noise cannot be combined.') 

714 

715 tr = syn_test.get_waveform( 

716 nslc, tmin, tmax, 

717 tfade=tfade, 

718 freqlimits=freqlimits) 

719 

720 if cache is not None: 

721 cache[tr.nslc_id + cache_k] = tr 

722 

723 if debug: 

724 return [], [], [] 

725 else: 

726 return tr 

727 

728 if syn_test.real_noise_scale != 0.0: 

729 toffset_noise_extract = syn_test.real_noise_toffset 

730 

731 abs_delays = [] 

732 for ocha in 'ENZRT': 

733 sc = self.station_corrections.get(station.nsl() + (channel,), None) 

734 if sc: 

735 abs_delays.append(abs(sc.delay)) 

736 

737 if abs_delays: 

738 abs_delay_max = max(abs_delays) 

739 else: 

740 abs_delay_max = 0.0 

741 

742 projections = self._get_projections( 

743 station, backazimuth, source, target, tmin, tmax) 

744 

745 try: 

746 trs_projected = [] 

747 trs_restituted = [] 

748 trs_raw = [] 

749 exceptions = [] 

750 for matrix, in_channels, out_channels in projections: 

751 deps = trace.project_dependencies( 

752 matrix, in_channels, out_channels) 

753 

754 try: 

755 trs_restituted_group = [] 

756 trs_raw_group = [] 

757 if channel in deps: 

758 for cha in deps[channel]: 

759 trs_restituted_this, trs_raw_this = \ 

760 self.get_waveform_restituted( 

761 station.nsl() + (cha,), 

762 quantity=quantity, 

763 tmin=tmin, tmax=tmax, 

764 tpad=tpad+abs_delay_max, 

765 toffset_noise_extract=toffset_noise_extract, # noqa 

766 tfade=tfade, 

767 freqlimits=freqlimits, 

768 deltat=deltat, 

769 want_incomplete=debug, 

770 extend_incomplete=self.extend_incomplete) 

771 

772 trs_restituted_group.extend(trs_restituted_this) 

773 trs_raw_group.extend(trs_raw_this) 

774 

775 trs_projected.extend( 

776 trace.project( 

777 trs_restituted_group, matrix, 

778 in_channels, out_channels)) 

779 

780 trs_restituted.extend(trs_restituted_group) 

781 trs_raw.extend(trs_raw_group) 

782 

783 except NotFound as e: 

784 exceptions.append((in_channels, out_channels, e)) 

785 

786 if not trs_projected: 

787 err = [] 

788 for (in_channels, out_channels, e) in exceptions: 

789 sin = ', '.join(c.name for c in in_channels) 

790 sout = ', '.join(c.name for c in out_channels) 

791 err.append('(%s) -> (%s): %s' % (sin, sout, e)) 

792 

793 raise NotFound('\n'.join(err)) 

794 

795 for tr in trs_projected: 

796 sc = self.station_corrections.get(tr.nslc_id, None) 

797 if sc: 

798 if self.apply_correction_factors: 

799 tr.ydata /= sc.factor 

800 

801 if self.apply_correction_delays: 

802 tr.shift(-sc.delay) 

803 

804 if tmin is not None and tmax is not None: 

805 tr.chop(tmin, tmax) 

806 

807 if syn_test: 

808 trs_projected_synthetic = [] 

809 for tr in trs_projected: 

810 if tr.channel == channel: 

811 tr_syn = syn_test.get_waveform( 

812 tr.nslc_id, tmin, tmax, 

813 tfade=tfade, freqlimits=freqlimits) 

814 

815 if tr_syn: 

816 if syn_test.real_noise_scale != 0.0: 

817 tr_syn = tr_syn.copy() 

818 tr_noise = tr.copy() 

819 tr_noise.set_ydata( 

820 tr_noise.get_ydata() 

821 * syn_test.real_noise_scale) 

822 

823 tr_syn.add(tr_noise) 

824 

825 trs_projected_synthetic.append(tr_syn) 

826 

827 trs_projected = trs_projected_synthetic 

828 

829 if cache is not None: 

830 for tr in trs_projected: 

831 cache[tr.nslc_id + cache_k] = tr 

832 

833 tr_return = None 

834 for tr in trs_projected: 

835 if tr.channel == channel: 

836 tr_return = tr 

837 

838 if debug: 

839 return trs_projected, trs_restituted, trs_raw, tr_return 

840 

841 elif tr_return: 

842 return tr_return 

843 

844 else: 

845 raise NotFound( 

846 'waveform not available', station.nsl() + (channel,)) 

847 

848 except NotFound: 

849 if cache is not None: 

850 cache[nslc + cache_k] = None 

851 raise 

852 

853 def get_waveform(self, obj, tinc_cache=None, **kwargs): 

854 tmin = kwargs['tmin'] 

855 tmax = kwargs['tmax'] 

856 if tinc_cache is not None: 

857 tmin_r = (math.floor(tmin / tinc_cache) - 1.0) * tinc_cache 

858 tmax_r = (math.ceil(tmax / tinc_cache) + 1.0) * tinc_cache 

859 else: 

860 tmin_r = tmin 

861 tmax_r = tmax 

862 

863 kwargs['tmin'] = tmin_r 

864 kwargs['tmax'] = tmax_r 

865 

866 if kwargs.get('debug', None): 

867 return self._get_waveform(obj, **kwargs) 

868 else: 

869 tr = self._get_waveform(obj, **kwargs) 

870 return tr.chop(tmin, tmax, inplace=False) 

871 

872 def get_events(self, magmin=None, event_names=None): 

873 evs = [] 

874 for ev in self.events: 

875 if ((magmin is None or ev.magnitude >= magmin) and 

876 (event_names is None or ev.name in event_names)): 

877 evs.append(ev) 

878 

879 return evs 

880 

881 def get_event_by_time(self, t, magmin=None): 

882 evs = self.get_events(magmin=magmin) 

883 ev_x = None 

884 for ev in evs: 

885 if ev_x is None or abs(ev.time - t) < abs(ev_x.time - t): 

886 ev_x = ev 

887 

888 if not ev_x: 

889 raise NotFound( 

890 'No event information matching criteria (t=%s, magmin=%s).' % 

891 (t, magmin)) 

892 

893 return ev_x 

894 

895 def get_event(self): 

896 if self._event_name is None: 

897 raise NotFound('No main event selected in dataset!') 

898 

899 for ev in self.events: 

900 if ev.name == self._event_name: 

901 return ev 

902 

903 raise NotFound('No such event: %s' % self._event_name) 

904 

    def get_picks(self):
        '''Return phase picks keyed by ``(nsl, phasename, eventname)``.

        Built lazily from ``self.pick_markers`` and cached; each
        PhaseMarker is associated with an event via its event hash.

        :raises DatasetError: on duplicate event names, duplicate picks
            or picks that cannot be associated with an event
        '''
        if self._picks is None:
            # Map event hashes to names, from event markers in the pick
            # files and from the dataset's own events.
            hash_to_name = {}
            names = set()
            for marker in self.pick_markers:
                if isinstance(marker, pmarker.EventMarker):
                    name = marker.get_event().name
                    if name in names:
                        raise DatasetError(
                            'Duplicate event name "%s" in picks.' % name)

                    names.add(name)
                    hash_to_name[marker.get_event_hash()] = name

            for ev in self.events:
                hash_to_name[ev.get_hash()] = ev.name

            picks = {}
            for marker in self.pick_markers:
                if isinstance(marker, pmarker.PhaseMarker):
                    ehash = marker.get_event_hash()

                    nsl = marker.one_nslc()[:3]
                    phasename = marker.get_phasename()

                    if ehash is None or ehash not in hash_to_name:
                        raise DatasetError(
                            'Unassociated pick: %s.%s.%s, %s' %
                            (nsl + (phasename, )))

                    eventname = hash_to_name[ehash]

                    if (nsl, phasename, eventname) in picks:
                        raise DatasetError(
                            'Duplicate pick: %s.%s.%s, %s' %
                            (nsl + (phasename, )))

                    picks[nsl, phasename, eventname] = marker

            self._picks = picks

        return self._picks

947 

948 def get_pick(self, eventname, obj, phasename): 

949 nsl = self.get_nsl(obj) 

950 return self.get_picks().get((nsl, phasename, eventname), None) 

951 

952 

class DatasetConfig(HasPaths):
    '''Configuration for a Grond `Dataset` object.

    Paths may contain the ``${event_name}`` template which is expanded
    per event (see :py:meth:`get_dataset`).
    '''

    stations_path = Path.T(
        optional=True,
        help='List of files with station coordinates in Pyrocko format.')
    stations_stationxml_paths = List.T(
        Path.T(),
        optional=True,
        help='List of files with station coordinates in StationXML format.')
    events_path = Path.T(
        optional=True,
        help='File with hypocenter information and possibly'
             ' reference solution')
    waveform_paths = List.T(
        Path.T(),
        optional=True,
        help='List of directories with raw waveform data')
    clippings_path = Path.T(
        optional=True)
    responses_sacpz_path = Path.T(
        optional=True,
        help='List of SACPZ response files for restitution of'
             ' the raw waveform data.')
    responses_stationxml_paths = List.T(
        Path.T(),
        optional=True,
        help='List of StationXML response files for restitution of'
             ' the raw waveform data.')
    station_corrections_path = Path.T(
        optional=True,
        help='File containing station correction informations.')
    apply_correction_factors = Bool.T(
        optional=True,
        default=True,
        help='Apply correction factors from station corrections.')
    apply_correction_delays = Bool.T(
        optional=True,
        default=True,
        help='Apply correction delays from station corrections.')
    apply_displaced_sampling_workaround = Bool.T(
        optional=True,
        default=False,
        help='Work around displaced sampling issues.')
    extend_incomplete = Bool.T(
        default=False,
        help='Extend incomplete seismic traces.')
    picks_paths = List.T(
        Path.T())
    blacklist_paths = List.T(
        Path.T(),
        help='List of text files with blacklisted stations.')
    blacklist = List.T(
        String.T(),
        help='Stations/components to be excluded according to their STA, '
             'NET.STA, NET.STA.LOC, or NET.STA.LOC.CHA codes.')
    whitelist_paths = List.T(
        Path.T(),
        help='List of text files with whitelisted stations.')
    whitelist = List.T(
        String.T(),
        optional=True,
        help='If not None, list of stations/components to include according '
             'to their STA, NET.STA, NET.STA.LOC, or NET.STA.LOC.CHA codes. '
             'Note: ''when whitelisting on channel level, both, the raw and '
             'the processed channel codes have to be listed.')
    synthetic_test = SyntheticTest.T(
        optional=True)

    kite_scene_paths = List.T(
        Path.T(),
        optional=True,
        help='List of directories for the InSAR scenes.')

    gnss_campaign_paths = List.T(
        Path.T(),
        optional=True,
        help='List of directories for the GNSS campaign data.')

1031 

    def __init__(self, *args, **kwargs):
        HasPaths.__init__(self, *args, **kwargs)
        # Cache of Dataset objects, keyed by event name (see get_dataset).
        self._ds = {}

1035 

1036 def get_event_names(self): 

1037 logger.info('Loading events ...') 

1038 

1039 def extra(path): 

1040 return expand_template(path, dict( 

1041 event_name='*')) 

1042 

1043 def fp(path): 

1044 return self.expand_path(path, extra=extra) 

1045 

1046 def check_events(events, fn): 

1047 for ev in events: 

1048 if not ev.name: 

1049 logger.warning('Event in %s has no name!', fn) 

1050 return 

1051 if not ev.lat or not ev.lon: 

1052 logger.warning('Event %s has inconsistent coordinates!', 

1053 ev.name) 

1054 if not ev.depth: 

1055 logger.warning('Event %s has no depth!', ev.name) 

1056 if not ev.time: 

1057 logger.warning('Event %s has no time!', ev.name) 

1058 

1059 events = [] 

1060 events_path = fp(self.events_path) 

1061 fns = glob.glob(events_path) 

1062 if not fns: 

1063 raise DatasetError('No event files matching "%s".' % events_path) 

1064 

1065 for fn in fns: 

1066 logger.debug('Loading from file %s' % fn) 

1067 ev = model.load_events(filename=fn) 

1068 check_events(ev, fn) 

1069 

1070 events.extend(ev) 

1071 

1072 event_names = [ev_.name for ev_ in events] 

1073 event_names.sort() 

1074 return event_names 

1075 

    def get_dataset(self, event_name):
        '''Build (or return the cached) Dataset for the given event.

        Path templates are expanded with the event name, all configured
        data is loaded and the configured options are applied.

        :raises DatasetError: if any input file cannot be loaded
        '''
        if event_name not in self._ds:
            def extra(path):
                return expand_template(path, dict(
                    event_name=event_name))

            def fp(path):
                # Expand path template(s); warn about missing paths but
                # keep going -- loading errors surface later.
                p = self.expand_path(path, extra=extra)
                if p is None:
                    return None

                if isinstance(p, list):
                    for path in p:
                        if not op.exists(path):
                            logger.warning('Path %s does not exist.' % path)
                else:
                    if not op.exists(p):
                        logger.warning('Path %s does not exist.' % p)

                return p

            ds = Dataset(event_name)
            try:
                ds.add_events(filename=fp(self.events_path))

                ds.add_stations(
                    pyrocko_stations_filename=fp(self.stations_path),
                    stationxml_filenames=fp(self.stations_stationxml_paths))

                if self.waveform_paths:
                    ds.add_waveforms(paths=fp(self.waveform_paths))

                if self.kite_scene_paths:
                    ds.add_kite_scenes(paths=fp(self.kite_scene_paths))

                if self.gnss_campaign_paths:
                    ds.add_gnss_campaigns(paths=fp(self.gnss_campaign_paths))

                if self.clippings_path:
                    ds.add_clippings(markers_filename=fp(self.clippings_path))

                if self.responses_sacpz_path:
                    ds.add_responses(
                        sacpz_dirname=fp(self.responses_sacpz_path))

                if self.responses_stationxml_paths:
                    ds.add_responses(
                        stationxml_filenames=fp(
                            self.responses_stationxml_paths))

                if self.station_corrections_path:
                    ds.add_station_corrections(
                        filename=fp(self.station_corrections_path))

                ds.apply_correction_factors = self.apply_correction_factors
                ds.apply_correction_delays = self.apply_correction_delays
                ds.apply_displaced_sampling_workaround = \
                    self.apply_displaced_sampling_workaround
                ds.extend_incomplete = self.extend_incomplete

                for picks_path in self.picks_paths:
                    ds.add_picks(
                        filename=fp(picks_path))

                ds.add_blacklist(self.blacklist)
                ds.add_blacklist(filenames=fp(self.blacklist_paths))
                if self.whitelist:
                    ds.add_whitelist(self.whitelist)
                if self.whitelist_paths:
                    ds.add_whitelist(filenames=fp(self.whitelist_paths))

                # Deep-copy so per-event state in the synthetic test does
                # not leak between datasets.
                ds.set_synthetic_test(copy.deepcopy(self.synthetic_test))
                self._ds[event_name] = ds
            except (FileLoadError, OSError) as e:
                raise DatasetError(str(e))

        return self._ds[event_name]

1153 

1154 

# Public API of this module.
__all__ = '''
    Dataset
    DatasetConfig
    DatasetError
    InvalidObject
    NotFound
    StationCorrection
    load_station_corrections
    dump_station_corrections
'''.split()