Coverage for /usr/local/lib/python3.11/dist-packages/grond/dataset.py: 72%

677 statements  

« prev     ^ index     » next       coverage.py v6.5.0, created at 2023-10-26 16:25 +0000

1import glob 

2import copy 

3import os.path as op 

4import logging 

5import math 

6import numpy as num 

7 

8from collections import defaultdict 

9from pyrocko import util, pile, model, config, trace, \ 

10 marker as pmarker 

11from pyrocko.io.io_common import FileLoadError 

12from pyrocko.fdsn import enhanced_sacpz, station as fs 

13from pyrocko.guts import (Object, Tuple, String, Float, List, Bool, dump_all, 

14 load_all) 

15 

16from pyrocko import gf 

17 

18from .meta import Path, HasPaths, expand_template, GrondError 

19 

20from .synthetic_tests import SyntheticTest 

21 

22guts_prefix = 'grond' 

23logger = logging.getLogger('grond.dataset') 

24 

25 

26def quote_paths(paths): 

27 return ', '.join('"%s"' % path for path in paths) 

28 

29 

30class InvalidObject(Exception): 

31 pass 

32 

33 

34class NotFound(Exception): 

35 def __init__(self, reason, codes=None, time_range=None): 

36 self.reason = reason 

37 self.time_range = time_range 

38 self.codes = codes 

39 

40 def __str__(self): 

41 s = self.reason 

42 if self.codes: 

43 s += ' (%s)' % '.'.join(self.codes) 

44 

45 if self.time_range: 

46 s += ' (%s - %s)' % ( 

47 util.time_to_str(self.time_range[0]), 

48 util.time_to_str(self.time_range[1])) 

49 

50 return s 

51 

52 

53class DatasetError(GrondError): 

54 pass 

55 

56 

57class StationCorrection(Object): 

58 codes = Tuple.T(4, String.T()) 

59 delay = Float.T() 

60 factor = Float.T() 

61 

62 

63def load_station_corrections(filename): 

64 scs = load_all(filename=filename) 

65 for sc in scs: 

66 assert isinstance(sc, StationCorrection) 

67 

68 return scs 

69 

70 

71def dump_station_corrections(station_corrections, filename): 

72 return dump_all(station_corrections, filename=filename) 

73 

74 

75class Dataset(object): 

76 

77 def __init__(self, event_name=None): 

78 self.events = [] 

79 self.pile = pile.Pile() 

80 self.stations = {} 

81 self.responses = defaultdict(list) 

82 self.responses_stationxml = [] 

83 self.clippings = {} 

84 self.blacklist = set() 

85 self.whitelist_nslc = None 

86 self.whitelist_nsl_xx = None 

87 self.whitelist = None 

88 self.station_corrections = {} 

89 self.station_factors = {} 

90 self.pick_markers = [] 

91 self.apply_correction_delays = True 

92 self.apply_correction_factors = True 

93 self.apply_displaced_sampling_workaround = False 

94 self.extend_incomplete = False 

95 self.clip_handling = 'by_nsl' 

96 self.kite_scenes = [] 

97 self.gnss_campaigns = [] 

98 self.synthetic_test = None 

99 self._picks = None 

100 self._cache = {} 

101 self._event_name = event_name 

102 

103 def empty_cache(self): 

104 self._cache = {} 

105 

106 def set_synthetic_test(self, synthetic_test): 

107 self.synthetic_test = synthetic_test 

108 

109 def add_stations( 

110 self, 

111 stations=None, 

112 pyrocko_stations_filename=None, 

113 stationxml_filenames=None): 

114 

115 if stations is not None: 

116 for station in stations: 

117 self.stations[station.nsl()] = station 

118 

119 if pyrocko_stations_filename is not None: 

120 logger.debug( 

121 'Loading stations from file "%s"...' % 

122 pyrocko_stations_filename) 

123 

124 for station in model.load_stations(pyrocko_stations_filename): 

125 self.stations[station.nsl()] = station 

126 

127 if stationxml_filenames is not None and len(stationxml_filenames) > 0: 

128 

129 for stationxml_filename in stationxml_filenames: 

130 if not op.exists(stationxml_filename): 

131 continue 

132 

133 logger.debug( 

134 'Loading stations from StationXML file "%s"...' % 

135 stationxml_filename) 

136 

137 sx = fs.load_xml(filename=stationxml_filename) 

138 ev = self.get_event() 

139 stations = sx.get_pyrocko_stations(time=ev.time) 

140 if len(stations) == 0: 

141 logger.warning( 

142 'No stations found for time %s in file "%s".' % ( 

143 util.time_to_str(ev.time), stationxml_filename)) 

144 

145 for station in stations: 

146 logger.debug('Adding station: %s.%s.%s' % station.nsl()) 

147 channels = station.get_channels() 

148 if len(channels) == 1 and channels[0].name.endswith('Z'): 

149 logger.warning( 

150 'Station "%s" has vertical component' 

151 ' information only, adding mocked channels.' 

152 % station.nsl_string()) 

153 station.add_channel( 

154 model.Channel(channels[0].name[:-1] + 'N')) 

155 station.add_channel( 

156 model.Channel(channels[0].name[:-1] + 'E')) 

157 

158 self.stations[station.nsl()] = station 

159 

160 def add_events(self, events=None, filename=None): 

161 if events is not None: 

162 self.events.extend(events) 

163 

164 if filename is not None: 

165 logger.debug('Loading events from file "%s"...' % filename) 

166 try: 

167 events = model.load_events(filename) 

168 self.events.extend(events) 

169 logger.info( 

170 'Loading events from %s: %i events found.' % 

171 (filename, len(events))) 

172 except Exception as e: 

173 logger.warning('Could not load events from %s!', filename) 

174 raise e 

175 

176 def add_waveforms(self, paths, regex=None, fileformat='detect', 

177 show_progress=False): 

178 cachedirname = config.config().cache_dir 

179 

180 logger.debug('Selecting waveform files %s...' % quote_paths(paths)) 

181 fns = util.select_files(paths, include=regex, 

182 show_progress=show_progress) 

183 cache = pile.get_cache(cachedirname) 

184 logger.debug('Scanning waveform files %s...' % quote_paths(paths)) 

185 self.pile.load_files(sorted(fns), cache=cache, 

186 fileformat=fileformat, 

187 show_progress=show_progress) 

188 

189 def add_responses(self, sacpz_dirname=None, stationxml_filenames=None): 

190 if sacpz_dirname: 

191 logger.debug( 

192 'Loading SAC PZ responses from "%s"...' % sacpz_dirname) 

193 for x in enhanced_sacpz.iload_dirname(sacpz_dirname): 

194 self.responses[x.codes].append(x) 

195 

196 if stationxml_filenames: 

197 for stationxml_filename in stationxml_filenames: 

198 if not op.exists(stationxml_filename): 

199 continue 

200 

201 logger.debug( 

202 'Loading StationXML responses from "%s"...' % 

203 stationxml_filename) 

204 

205 self.responses_stationxml.append( 

206 fs.load_xml(filename=stationxml_filename)) 

207 

208 def add_clippings(self, markers_filename): 

209 markers = pmarker.load_markers(markers_filename) 

210 clippings = {} 

211 for marker in markers: 

212 nslc = marker.one_nslc() 

213 nsl = nslc[:3] 

214 if nsl not in clippings: 

215 clippings[nsl] = [] 

216 

217 if nslc not in clippings: 

218 clippings[nslc] = [] 

219 

220 clippings[nsl].append(marker.tmin) 

221 clippings[nslc].append(marker.tmin) 

222 

223 for k, times in clippings.items(): 

224 atimes = num.array(times, dtype=float) 

225 if k not in self.clippings: 

226 self.clippings[k] = atimes 

227 else: 

228 self.clippings[k] = num.concatenate(self.clippings, atimes) 

229 

230 def add_blacklist(self, blacklist=[], filenames=None): 

231 logger.debug('Loading blacklisted stations...') 

232 if filenames: 

233 blacklist = list(blacklist) 

234 for filename in filenames: 

235 if op.exists(filename): 

236 with open(filename, 'r') as f: 

237 blacklist.extend( 

238 s.strip() for s in f.read().splitlines()) 

239 else: 

240 logger.warning('No such blacklist file: %s' % filename) 

241 

242 for x in blacklist: 

243 if isinstance(x, str): 

244 x = tuple(x.split('.')) 

245 self.blacklist.add(x) 

246 

247 def add_whitelist(self, whitelist=[], filenames=None): 

248 logger.debug('Loading whitelisted stations...') 

249 if filenames: 

250 whitelist = list(whitelist) 

251 for filename in filenames: 

252 with open(filename, 'r') as f: 

253 whitelist.extend(s.strip() for s in f.read().splitlines()) 

254 

255 if self.whitelist_nslc is None: 

256 self.whitelist_nslc = set() 

257 self.whitelist = set() 

258 self.whitelist_nsl_xx = set() 

259 

260 for x in whitelist: 

261 if isinstance(x, str): 

262 x = tuple(x.split('.')) 

263 if len(x) == 4: 

264 self.whitelist_nslc.add(x) 

265 self.whitelist_nsl_xx.add(x[:3]) 

266 else: 

267 self.whitelist.add(x) 

268 

269 def add_station_corrections(self, filename): 

270 self.station_corrections.update( 

271 (sc.codes, sc) for sc in load_station_corrections(filename)) 

272 

273 def add_picks(self, filename): 

274 self.pick_markers.extend( 

275 pmarker.load_markers(filename)) 

276 

277 self._picks = None 

278 

279 def add_gnss_campaigns(self, paths): 

280 paths = util.select_files( 

281 paths, 

282 include=r'\.yml|\.yaml', 

283 show_progress=False) 

284 

285 for path in paths: 

286 self.add_gnss_campaign(filename=path) 

287 

288 def add_gnss_campaign(self, filename): 

289 try: 

290 from pyrocko.model import gnss # noqa 

291 except ImportError: 

292 raise ImportError('Module pyrocko.model.gnss not found,' 

293 ' please upgrade pyrocko!') 

294 logger.debug('Loading GNSS campaign from "%s"...' % filename) 

295 

296 campaign = load_all(filename=filename) 

297 self.gnss_campaigns.append(campaign[0]) 

298 

299 def add_kite_scenes(self, paths): 

300 logger.info('Loading kite InSAR scenes...') 

301 paths = util.select_files( 

302 paths, 

303 include=r'\.npz', 

304 show_progress=False) 

305 

306 for path in paths: 

307 self.add_kite_scene(filename=path) 

308 

309 if not self.kite_scenes: 

310 logger.warning('Could not find any kite scenes at %s.' % 

311 quote_paths(self.kite_scene_paths)) 

312 

313 def add_kite_scene(self, filename): 

314 try: 

315 from kite import Scene 

316 except ImportError: 

317 raise ImportError('Module kite could not be imported,' 

318 ' please install from https://pyrocko.org.') 

319 logger.debug('Loading kite scene from "%s"...' % filename) 

320 

321 scene = Scene.load(filename) 

322 scene._log.setLevel(logger.level) 

323 

324 try: 

325 self.get_kite_scene(scene.meta.scene_id) 

326 except NotFound: 

327 self.kite_scenes.append(scene) 

328 else: 

329 raise AttributeError('Kite scene_id not unique for "%s".' 

330 % filename) 

331 

332 def is_blacklisted(self, obj): 

333 try: 

334 nslc = self.get_nslc(obj) 

335 if nslc in self.blacklist: 

336 return True 

337 

338 except InvalidObject: 

339 pass 

340 

341 nsl = self.get_nsl(obj) 

342 return ( 

343 nsl in self.blacklist or 

344 nsl[1:2] in self.blacklist or 

345 nsl[:2] in self.blacklist) 

346 

347 def is_whitelisted(self, obj): 

348 if self.whitelist_nslc is None: 

349 return True 

350 

351 nsl = self.get_nsl(obj) 

352 

353 if ( 

354 nsl in self.whitelist or 

355 nsl[1:2] in self.whitelist or 

356 nsl[:2] in self.whitelist): 

357 

358 return True 

359 

360 try: 

361 nslc = self.get_nslc(obj) 

362 if nslc in self.whitelist_nslc: 

363 return True 

364 

365 except InvalidObject: 

366 return nsl in self.whitelist_nsl_xx 

367 

368 def has_clipping(self, nsl_or_nslc, tmin, tmax): 

369 if nsl_or_nslc not in self.clippings: 

370 return False 

371 

372 atimes = self.clippings[nsl_or_nslc] 

373 return num.any(num.logical_and(tmin < atimes, atimes <= tmax)) 

374 

375 def get_nsl(self, obj): 

376 if isinstance(obj, trace.Trace): 

377 net, sta, loc, _ = obj.nslc_id 

378 elif isinstance(obj, model.Station): 

379 net, sta, loc = obj.nsl() 

380 elif isinstance(obj, gf.Target): 

381 net, sta, loc, _ = obj.codes 

382 elif isinstance(obj, tuple) and len(obj) in (3, 4): 

383 net, sta, loc = obj[:3] 

384 else: 

385 raise InvalidObject( 

386 'Cannot get nsl code from given object of type "%s".' 

387 % type(obj)) 

388 

389 return net, sta, loc 

390 

391 def get_nslc(self, obj): 

392 if isinstance(obj, trace.Trace): 

393 return obj.nslc_id 

394 elif isinstance(obj, gf.Target): 

395 return obj.codes 

396 elif isinstance(obj, tuple) and len(obj) == 4: 

397 return obj 

398 else: 

399 raise InvalidObject( 

400 'Cannot get nslc code from given object "%s"' % type(obj)) 

401 

402 def get_tmin_tmax(self, obj): 

403 if isinstance(obj, trace.Trace): 

404 return obj.tmin, obj.tmax 

405 else: 

406 raise InvalidObject( 

407 'Cannot get tmin and tmax from given object of type "%s"' % 

408 type(obj)) 

409 

410 def get_station(self, obj): 

411 if self.is_blacklisted(obj): 

412 raise NotFound('Station is blacklisted:', self.get_nsl(obj)) 

413 

414 if not self.is_whitelisted(obj): 

415 raise NotFound('Station is not on whitelist:', self.get_nsl(obj)) 

416 

417 if isinstance(obj, model.Station): 

418 return obj 

419 

420 net, sta, loc = self.get_nsl(obj) 

421 

422 keys = [(net, sta, loc), (net, sta, ''), ('', sta, '')] 

423 for k in keys: 

424 if k in self.stations: 

425 return self.stations[k] 

426 

427 raise NotFound('No station information:', keys) 

428 

429 def get_stations(self): 

430 return [self.stations[k] for k in sorted(self.stations) 

431 if not self.is_blacklisted(self.stations[k]) 

432 and self.is_whitelisted(self.stations[k])] 

433 

434 def get_kite_scenes(self): 

435 return self.kite_scenes 

436 

437 def get_kite_scene(self, scene_id=None): 

438 if scene_id is None: 

439 if len(self.kite_scenes) == 0: 

440 raise AttributeError('No kite displacements defined.') 

441 return self.kite_scenes[0] 

442 else: 

443 for scene in self.kite_scenes: 

444 if scene.meta.scene_id == scene_id: 

445 return scene 

446 raise NotFound('No kite scene with id "%s" defined.' % scene_id) 

447 

448 def get_gnss_campaigns(self): 

449 return self.gnss_campaigns 

450 

451 def get_gnss_campaign(self, name): 

452 for camp in self.gnss_campaigns: 

453 if camp.name == name: 

454 return camp 

455 raise NotFound('GNSS campaign %s not found!' % name) 

456 

457 def get_response(self, obj, quantity='displacement'): 

458 if (self.responses is None or len(self.responses) == 0) \ 

459 and (self.responses_stationxml is None 

460 or len(self.responses_stationxml) == 0): 

461 

462 raise NotFound('No response information available.') 

463 

464 quantity_to_unit = { 

465 'displacement': 'M', 

466 'velocity': 'M/S', 

467 'acceleration': 'M/S**2'} 

468 

469 if self.is_blacklisted(obj): 

470 raise NotFound('Response is blacklisted:', self.get_nslc(obj)) 

471 

472 if not self.is_whitelisted(obj): 

473 raise NotFound('Response is not on whitelist:', self.get_nslc(obj)) 

474 

475 net, sta, loc, cha = self.get_nslc(obj) 

476 tmin, tmax = self.get_tmin_tmax(obj) 

477 

478 keys_x = [ 

479 (net, sta, loc, cha), (net, sta, '', cha), ('', sta, '', cha)] 

480 

481 keys = [] 

482 for k in keys_x: 

483 if k not in keys: 

484 keys.append(k) 

485 

486 candidates = [] 

487 for k in keys: 

488 if k in self.responses: 

489 for x in self.responses[k]: 

490 if x.tmin < tmin and (x.tmax is None or tmax < x.tmax): 

491 if quantity == 'displacement': 

492 candidates.append(x.response) 

493 elif quantity == 'velocity': 

494 candidates.append(trace.MultiplyResponse([ 

495 x.response, 

496 trace.DifferentiationResponse()])) 

497 elif quantity == 'acceleration': 

498 candidates.append(trace.MultiplyResponse([ 

499 x.response, 

500 trace.DifferentiationResponse(2)])) 

501 else: 

502 assert False 

503 

504 for sx in self.responses_stationxml: 

505 try: 

506 candidates.append( 

507 sx.get_pyrocko_response( 

508 (net, sta, loc, cha), 

509 timespan=(tmin, tmax), 

510 fake_input_units=quantity_to_unit[quantity])) 

511 

512 except (fs.NoResponseInformation, fs.MultipleResponseInformation): 

513 pass 

514 

515 if len(candidates) == 1: 

516 return candidates[0] 

517 

518 elif len(candidates) == 0: 

519 raise NotFound('No response found:', (net, sta, loc, cha)) 

520 else: 

521 raise NotFound('Multiple responses found:', (net, sta, loc, cha)) 

522 

523 def get_waveform_raw( 

524 self, obj, 

525 tmin, 

526 tmax, 

527 tpad=0., 

528 toffset_noise_extract=0., 

529 want_incomplete=False, 

530 extend_incomplete=False): 

531 

532 net, sta, loc, cha = self.get_nslc(obj) 

533 

534 if self.is_blacklisted((net, sta, loc, cha)): 

535 raise NotFound( 

536 'Waveform is blacklisted:', (net, sta, loc, cha)) 

537 

538 if not self.is_whitelisted((net, sta, loc, cha)): 

539 raise NotFound( 

540 'Waveform is not on whitelist:', (net, sta, loc, cha)) 

541 

542 if self.clip_handling == 'by_nsl': 

543 if self.has_clipping((net, sta, loc), tmin, tmax): 

544 raise NotFound( 

545 'Waveform clipped:', (net, sta, loc)) 

546 

547 elif self.clip_handling == 'by_nslc': 

548 if self.has_clipping((net, sta, loc, cha), tmin, tmax): 

549 raise NotFound( 

550 'Waveform clipped:', (net, sta, loc, cha)) 

551 

552 trs = self.pile.all( 

553 tmin=tmin+toffset_noise_extract, 

554 tmax=tmax+toffset_noise_extract, 

555 tpad=tpad, 

556 trace_selector=lambda tr: tr.nslc_id == (net, sta, loc, cha), 

557 want_incomplete=want_incomplete or extend_incomplete) 

558 

559 if self.apply_displaced_sampling_workaround: 

560 for tr in trs: 

561 tr.snap() 

562 

563 if toffset_noise_extract != 0.0: 

564 for tr in trs: 

565 tr.shift(-toffset_noise_extract) 

566 

567 if extend_incomplete and len(trs) == 1: 

568 trs[0].extend( 

569 tmin + toffset_noise_extract - tpad, 

570 tmax + toffset_noise_extract + tpad, 

571 fillmethod='repeat') 

572 

573 if not want_incomplete and len(trs) != 1: 

574 if len(trs) == 0: 

575 message = 'Waveform missing or incomplete.' 

576 else: 

577 message = 'Waveform has gaps.' 

578 

579 raise NotFound( 

580 message, 

581 codes=(net, sta, loc, cha), 

582 time_range=( 

583 tmin + toffset_noise_extract - tpad, 

584 tmax + toffset_noise_extract + tpad)) 

585 

586 return trs 

587 

588 def get_waveform_restituted( 

589 self, 

590 obj, quantity='displacement', 

591 tmin=None, tmax=None, tpad=0., 

592 tfade=0., freqlimits=None, deltat=None, 

593 toffset_noise_extract=0., 

594 want_incomplete=False, 

595 extend_incomplete=False): 

596 

597 if deltat is not None: 

598 tpad_downsample = deltat * 50. 

599 else: 

600 tpad_downsample = 0. 

601 

602 trs_raw = self.get_waveform_raw( 

603 obj, tmin=tmin, tmax=tmax, tpad=tpad+tfade+tpad_downsample, 

604 toffset_noise_extract=toffset_noise_extract, 

605 want_incomplete=want_incomplete, 

606 extend_incomplete=extend_incomplete) 

607 

608 trs_restituted = [] 

609 for tr in trs_raw: 

610 if deltat is not None: 

611 tr.downsample_to(deltat, snap=True, allow_upsample_max=5) 

612 tr.deltat = deltat 

613 

614 resp = self.get_response(tr, quantity=quantity) 

615 try: 

616 trs_restituted.append( 

617 tr.transfer( 

618 tfade=tfade, freqlimits=freqlimits, 

619 transfer_function=resp, invert=True, demean=True)) 

620 

621 except trace.InfiniteResponse: 

622 raise NotFound( 

623 'Instrument response deconvolution failed ' 

624 '(divide by zero)', tr.nslc_id) 

625 

626 return trs_restituted, trs_raw 

627 

628 def _get_projections( 

629 self, station, backazimuth, source, target, tmin, tmax): 

630 

631 # fill in missing channel information (happens when station file 

632 # does not contain any channel information) 

633 if not station.get_channels(): 

634 station = copy.deepcopy(station) 

635 

636 nsl = station.nsl() 

637 trs = self.pile.all( 

638 tmin=tmin, tmax=tmax, 

639 trace_selector=lambda tr: tr.nslc_id[:3] == nsl, 

640 load_data=False) 

641 

642 channels = list(set(tr.channel for tr in trs)) 

643 station.set_channels_by_name(*channels) 

644 

645 projections = [] 

646 projections.extend(station.guess_projections_to_enu( 

647 out_channels=('E', 'N', 'Z'))) 

648 

649 if source is not None and target is not None: 

650 backazimuth = source.azibazi_to(target)[1] 

651 

652 if backazimuth is not None: 

653 projections.extend(station.guess_projections_to_rtu( 

654 out_channels=('R', 'T', 'Z'), 

655 backazimuth=backazimuth)) 

656 

657 if not projections: 

658 raise NotFound( 

659 'Cannot determine projection of data components:', 

660 station.nsl()) 

661 

662 return projections 

663 

664 def _get_waveform( 

665 self, 

666 obj, quantity='displacement', 

667 tmin=None, tmax=None, tpad=0., 

668 tfade=0., freqlimits=None, deltat=None, cache=None, 

669 backazimuth=None, 

670 source=None, 

671 target=None, 

672 debug=False): 

673 

674 assert not debug or (debug and cache is None) 

675 

676 if cache is True: 

677 cache = self._cache 

678 

679 _, _, _, channel = self.get_nslc(obj) 

680 station = self.get_station(self.get_nsl(obj)) 

681 

682 nslc = station.nsl() + (channel,) 

683 

684 if self.is_blacklisted(nslc): 

685 raise NotFound( 

686 'Waveform is blacklisted:', nslc) 

687 

688 if not self.is_whitelisted(nslc): 

689 raise NotFound( 

690 'Waveform is not on whitelist:', nslc) 

691 

692 assert tmin is not None 

693 assert tmax is not None 

694 

695 tmin = float(tmin) 

696 tmax = float(tmax) 

697 

698 nslc = tuple(nslc) 

699 

700 cache_k = nslc + ( 

701 tmin, tmax, tuple(freqlimits), tfade, deltat, tpad, quantity) 

702 if cache is not None and (nslc + cache_k) in cache: 

703 obj = cache[nslc + cache_k] 

704 if isinstance(obj, Exception): 

705 raise obj 

706 elif obj is None: 

707 raise NotFound('Waveform not found!', nslc) 

708 else: 

709 return obj 

710 

711 syn_test = self.synthetic_test 

712 toffset_noise_extract = 0.0 

713 if syn_test: 

714 if not syn_test.respect_data_availability: 

715 if syn_test.real_noise_scale != 0.0: 

716 raise DatasetError( 

717 'respect_data_availability=False and ' 

718 'addition of real noise cannot be combined.') 

719 

720 tr = syn_test.get_waveform( 

721 nslc, tmin, tmax, 

722 tfade=tfade, 

723 freqlimits=freqlimits) 

724 

725 if cache is not None: 

726 cache[tr.nslc_id + cache_k] = tr 

727 

728 if debug: 

729 return [], [], [] 

730 else: 

731 return tr 

732 

733 if syn_test.real_noise_scale != 0.0: 

734 toffset_noise_extract = syn_test.real_noise_toffset 

735 

736 abs_delays = [] 

737 for ocha in 'ENZRT': 

738 sc = self.station_corrections.get(station.nsl() + (channel,), None) 

739 if sc: 

740 abs_delays.append(abs(sc.delay)) 

741 

742 if abs_delays: 

743 abs_delay_max = max(abs_delays) 

744 else: 

745 abs_delay_max = 0.0 

746 

747 projections = self._get_projections( 

748 station, backazimuth, source, target, tmin, tmax) 

749 

750 try: 

751 trs_projected = [] 

752 trs_restituted = [] 

753 trs_raw = [] 

754 exceptions = [] 

755 for matrix, in_channels, out_channels in projections: 

756 deps = trace.project_dependencies( 

757 matrix, in_channels, out_channels) 

758 

759 try: 

760 trs_restituted_group = [] 

761 trs_raw_group = [] 

762 if channel in deps: 

763 for cha in deps[channel]: 

764 trs_restituted_this, trs_raw_this = \ 

765 self.get_waveform_restituted( 

766 station.nsl() + (cha,), 

767 quantity=quantity, 

768 tmin=tmin, tmax=tmax, 

769 tpad=tpad+abs_delay_max, 

770 toffset_noise_extract=toffset_noise_extract, # noqa 

771 tfade=tfade, 

772 freqlimits=freqlimits, 

773 deltat=deltat, 

774 want_incomplete=debug, 

775 extend_incomplete=self.extend_incomplete) 

776 

777 trs_restituted_group.extend(trs_restituted_this) 

778 trs_raw_group.extend(trs_raw_this) 

779 

780 trs_projected.extend( 

781 trace.project( 

782 trs_restituted_group, matrix, 

783 in_channels, out_channels)) 

784 

785 trs_restituted.extend(trs_restituted_group) 

786 trs_raw.extend(trs_raw_group) 

787 

788 except NotFound as e: 

789 exceptions.append((in_channels, out_channels, e)) 

790 

791 if not trs_projected: 

792 err = [] 

793 for (in_channels, out_channels, e) in exceptions: 

794 sin = ', '.join(c.name for c in in_channels) 

795 sout = ', '.join(c.name for c in out_channels) 

796 err.append('(%s) -> (%s): %s' % (sin, sout, e)) 

797 

798 raise NotFound('\n'.join(err)) 

799 

800 for tr in trs_projected: 

801 sc = self.station_corrections.get(tr.nslc_id, None) 

802 if sc: 

803 if self.apply_correction_factors: 

804 tr.ydata /= sc.factor 

805 

806 if self.apply_correction_delays: 

807 tr.shift(-sc.delay) 

808 

809 if tmin is not None and tmax is not None: 

810 tr.chop(tmin, tmax) 

811 

812 if syn_test: 

813 trs_projected_synthetic = [] 

814 for tr in trs_projected: 

815 if tr.channel == channel: 

816 tr_syn = syn_test.get_waveform( 

817 tr.nslc_id, tmin, tmax, 

818 tfade=tfade, freqlimits=freqlimits) 

819 

820 if tr_syn: 

821 if syn_test.real_noise_scale != 0.0: 

822 tr_syn = tr_syn.copy() 

823 tr_noise = tr.copy() 

824 tr_noise.set_ydata( 

825 tr_noise.get_ydata() 

826 * syn_test.real_noise_scale) 

827 

828 tr_syn.add(tr_noise) 

829 

830 trs_projected_synthetic.append(tr_syn) 

831 

832 trs_projected = trs_projected_synthetic 

833 

834 if cache is not None: 

835 for tr in trs_projected: 

836 cache[tr.nslc_id + cache_k] = tr 

837 

838 tr_return = None 

839 for tr in trs_projected: 

840 if tr.channel == channel: 

841 tr_return = tr 

842 

843 if debug: 

844 return trs_projected, trs_restituted, trs_raw, tr_return 

845 

846 elif tr_return: 

847 return tr_return 

848 

849 else: 

850 raise NotFound( 

851 'waveform not available', station.nsl() + (channel,)) 

852 

853 except NotFound: 

854 if cache is not None: 

855 cache[nslc + cache_k] = None 

856 raise 

857 

858 def get_waveform(self, obj, tinc_cache=None, **kwargs): 

859 tmin = kwargs['tmin'] 

860 tmax = kwargs['tmax'] 

861 if tinc_cache is not None: 

862 tmin_r = (math.floor(tmin / tinc_cache) - 1.0) * tinc_cache 

863 tmax_r = (math.ceil(tmax / tinc_cache) + 1.0) * tinc_cache 

864 else: 

865 tmin_r = tmin 

866 tmax_r = tmax 

867 

868 kwargs['tmin'] = tmin_r 

869 kwargs['tmax'] = tmax_r 

870 

871 if kwargs.get('debug', None): 

872 return self._get_waveform(obj, **kwargs) 

873 else: 

874 tr = self._get_waveform(obj, **kwargs) 

875 return tr.chop(tmin, tmax, inplace=False) 

876 

877 def get_events(self, magmin=None, event_names=None): 

878 evs = [] 

879 for ev in self.events: 

880 if ((magmin is None or ev.magnitude >= magmin) and 

881 (event_names is None or ev.name in event_names)): 

882 evs.append(ev) 

883 

884 return evs 

885 

886 def get_event_by_time(self, t, magmin=None): 

887 evs = self.get_events(magmin=magmin) 

888 ev_x = None 

889 for ev in evs: 

890 if ev_x is None or abs(ev.time - t) < abs(ev_x.time - t): 

891 ev_x = ev 

892 

893 if not ev_x: 

894 raise NotFound( 

895 'No event information matching criteria (t=%s, magmin=%s).' % 

896 (t, magmin)) 

897 

898 return ev_x 

899 

900 def get_event(self): 

901 if self._event_name is None: 

902 raise NotFound('No main event selected in dataset!') 

903 

904 for ev in self.events: 

905 if ev.name == self._event_name: 

906 return ev 

907 

908 raise NotFound('No such event: %s' % self._event_name) 

909 

910 def get_picks(self): 

911 if self._picks is None: 

912 hash_to_name = {} 

913 names = set() 

914 for marker in self.pick_markers: 

915 if isinstance(marker, pmarker.EventMarker): 

916 name = marker.get_event().name 

917 if name in names: 

918 raise DatasetError( 

919 'Duplicate event name "%s" in picks.' % name) 

920 

921 names.add(name) 

922 hash_to_name[marker.get_event_hash()] = name 

923 

924 for ev in self.events: 

925 hash_to_name[ev.get_hash()] = ev.name 

926 

927 picks = {} 

928 for marker in self.pick_markers: 

929 if isinstance(marker, pmarker.PhaseMarker): 

930 ehash = marker.get_event_hash() 

931 

932 nsl = marker.one_nslc()[:3] 

933 phasename = marker.get_phasename() 

934 

935 if ehash is None or ehash not in hash_to_name: 

936 raise DatasetError( 

937 'Unassociated pick: %s.%s.%s, %s' % 

938 (nsl + (phasename, ))) 

939 

940 eventname = hash_to_name[ehash] 

941 

942 if (nsl, phasename, eventname) in picks: 

943 raise DatasetError( 

944 'Duplicate pick: %s.%s.%s, %s' % 

945 (nsl + (phasename, ))) 

946 

947 picks[nsl, phasename, eventname] = marker 

948 

949 self._picks = picks 

950 

951 return self._picks 

952 

953 def get_pick(self, eventname, obj, phasename): 

954 nsl = self.get_nsl(obj) 

955 return self.get_picks().get((nsl, phasename, eventname), None) 

956 

957 

958class DatasetConfig(HasPaths): 

959 ''' Configuration for a Grond `Dataset` object. ''' 

960 

961 stations_path = Path.T( 

962 optional=True, 

963 help='List of files with station coordinates in Pyrocko format.') 

964 stations_stationxml_paths = List.T( 

965 Path.T(), 

966 optional=True, 

967 help='List of files with station coordinates in StationXML format.') 

968 events_path = Path.T( 

969 optional=True, 

970 help='File with hypocenter information and possibly' 

971 ' reference solution') 

972 waveform_paths = List.T( 

973 Path.T(), 

974 optional=True, 

975 help='List of directories with raw waveform data') 

976 clippings_path = Path.T( 

977 optional=True) 

978 responses_sacpz_path = Path.T( 

979 optional=True, 

980 help='List of SACPZ response files for restitution of' 

981 ' the raw waveform data.') 

982 responses_stationxml_paths = List.T( 

983 Path.T(), 

984 optional=True, 

985 help='List of StationXML response files for restitution of' 

986 ' the raw waveform data.') 

987 station_corrections_path = Path.T( 

988 optional=True, 

989 help='File containing station correction informations.') 

990 apply_correction_factors = Bool.T( 

991 optional=True, 

992 default=True, 

993 help='Apply correction factors from station corrections.') 

994 apply_correction_delays = Bool.T( 

995 optional=True, 

996 default=True, 

997 help='Apply correction delays from station corrections.') 

998 apply_displaced_sampling_workaround = Bool.T( 

999 optional=True, 

1000 default=False, 

1001 help='Work around displaced sampling issues.') 

1002 extend_incomplete = Bool.T( 

1003 default=False, 

1004 help='Extend incomplete seismic traces.') 

1005 picks_paths = List.T( 

1006 Path.T()) 

1007 blacklist_paths = List.T( 

1008 Path.T(), 

1009 help='List of text files with blacklisted stations.') 

1010 blacklist = List.T( 

1011 String.T(), 

1012 help='Stations/components to be excluded according to their STA, ' 

1013 'NET.STA, NET.STA.LOC, or NET.STA.LOC.CHA codes.') 

1014 whitelist_paths = List.T( 

1015 Path.T(), 

1016 help='List of text files with whitelisted stations.') 

1017 whitelist = List.T( 

1018 String.T(), 

1019 optional=True, 

1020 help='If not None, list of stations/components to include according ' 

1021 'to their STA, NET.STA, NET.STA.LOC, or NET.STA.LOC.CHA codes. ' 

1022 'Note: ''when whitelisting on channel level, both, the raw and ' 

1023 'the processed channel codes have to be listed.') 

1024 synthetic_test = SyntheticTest.T( 

1025 optional=True) 

1026 

1027 kite_scene_paths = List.T( 

1028 Path.T(), 

1029 optional=True, 

1030 help='List of directories for the InSAR scenes.') 

1031 

1032 gnss_campaign_paths = List.T( 

1033 Path.T(), 

1034 optional=True, 

1035 help='List of directories for the GNSS campaign data.') 

1036 

1037 def __init__(self, *args, **kwargs): 

1038 HasPaths.__init__(self, *args, **kwargs) 

1039 self._ds = {} 

1040 

1041 def get_event_names(self): 

1042 logger.info('Loading events ...') 

1043 

1044 def extra(path): 

1045 return expand_template(path, dict( 

1046 event_name='*')) 

1047 

1048 def fp(path): 

1049 return self.expand_path(path, extra=extra) 

1050 

1051 def check_events(events, fn): 

1052 for ev in events: 

1053 if not ev.name: 

1054 logger.warning('Event in %s has no name!', fn) 

1055 return 

1056 if not ev.lat or not ev.lon: 

1057 logger.warning('Event %s has inconsistent coordinates!', 

1058 ev.name) 

1059 if not ev.depth: 

1060 logger.warning('Event %s has no depth!', ev.name) 

1061 if not ev.time: 

1062 logger.warning('Event %s has no time!', ev.name) 

1063 

1064 events = [] 

1065 events_path = fp(self.events_path) 

1066 fns = glob.glob(events_path) 

1067 if not fns: 

1068 raise DatasetError('No event files matching "%s".' % events_path) 

1069 

1070 for fn in fns: 

1071 logger.debug('Loading from file %s' % fn) 

1072 ev = model.load_events(filename=fn) 

1073 check_events(ev, fn) 

1074 

1075 events.extend(ev) 

1076 

1077 event_names = [ev_.name for ev_ in events] 

1078 event_names.sort() 

1079 return event_names 

1080 

1081 def get_dataset(self, event_name): 

1082 if event_name not in self._ds: 

1083 def extra(path): 

1084 return expand_template(path, dict( 

1085 event_name=event_name)) 

1086 

1087 def fp(path): 

1088 p = self.expand_path(path, extra=extra) 

1089 if p is None: 

1090 return None 

1091 

1092 if isinstance(p, list): 

1093 for path in p: 

1094 if not op.exists(path): 

1095 logger.warning('Path %s does not exist.' % path) 

1096 else: 

1097 if not op.exists(p): 

1098 logger.warning('Path %s does not exist.' % p) 

1099 

1100 return p 

1101 

1102 ds = Dataset(event_name) 

1103 try: 

1104 ds.add_events(filename=fp(self.events_path)) 

1105 

1106 ds.add_stations( 

1107 pyrocko_stations_filename=fp(self.stations_path), 

1108 stationxml_filenames=fp(self.stations_stationxml_paths)) 

1109 

1110 if self.waveform_paths: 

1111 ds.add_waveforms(paths=fp(self.waveform_paths)) 

1112 

1113 if self.kite_scene_paths: 

1114 ds.add_kite_scenes(paths=fp(self.kite_scene_paths)) 

1115 

1116 if self.gnss_campaign_paths: 

1117 ds.add_gnss_campaigns(paths=fp(self.gnss_campaign_paths)) 

1118 

1119 if self.clippings_path: 

1120 ds.add_clippings(markers_filename=fp(self.clippings_path)) 

1121 

1122 if self.responses_sacpz_path: 

1123 ds.add_responses( 

1124 sacpz_dirname=fp(self.responses_sacpz_path)) 

1125 

1126 if self.responses_stationxml_paths: 

1127 ds.add_responses( 

1128 stationxml_filenames=fp( 

1129 self.responses_stationxml_paths)) 

1130 

1131 if self.station_corrections_path: 

1132 ds.add_station_corrections( 

1133 filename=fp(self.station_corrections_path)) 

1134 

1135 ds.apply_correction_factors = self.apply_correction_factors 

1136 ds.apply_correction_delays = self.apply_correction_delays 

1137 ds.apply_displaced_sampling_workaround = \ 

1138 self.apply_displaced_sampling_workaround 

1139 ds.extend_incomplete = self.extend_incomplete 

1140 

1141 for picks_path in self.picks_paths: 

1142 ds.add_picks( 

1143 filename=fp(picks_path)) 

1144 

1145 ds.add_blacklist(self.blacklist) 

1146 ds.add_blacklist(filenames=fp(self.blacklist_paths)) 

1147 if self.whitelist: 

1148 ds.add_whitelist(self.whitelist) 

1149 if self.whitelist_paths: 

1150 ds.add_whitelist(filenames=fp(self.whitelist_paths)) 

1151 

1152 ds.set_synthetic_test(copy.deepcopy(self.synthetic_test)) 

1153 self._ds[event_name] = ds 

1154 except (FileLoadError, OSError) as e: 

1155 raise DatasetError(str(e)) 

1156 

1157 return self._ds[event_name] 

1158 

1159 

1160__all__ = ''' 

1161 Dataset 

1162 DatasetConfig 

1163 DatasetError 

1164 InvalidObject 

1165 NotFound 

1166 StationCorrection 

1167 load_station_corrections 

1168 dump_station_corrections 

1169'''.split()