import glob
import copy
import os.path as op
import logging
import math
import numpy as num

from collections import defaultdict
from pyrocko import util, pile, model, config, trace, \
    marker as pmarker
from pyrocko.io.io_common import FileLoadError
from pyrocko.fdsn import enhanced_sacpz, station as fs
from pyrocko.guts import (Object, Tuple, String, Float, List, Bool, dump_all,
                          load_all)

from pyrocko import gf

from .meta import Path, HasPaths, expand_template, GrondError

from .synthetic_tests import SyntheticTest

guts_prefix = 'grond'
logger = logging.getLogger('grond.dataset')

def quote_paths(paths):
    return ', '.join('"%s"' % path for path in paths)

class InvalidObject(Exception):
    pass


class NotFound(Exception):
    def __init__(self, reason, codes=None, time_range=None):
        self.reason = reason
        self.time_range = time_range
        self.codes = codes

    def __str__(self):
        s = self.reason
        if self.codes:
            s += ' (%s)' % '.'.join(self.codes)

        if self.time_range:
            s += ' (%s - %s)' % (
                util.time_to_str(self.time_range[0]),
                util.time_to_str(self.time_range[1]))

        return s

class DatasetError(GrondError):
    pass


class StationCorrection(Object):
    codes = Tuple.T(4, String.T())
    delay = Float.T()
    factor = Float.T()

def load_station_corrections(filename):
    scs = load_all(filename=filename)
    for sc in scs:
        assert isinstance(sc, StationCorrection)

    return scs


def dump_station_corrections(station_corrections, filename):
    return dump_all(station_corrections, filename=filename)
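# Illustrative usage sketch (not part of the module): station corrections
# are guts objects, so they round-trip through YAML. The file name below is
# hypothetical.
#
#     sc = StationCorrection(
#         codes=('GE', 'APE', '', 'BHZ'), delay=0.15, factor=1.2)
#     dump_station_corrections([sc], 'corrections.yaml')
#     assert load_station_corrections('corrections.yaml')[0].delay == 0.15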

class Dataset(object):

    def __init__(self, event_name=None):
        self.events = []
        self.pile = pile.Pile()
        self.stations = {}
        self.responses = defaultdict(list)
        self.responses_stationxml = []
        self.clippings = {}
        self.blacklist = set()
        self.whitelist_nslc = None
        self.whitelist_nsl_xx = None
        self.whitelist = None
        self.station_corrections = {}
        self.station_factors = {}
        self.pick_markers = []
        self.apply_correction_delays = True
        self.apply_correction_factors = True
        self.apply_displaced_sampling_workaround = False
        self.extend_incomplete = False
        self.clip_handling = 'by_nsl'
        self.kite_scenes = []
        self.gnss_campaigns = []
        self.synthetic_test = None
        self._picks = None
        self._cache = {}
        self._event_name = event_name
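    # Typical assembly, as an illustrative sketch (file names below are
    # hypothetical; all methods used exist in this class):
    #
    #     ds = Dataset(event_name='ev_01')
    #     ds.add_events(filename='events.txt')
    #     ds.add_stations(pyrocko_stations_filename='stations.txt')
    #     ds.add_waveforms(paths=['data/'])
    #     ds.add_responses(stationxml_filenames=['responses.xml'])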

    def empty_cache(self):
        self._cache = {}

    def set_synthetic_test(self, synthetic_test):
        self.synthetic_test = synthetic_test

    def add_stations(
            self,
            stations=None,
            pyrocko_stations_filename=None,
            stationxml_filenames=None):

        if stations is not None:
            for station in stations:
                self.stations[station.nsl()] = station

        if pyrocko_stations_filename is not None:
            logger.debug(
                'Loading stations from file "%s"...' %
                pyrocko_stations_filename)

            for station in model.load_stations(pyrocko_stations_filename):
                self.stations[station.nsl()] = station

        if stationxml_filenames is not None and len(stationxml_filenames) > 0:

            for stationxml_filename in stationxml_filenames:
                if not op.exists(stationxml_filename):
                    continue

                logger.debug(
                    'Loading stations from StationXML file "%s"...' %
                    stationxml_filename)

                sx = fs.load_xml(filename=stationxml_filename)
                ev = self.get_event()
                try:
                    stations = sx.get_pyrocko_stations(
                        time=ev.time, sloppy=True)
                except TypeError:
                    logger.warning(
                        'The installed version of Pyrocko does not support '
                        'the keyword argument `sloppy` in method '
                        '`FDSNStationXML.get_pyrocko_stations`. Please '
                        'upgrade Pyrocko.')

                    stations = sx.get_pyrocko_stations(time=ev.time)

                if len(stations) == 0:
                    logger.warning(
                        'No stations found for time %s in file "%s".' % (
                            util.time_to_str(ev.time), stationxml_filename))

                for station in stations:
                    logger.debug('Adding station: %s.%s.%s' % station.nsl())
                    channels = station.get_channels()
                    if len(channels) == 1 and channels[0].name.endswith('Z'):
                        logger.warning(
                            'Station "%s" has vertical component'
                            ' information only, adding mocked channels.'
                            % station.nsl_string())
                        station.add_channel(
                            model.Channel(channels[0].name[:-1] + 'N'))
                        station.add_channel(
                            model.Channel(channels[0].name[:-1] + 'E'))

                    self.stations[station.nsl()] = station

    def add_events(self, events=None, filename=None):
        if events is not None:
            self.events.extend(events)

        if filename is not None:
            logger.debug('Loading events from file "%s"...' % filename)
            try:
                events = model.load_events(filename)
                self.events.extend(events)
                logger.info(
                    'Loading events from %s: %i events found.' %
                    (filename, len(events)))
            except Exception:
                logger.warning('Could not load events from %s!', filename)
                raise

    def add_waveforms(self, paths, regex=None, fileformat='detect',
                      show_progress=False):
        cachedirname = config.config().cache_dir

        logger.debug('Selecting waveform files %s...' % quote_paths(paths))
        fns = util.select_files(paths, include=regex,
                                show_progress=show_progress)
        cache = pile.get_cache(cachedirname)
        logger.debug('Scanning waveform files %s...' % quote_paths(paths))
        self.pile.load_files(sorted(fns), cache=cache,
                             fileformat=fileformat,
                             show_progress=show_progress)

    def add_responses(self, sacpz_dirname=None, stationxml_filenames=None):
        if sacpz_dirname:
            logger.debug(
                'Loading SAC PZ responses from "%s"...' % sacpz_dirname)
            for x in enhanced_sacpz.iload_dirname(sacpz_dirname):
                self.responses[x.codes].append(x)

        if stationxml_filenames:
            for stationxml_filename in stationxml_filenames:
                if not op.exists(stationxml_filename):
                    continue

                logger.debug(
                    'Loading StationXML responses from "%s"...' %
                    stationxml_filename)

                self.responses_stationxml.append(
                    fs.load_xml(filename=stationxml_filename))

    def add_clippings(self, markers_filename):
        markers = pmarker.load_markers(markers_filename)
        clippings = {}
        for marker in markers:
            nslc = marker.one_nslc()
            nsl = nslc[:3]
            if nsl not in clippings:
                clippings[nsl] = []

            if nslc not in clippings:
                clippings[nslc] = []

            clippings[nsl].append(marker.tmin)
            clippings[nslc].append(marker.tmin)

        for k, times in clippings.items():
            atimes = num.array(times, dtype=float)
            if k not in self.clippings:
                self.clippings[k] = atimes
            else:
                self.clippings[k] = num.concatenate(
                    (self.clippings[k], atimes))

    def add_blacklist(self, blacklist=[], filenames=None):
        logger.debug('Loading blacklisted stations...')
        if filenames:
            blacklist = list(blacklist)
            for filename in filenames:
                if op.exists(filename):
                    with open(filename, 'r') as f:
                        blacklist.extend(
                            s.strip() for s in f.read().splitlines())
                else:
                    logger.warning('No such blacklist file: %s' % filename)

        for x in blacklist:
            if isinstance(x, str):
                x = tuple(x.split('.'))
            self.blacklist.add(x)

    def add_whitelist(self, whitelist=[], filenames=None):
        logger.debug('Loading whitelisted stations...')
        if filenames:
            whitelist = list(whitelist)
            for filename in filenames:
                with open(filename, 'r') as f:
                    whitelist.extend(s.strip() for s in f.read().splitlines())

        if self.whitelist_nslc is None:
            self.whitelist_nslc = set()
            self.whitelist = set()
            self.whitelist_nsl_xx = set()

        for x in whitelist:
            if isinstance(x, str):
                x = tuple(x.split('.'))
            if len(x) == 4:
                self.whitelist_nslc.add(x)
                self.whitelist_nsl_xx.add(x[:3])
            else:
                self.whitelist.add(x)
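    # Entries may be given at different levels of specificity; string
    # entries are split on '.' (illustrative values):
    #
    #     ds.add_blacklist(['BAD'])             # by station code
    #     ds.add_blacklist(['GE.BAD'])          # by network.station
    #     ds.add_whitelist(['GE.APE.', 'GE.APE..BHZ'])
    #
    # Four-part codes match on channel level, shorter ones on station
    # level.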

    def add_station_corrections(self, filename):
        self.station_corrections.update(
            (sc.codes, sc) for sc in load_station_corrections(filename))

    def add_picks(self, filename):
        self.pick_markers.extend(
            pmarker.load_markers(filename))

        self._picks = None

    def add_gnss_campaigns(self, paths):
        paths = util.select_files(
            paths,
            include=r'\.yml|\.yaml',
            show_progress=False)

        for path in paths:
            self.add_gnss_campaign(filename=path)

    def add_gnss_campaign(self, filename):
        try:
            from pyrocko.model import gnss  # noqa
        except ImportError:
            raise ImportError(
                'Module pyrocko.model.gnss not found. Please upgrade Pyrocko.')

        logger.debug('Loading GNSS campaign from "%s"...' % filename)

        campaign = load_all(filename=filename)
        self.gnss_campaigns.append(campaign[0])

    def add_kite_scenes(self, paths):
        logger.info('Loading kite InSAR scenes...')
        fns = util.select_files(
            paths,
            include=r'\.npz',
            show_progress=False)

        for fn in fns:
            self.add_kite_scene(filename=fn)

        if not self.kite_scenes:
            logger.warning('Could not find any kite scenes at %s.' %
                           quote_paths(paths))

    def add_kite_scene(self, filename):
        try:
            from kite import Scene
        except ImportError:
            raise ImportError('Module kite could not be imported,'
                              ' please install from https://pyrocko.org.')
        logger.debug('Loading kite scene from "%s"...' % filename)

        scene = Scene.load(filename)
        scene._log.setLevel(logger.level)

        try:
            self.get_kite_scene(scene.meta.scene_id)
        except NotFound:
            self.kite_scenes.append(scene)
        else:
            raise AttributeError('Kite scene_id not unique for "%s".'
                                 % filename)

    def is_blacklisted(self, obj):
        try:
            nslc = self.get_nslc(obj)
            if nslc in self.blacklist:
                return True

        except InvalidObject:
            pass

        nsl = self.get_nsl(obj)
        return (
            nsl in self.blacklist or
            nsl[1:2] in self.blacklist or
            nsl[:2] in self.blacklist)

    def is_whitelisted(self, obj):
        if self.whitelist_nslc is None:
            return True

        nsl = self.get_nsl(obj)

        if (
                nsl in self.whitelist or
                nsl[1:2] in self.whitelist or
                nsl[:2] in self.whitelist):

            return True

        try:
            nslc = self.get_nslc(obj)
            if nslc in self.whitelist_nslc:
                return True

        except InvalidObject:
            return nsl in self.whitelist_nsl_xx

        return False

    def has_clipping(self, nsl_or_nslc, tmin, tmax):
        if nsl_or_nslc not in self.clippings:
            return False

        atimes = self.clippings[nsl_or_nslc]
        return num.any(num.logical_and(tmin < atimes, atimes <= tmax))
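    # The clipping test is half-open: a clip time t counts for a window
    # when tmin < t <= tmax. Illustrative check with made-up numbers:
    #
    #     ds.clippings[('GE', 'APE', '')] = num.array([10.0])
    #     ds.has_clipping(('GE', 'APE', ''), 5.0, 10.0)   # -> True
    #     ds.has_clipping(('GE', 'APE', ''), 10.0, 20.0)  # -> False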

    def get_nsl(self, obj):
        if isinstance(obj, trace.Trace):
            net, sta, loc, _ = obj.nslc_id
        elif isinstance(obj, model.Station):
            net, sta, loc = obj.nsl()
        elif isinstance(obj, gf.Target):
            net, sta, loc, _ = obj.codes
        elif isinstance(obj, tuple) and len(obj) in (3, 4):
            net, sta, loc = obj[:3]
        else:
            raise InvalidObject(
                'Cannot get nsl code from given object of type "%s".'
                % type(obj))

        return net, sta, loc

    def get_nslc(self, obj):
        if isinstance(obj, trace.Trace):
            return obj.nslc_id
        elif isinstance(obj, gf.Target):
            return obj.codes
        elif isinstance(obj, tuple) and len(obj) == 4:
            return obj
        else:
            raise InvalidObject(
                'Cannot get nslc code from given object of type "%s".'
                % type(obj))
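    # Pyrocko traces, gf.Target objects and plain tuples are all normalized
    # to (net, sta, loc) or (net, sta, loc, cha), e.g.:
    #
    #     ds.get_nsl(('GE', 'APE', '', 'BHZ'))   # -> ('GE', 'APE', '')
    #     ds.get_nslc(('GE', 'APE', '', 'BHZ'))  # -> ('GE', 'APE', '', 'BHZ')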

    def get_tmin_tmax(self, obj):
        if isinstance(obj, trace.Trace):
            return obj.tmin, obj.tmax
        else:
            raise InvalidObject(
                'Cannot get tmin and tmax from given object of type "%s"' %
                type(obj))

    def get_station(self, obj):
        if self.is_blacklisted(obj):
            raise NotFound('Station is blacklisted:', self.get_nsl(obj))

        if not self.is_whitelisted(obj):
            raise NotFound('Station is not on whitelist:', self.get_nsl(obj))

        if isinstance(obj, model.Station):
            return obj

        net, sta, loc = self.get_nsl(obj)

        keys = [(net, sta, loc), (net, sta, ''), ('', sta, '')]
        for k in keys:
            if k in self.stations:
                return self.stations[k]

        raise NotFound('No station information:', keys)

    def get_stations(self):
        return [self.stations[k] for k in sorted(self.stations)
                if not self.is_blacklisted(self.stations[k])
                and self.is_whitelisted(self.stations[k])]

    def get_kite_scenes(self):
        return self.kite_scenes

    def get_kite_scene(self, scene_id=None):
        if scene_id is None:
            if len(self.kite_scenes) == 0:
                raise AttributeError('No kite displacements defined.')
            return self.kite_scenes[0]
        else:
            for scene in self.kite_scenes:
                if scene.meta.scene_id == scene_id:
                    return scene
            raise NotFound('No kite scene with id "%s" defined.' % scene_id)

    def get_gnss_campaigns(self):
        return self.gnss_campaigns

    def get_gnss_campaign(self, name):
        for camp in self.gnss_campaigns:
            if camp.name == name:
                return camp
        raise NotFound('GNSS campaign %s not found!' % name)

    def get_response(self, obj, quantity='displacement'):
        if (self.responses is None or len(self.responses) == 0) \
                and (self.responses_stationxml is None
                     or len(self.responses_stationxml) == 0):

            raise NotFound('No response information available.')

        quantity_to_unit = {
            'displacement': 'M',
            'velocity': 'M/S',
            'acceleration': 'M/S**2'}

        if self.is_blacklisted(obj):
            raise NotFound('Response is blacklisted:', self.get_nslc(obj))

        if not self.is_whitelisted(obj):
            raise NotFound('Response is not on whitelist:', self.get_nslc(obj))

        net, sta, loc, cha = self.get_nslc(obj)
        tmin, tmax = self.get_tmin_tmax(obj)

        keys_x = [
            (net, sta, loc, cha), (net, sta, '', cha), ('', sta, '', cha)]

        keys = []
        for k in keys_x:
            if k not in keys:
                keys.append(k)

        candidates = []
        for k in keys:
            if k in self.responses:
                for x in self.responses[k]:
                    if x.tmin < tmin and (x.tmax is None or tmax < x.tmax):
                        if quantity == 'displacement':
                            candidates.append(x.response)
                        elif quantity == 'velocity':
                            candidates.append(trace.MultiplyResponse([
                                x.response,
                                trace.DifferentiationResponse()]))
                        elif quantity == 'acceleration':
                            candidates.append(trace.MultiplyResponse([
                                x.response,
                                trace.DifferentiationResponse(2)]))
                        else:
                            assert False

        for sx in self.responses_stationxml:
            try:
                candidates.append(
                    sx.get_pyrocko_response(
                        (net, sta, loc, cha),
                        timespan=(tmin, tmax),
                        fake_input_units=quantity_to_unit[quantity]))

            except (fs.NoResponseInformation, fs.MultipleResponseInformation):
                pass

        if len(candidates) == 1:
            return candidates[0]

        elif len(candidates) == 0:
            raise NotFound('No response found:', (net, sta, loc, cha))
        else:
            raise NotFound('Multiple responses found:', (net, sta, loc, cha))
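    # How quantities map to responses, as a sketch of the logic above: a
    # SAC PZ response is stored for displacement; velocity and acceleration
    # are derived by chaining one or two differentiations onto it, e.g. for
    # velocity:
    #
    #     resp_vel = trace.MultiplyResponse([
    #         x.response, trace.DifferentiationResponse()])
    #
    # StationXML responses are instead requested with fake input units
    # ('M', 'M/S' or 'M/S**2') so the unit conversion happens inside
    # Pyrocko.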

    def get_waveform_raw(
            self, obj,
            tmin,
            tmax,
            tpad=0.,
            toffset_noise_extract=0.,
            want_incomplete=False,
            extend_incomplete=False):

        net, sta, loc, cha = self.get_nslc(obj)

        if self.is_blacklisted((net, sta, loc, cha)):
            raise NotFound(
                'Waveform is blacklisted:', (net, sta, loc, cha))

        if not self.is_whitelisted((net, sta, loc, cha)):
            raise NotFound(
                'Waveform is not on whitelist:', (net, sta, loc, cha))

        if self.clip_handling == 'by_nsl':
            if self.has_clipping((net, sta, loc), tmin, tmax):
                raise NotFound(
                    'Waveform clipped:', (net, sta, loc))

        elif self.clip_handling == 'by_nslc':
            if self.has_clipping((net, sta, loc, cha), tmin, tmax):
                raise NotFound(
                    'Waveform clipped:', (net, sta, loc, cha))

        trs = self.pile.all(
            tmin=tmin+toffset_noise_extract,
            tmax=tmax+toffset_noise_extract,
            tpad=tpad,
            trace_selector=lambda tr: tr.nslc_id == (net, sta, loc, cha),
            want_incomplete=want_incomplete or extend_incomplete)

        if self.apply_displaced_sampling_workaround:
            for tr in trs:
                tr.snap()

        if toffset_noise_extract != 0.0:
            for tr in trs:
                tr.shift(-toffset_noise_extract)

        if extend_incomplete and len(trs) == 1:
            trs[0].extend(
                tmin + toffset_noise_extract - tpad,
                tmax + toffset_noise_extract + tpad,
                fillmethod='repeat')

        if not want_incomplete and len(trs) != 1:
            if len(trs) == 0:
                message = 'Waveform missing or incomplete.'
            else:
                message = 'Waveform has gaps.'

            raise NotFound(
                message,
                codes=(net, sta, loc, cha),
                time_range=(
                    tmin + toffset_noise_extract - tpad,
                    tmax + toffset_noise_extract + tpad))

        return trs

    def get_waveform_restituted(
            self,
            obj, quantity='displacement',
            tmin=None, tmax=None, tpad=0.,
            tfade=0., freqlimits=None, deltat=None,
            toffset_noise_extract=0.,
            want_incomplete=False,
            extend_incomplete=False):

        if deltat is not None:
            tpad_downsample = deltat * 50.
        else:
            tpad_downsample = 0.

        trs_raw = self.get_waveform_raw(
            obj, tmin=tmin, tmax=tmax, tpad=tpad+tfade+tpad_downsample,
            toffset_noise_extract=toffset_noise_extract,
            want_incomplete=want_incomplete,
            extend_incomplete=extend_incomplete)

        trs_restituted = []
        for tr in trs_raw:
            if deltat is not None:
                tr.downsample_to(deltat, snap=True, allow_upsample_max=5)
                tr.deltat = deltat

            resp = self.get_response(tr, quantity=quantity)
            try:
                trs_restituted.append(
                    tr.transfer(
                        tfade=tfade, freqlimits=freqlimits,
                        transfer_function=resp, invert=True, demean=True))

            except trace.InfiniteResponse:
                raise NotFound(
                    'Instrument response deconvolution failed '
                    '(divide by zero)', tr.nslc_id)

        return trs_restituted, trs_raw
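    # Restitution sketch (illustrative; codes and window are hypothetical):
    # raw counts are deconvolved to the requested quantity within the given
    # frequency band.
    #
    #     trs_rest, trs_raw = ds.get_waveform_restituted(
    #         ('GE', 'APE', '', 'BHZ'),
    #         quantity='displacement',
    #         tmin=t0, tmax=t0 + 600.,
    #         tfade=60., freqlimits=(0.01, 0.02, 1.0, 2.0))
    #
    # `freqlimits` is the four-corner frequency taper handed to
    # `trace.Trace.transfer`; `tfade` tapers the trace ends in time.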

    def _get_projections(
            self, station, backazimuth, source, target, tmin, tmax):

        # fill in missing channel information (happens when station file
        # does not contain any channel information)
        if not station.get_channels():
            station = copy.deepcopy(station)

            nsl = station.nsl()
            trs = self.pile.all(
                tmin=tmin, tmax=tmax,
                trace_selector=lambda tr: tr.nslc_id[:3] == nsl,
                load_data=False)

            channels = list(set(tr.channel for tr in trs))
            station.set_channels_by_name(*channels)

        projections = []
        projections.extend(station.guess_projections_to_enu(
            out_channels=('E', 'N', 'Z')))

        if source is not None and target is not None:
            backazimuth = source.azibazi_to(target)[1]

        if backazimuth is not None:
            projections.extend(station.guess_projections_to_rtu(
                out_channels=('R', 'T', 'Z'),
                backazimuth=backazimuth))

        if not projections:
            raise NotFound(
                'Cannot determine projection of data components:',
                station.nsl())

        return projections

    def _get_waveform(
            self,
            obj, quantity='displacement',
            tmin=None, tmax=None, tpad=0.,
            tfade=0., freqlimits=None, deltat=None, cache=None,
            backazimuth=None,
            source=None,
            target=None,
            debug=False):

        assert not debug or (debug and cache is None)

        if cache is True:
            cache = self._cache

        _, _, _, channel = self.get_nslc(obj)
        station = self.get_station(self.get_nsl(obj))

        nslc = station.nsl() + (channel,)

        if self.is_blacklisted(nslc):
            raise NotFound(
                'Waveform is blacklisted:', nslc)

        if not self.is_whitelisted(nslc):
            raise NotFound(
                'Waveform is not on whitelist:', nslc)

        assert tmin is not None
        assert tmax is not None

        tmin = float(tmin)
        tmax = float(tmax)

        nslc = tuple(nslc)

        cache_k = (
            tmin, tmax, tuple(freqlimits), tfade, deltat, tpad, quantity)
        if cache is not None and (nslc + cache_k) in cache:
            obj = cache[nslc + cache_k]
            if isinstance(obj, Exception):
                raise obj
            elif obj is None:
                raise NotFound('Waveform not found!', nslc)
            else:
                return obj

        syn_test = self.synthetic_test
        toffset_noise_extract = 0.0
        if syn_test:
            if not syn_test.respect_data_availability:
                if syn_test.real_noise_scale != 0.0:
                    raise DatasetError(
                        'respect_data_availability=False and '
                        'addition of real noise cannot be combined.')

                tr = syn_test.get_waveform(
                    nslc, tmin, tmax,
                    tfade=tfade,
                    freqlimits=freqlimits)

                if cache is not None:
                    cache[tr.nslc_id + cache_k] = tr

                if debug:
                    return [], [], [], tr
                else:
                    return tr

            if syn_test.real_noise_scale != 0.0:
                toffset_noise_extract = syn_test.real_noise_toffset

        abs_delays = []
        for ocha in 'ENZRT':
            sc = self.station_corrections.get(station.nsl() + (ocha,), None)
            if sc:
                abs_delays.append(abs(sc.delay))

        if abs_delays:
            abs_delay_max = max(abs_delays)
        else:
            abs_delay_max = 0.0

        projections = self._get_projections(
            station, backazimuth, source, target, tmin, tmax)

        try:
            trs_projected = []
            trs_restituted = []
            trs_raw = []
            exceptions = []
            for matrix, in_channels, out_channels in projections:
                deps = trace.project_dependencies(
                    matrix, in_channels, out_channels)

                try:
                    trs_restituted_group = []
                    trs_raw_group = []
                    if channel in deps:
                        for cha in deps[channel]:
                            trs_restituted_this, trs_raw_this = \
                                self.get_waveform_restituted(
                                    station.nsl() + (cha,),
                                    quantity=quantity,
                                    tmin=tmin, tmax=tmax,
                                    tpad=tpad+abs_delay_max,
                                    toffset_noise_extract=toffset_noise_extract,  # noqa
                                    tfade=tfade,
                                    freqlimits=freqlimits,
                                    deltat=deltat,
                                    want_incomplete=debug,
                                    extend_incomplete=self.extend_incomplete)

                            trs_restituted_group.extend(trs_restituted_this)
                            trs_raw_group.extend(trs_raw_this)

                        trs_projected.extend(
                            trace.project(
                                trs_restituted_group, matrix,
                                in_channels, out_channels))

                        trs_restituted.extend(trs_restituted_group)
                        trs_raw.extend(trs_raw_group)

                except NotFound as e:
                    exceptions.append((in_channels, out_channels, e))

            if not trs_projected:
                err = []
                for (in_channels, out_channels, e) in exceptions:
                    sin = ', '.join(c.name for c in in_channels)
                    sout = ', '.join(c.name for c in out_channels)
                    err.append('(%s) -> (%s): %s' % (sin, sout, e))

                raise NotFound('\n'.join(err))

            for tr in trs_projected:
                sc = self.station_corrections.get(tr.nslc_id, None)
                if sc:
                    if self.apply_correction_factors:
                        tr.ydata /= sc.factor

                    if self.apply_correction_delays:
                        tr.shift(-sc.delay)

                if tmin is not None and tmax is not None:
                    tr.chop(tmin, tmax)

            if syn_test:
                trs_projected_synthetic = []
                for tr in trs_projected:
                    if tr.channel == channel:
                        tr_syn = syn_test.get_waveform(
                            tr.nslc_id, tmin, tmax,
                            tfade=tfade, freqlimits=freqlimits)

                        if tr_syn:
                            if syn_test.real_noise_scale != 0.0:
                                tr_syn = tr_syn.copy()
                                tr_noise = tr.copy()
                                tr_noise.set_ydata(
                                    tr_noise.get_ydata()
                                    * syn_test.real_noise_scale)

                                tr_syn.add(tr_noise)

                            trs_projected_synthetic.append(tr_syn)

                trs_projected = trs_projected_synthetic

            if cache is not None:
                for tr in trs_projected:
                    cache[tr.nslc_id + cache_k] = tr

            tr_return = None
            for tr in trs_projected:
                if tr.channel == channel:
                    tr_return = tr

            if debug:
                return trs_projected, trs_restituted, trs_raw, tr_return

            elif tr_return:
                return tr_return

            else:
                raise NotFound(
                    'waveform not available', station.nsl() + (channel,))

        except NotFound:
            if cache is not None:
                cache[nslc + cache_k] = None
            raise

    def get_waveform(self, obj, tinc_cache=None, **kwargs):
        tmin = kwargs['tmin']
        tmax = kwargs['tmax']
        if tinc_cache is not None:
            tmin_r = (math.floor(tmin / tinc_cache) - 1.0) * tinc_cache
            tmax_r = (math.ceil(tmax / tinc_cache) + 1.0) * tinc_cache
        else:
            tmin_r = tmin
            tmax_r = tmax

        kwargs['tmin'] = tmin_r
        kwargs['tmax'] = tmax_r

        if kwargs.get('debug', None):
            return self._get_waveform(obj, **kwargs)
        else:
            tr = self._get_waveform(obj, **kwargs)
            return tr.chop(tmin, tmax, inplace=False)
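    # Cache-window rounding, worked through with illustrative numbers: for
    # tinc_cache=100. and a request tmin=432., tmax=518., the extraction
    # window becomes
    #
    #     tmin_r = (math.floor(432. / 100.) - 1.) * 100.  # -> 300.
    #     tmax_r = (math.ceil(518. / 100.) + 1.) * 100.   # -> 700.
    #
    # so nearby requests map onto the same cached window and the final
    # chop() trims the result back to the requested span.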

    def get_events(self, magmin=None, event_names=None):
        evs = []
        for ev in self.events:
            if ((magmin is None or ev.magnitude >= magmin) and
                    (event_names is None or ev.name in event_names)):
                evs.append(ev)

        return evs

    def get_event_by_time(self, t, magmin=None):
        evs = self.get_events(magmin=magmin)
        ev_x = None
        for ev in evs:
            if ev_x is None or abs(ev.time - t) < abs(ev_x.time - t):
                ev_x = ev

        if not ev_x:
            raise NotFound(
                'No event information matching criteria (t=%s, magmin=%s).' %
                (t, magmin))

        return ev_x

    def get_event(self):
        if self._event_name is None:
            raise NotFound('No main event selected in dataset!')

        for ev in self.events:
            if ev.name == self._event_name:
                return ev

        raise NotFound('No such event: %s' % self._event_name)

    def get_picks(self):
        if self._picks is None:
            hash_to_name = {}
            names = set()
            for marker in self.pick_markers:
                if isinstance(marker, pmarker.EventMarker):
                    name = marker.get_event().name
                    if name in names:
                        raise DatasetError(
                            'Duplicate event name "%s" in picks.' % name)

                    names.add(name)
                    hash_to_name[marker.get_event_hash()] = name

            for ev in self.events:
                hash_to_name[ev.get_hash()] = ev.name

            picks = {}
            for marker in self.pick_markers:
                if isinstance(marker, pmarker.PhaseMarker):
                    ehash = marker.get_event_hash()

                    nsl = marker.one_nslc()[:3]
                    phasename = marker.get_phasename()

                    if ehash is None or ehash not in hash_to_name:
                        raise DatasetError(
                            'Unassociated pick: %s.%s.%s, %s' %
                            (nsl + (phasename, )))

                    eventname = hash_to_name[ehash]

                    if (nsl, phasename, eventname) in picks:
                        raise DatasetError(
                            'Duplicate pick: %s.%s.%s, %s' %
                            (nsl + (phasename, )))

                    picks[nsl, phasename, eventname] = marker

            self._picks = picks

        return self._picks

    def get_pick(self, eventname, obj, phasename):
        nsl = self.get_nsl(obj)
        return self.get_picks().get((nsl, phasename, eventname), None)
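# Pick lookup sketch (illustrative values): picks are keyed by
# ((net, sta, loc), phasename, eventname), so a P pick for station GE.APE
# associated with event 'ev_01' is retrieved as
#
#     marker = ds.get_pick('ev_01', ('GE', 'APE', ''), 'P')
#
# which returns None when no matching phase marker was loaded.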

class DatasetConfig(HasPaths):
    ''' Configuration for a Grond `Dataset` object. '''

    stations_path = Path.T(
        optional=True,
        help='File with station coordinates in Pyrocko format.')
    stations_stationxml_paths = List.T(
        Path.T(),
        optional=True,
        help='List of files with station coordinates in StationXML format.')
    events_path = Path.T(
        optional=True,
        help='File with hypocenter information and possibly a'
             ' reference solution.')
    waveform_paths = List.T(
        Path.T(),
        optional=True,
        help='List of directories with raw waveform data.')
    clippings_path = Path.T(
        optional=True)
    responses_sacpz_path = Path.T(
        optional=True,
        help='Directory with SAC PZ response files for restitution of'
             ' the raw waveform data.')
    responses_stationxml_paths = List.T(
        Path.T(),
        optional=True,
        help='List of StationXML response files for restitution of'
             ' the raw waveform data.')
    station_corrections_path = Path.T(
        optional=True,
        help='File containing station correction information.')
    apply_correction_factors = Bool.T(
        optional=True,
        default=True,
        help='Apply correction factors from station corrections.')
    apply_correction_delays = Bool.T(
        optional=True,
        default=True,
        help='Apply correction delays from station corrections.')
    apply_displaced_sampling_workaround = Bool.T(
        optional=True,
        default=False,
        help='Work around displaced sampling issues.')
    extend_incomplete = Bool.T(
        default=False,
        help='Extend incomplete seismic traces.')
    picks_paths = List.T(
        Path.T())
    blacklist_paths = List.T(
        Path.T(),
        help='List of text files with blacklisted stations.')
    blacklist = List.T(
        String.T(),
        help='Stations/components to be excluded according to their STA, '
             'NET.STA, NET.STA.LOC, or NET.STA.LOC.CHA codes.')
    whitelist_paths = List.T(
        Path.T(),
        help='List of text files with whitelisted stations.')
    whitelist = List.T(
        String.T(),
        optional=True,
        help='If not None, list of stations/components to include according '
             'to their STA, NET.STA, NET.STA.LOC, or NET.STA.LOC.CHA codes. '
             'Note: when whitelisting on channel level, both the raw and '
             'the processed channel codes have to be listed.')
    synthetic_test = SyntheticTest.T(
        optional=True)

    kite_scene_paths = List.T(
        Path.T(),
        optional=True,
        help='List of directories for the InSAR scenes.')

    gnss_campaign_paths = List.T(
        Path.T(),
        optional=True,
        help='List of directories for the GNSS campaign data.')

    def __init__(self, *args, **kwargs):
        HasPaths.__init__(self, *args, **kwargs)
        self._ds = {}

    def get_event_names(self):
        logger.info('Loading events ...')

        def extra(path):
            return expand_template(path, dict(
                event_name='*'))

        def fp(path):
            return self.expand_path(path, extra=extra)

        def check_events(events, fn):
            for ev in events:
                if not ev.name:
                    logger.warning('Event in %s has no name!', fn)
                    return
                if not ev.lat or not ev.lon:
                    logger.warning('Event %s has missing or zero coordinates!',
                                   ev.name)
                if not ev.depth:
                    logger.warning('Event %s has no depth!', ev.name)
                if not ev.time:
                    logger.warning('Event %s has no time!', ev.name)

        events = []
        events_path = fp(self.events_path)
        fns = glob.glob(events_path)
        if not fns:
            raise DatasetError('No event files matching "%s".' % events_path)

        for fn in fns:
            logger.debug('Loading from file %s' % fn)
            ev = model.load_events(filename=fn)
            check_events(ev, fn)

            events.extend(ev)

        event_names = [ev_.name for ev_ in events]
        event_names.sort()
        return event_names

    def get_dataset(self, event_name):
        if event_name not in self._ds:
            def extra(path):
                return expand_template(path, dict(
                    event_name=event_name))

            def fp(path):
                p = self.expand_path(path, extra=extra)
                if p is None:
                    return None

                if isinstance(p, list):
                    for path in p:
                        if not op.exists(path):
                            logger.warning('Path %s does not exist.' % path)
                else:
                    if not op.exists(p):
                        logger.warning('Path %s does not exist.' % p)

                return p

            ds = Dataset(event_name)
            try:
                ds.add_events(filename=fp(self.events_path))

                ds.add_stations(
                    pyrocko_stations_filename=fp(self.stations_path),
                    stationxml_filenames=fp(self.stations_stationxml_paths))

                if self.waveform_paths:
                    ds.add_waveforms(paths=fp(self.waveform_paths))

                if self.kite_scene_paths:
                    ds.add_kite_scenes(paths=fp(self.kite_scene_paths))

                if self.gnss_campaign_paths:
                    ds.add_gnss_campaigns(paths=fp(self.gnss_campaign_paths))

                if self.clippings_path:
                    ds.add_clippings(markers_filename=fp(self.clippings_path))

                if self.responses_sacpz_path:
                    ds.add_responses(
                        sacpz_dirname=fp(self.responses_sacpz_path))

                if self.responses_stationxml_paths:
                    ds.add_responses(
                        stationxml_filenames=fp(
                            self.responses_stationxml_paths))

                if self.station_corrections_path:
                    ds.add_station_corrections(
                        filename=fp(self.station_corrections_path))

                ds.apply_correction_factors = self.apply_correction_factors
                ds.apply_correction_delays = self.apply_correction_delays
                ds.apply_displaced_sampling_workaround = \
                    self.apply_displaced_sampling_workaround
                ds.extend_incomplete = self.extend_incomplete

                for picks_path in self.picks_paths:
                    ds.add_picks(
                        filename=fp(picks_path))

                ds.add_blacklist(self.blacklist)
                ds.add_blacklist(filenames=fp(self.blacklist_paths))
                if self.whitelist:
                    ds.add_whitelist(self.whitelist)
                if self.whitelist_paths:
                    ds.add_whitelist(filenames=fp(self.whitelist_paths))

                ds.set_synthetic_test(copy.deepcopy(self.synthetic_test))
                self._ds[event_name] = ds
            except (FileLoadError, OSError) as e:
                raise DatasetError(str(e))

        return self._ds[event_name]
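# Sketch of how this maps to a Grond configuration file (YAML via guts;
# the paths and the `${event_name}` placeholder below are illustrative):
#
#     dataset_config: !grond.DatasetConfig
#       events_path: 'data/events/${event_name}/event.txt'
#       stations_path: 'data/events/${event_name}/stations.txt'
#       waveform_paths: ['data/events/${event_name}/waveforms']
#       blacklist: ['GE.BAD']
#
# `get_event_names()` expands the template with event_name='*' to discover
# events; `get_dataset(event_name)` builds and caches one Dataset per event.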

__all__ = '''
    Dataset
    DatasetConfig
    DatasetError
    InvalidObject
    NotFound
    StationCorrection
    load_station_corrections
    dump_station_corrections
'''.split()