Coverage for /usr/local/lib/python3.11/dist-packages/grond/dataset.py: 72%

697 statements  

« prev     ^ index     » next       coverage.py v6.5.0, created at 2024-11-27 15:15 +0000

1# https://pyrocko.org/grond - GPLv3 

2# 

3# The Grond Developers, 21st Century 

4 

5import glob 

6import copy 

7import os.path as op 

8import logging 

9import math 

10import numpy as num 

11 

12from collections import defaultdict 

13from pyrocko import util, model, trace, marker as pmarker 

14from pyrocko.io.io_common import FileLoadError 

15from pyrocko.fdsn import enhanced_sacpz, station as fs 

16from pyrocko.guts import (Object, Tuple, String, Float, List, Bool, dump_all, 

17 load_all) 

18from pyrocko import squirrel 

19 

20from pyrocko import gf 

21 

22from .meta import Path, HasPaths, expand_template, GrondError 

23 

24from .synthetic_tests import SyntheticTest 

25 

# Guts serialization prefix for objects defined in this module.
guts_prefix = 'grond'
# Module-level logger; messages appear on the 'grond.dataset' channel.
logger = logging.getLogger('grond.dataset')

28 

29 

def quote_paths(paths):
    '''Return *paths* as a single comma-separated string of quoted paths.'''
    quoted = ['"%s"' % p for p in paths]
    return ', '.join(quoted)

32 

33 

class InvalidObject(Exception):
    '''Raised when an object cannot be mapped to station/channel codes.'''
    pass

36 

37 

class NotFound(Exception):
    '''Raised when a requested dataset item is unavailable.

    :param reason: human-readable description of what is missing
    :param codes: optional station/channel codes tuple related to the failure
    :param time_range: optional ``(tmin, tmax)`` tuple
    '''

    def __init__(self, reason, codes=None, time_range=None):
        self.reason = reason
        self.time_range = time_range
        self.codes = codes

    def __str__(self):
        parts = [self.reason]
        if self.codes:
            parts.append(' (%s)' % '.'.join(self.codes))

        if self.time_range:
            parts.append(' (%s - %s)' % (
                util.time_to_str(self.time_range[0]),
                util.time_to_str(self.time_range[1])))

        return ''.join(parts)

55 

56 

class DatasetError(GrondError):
    '''Raised for dataset configuration and consistency problems.'''
    pass

59 

60 

class StationCorrection(Object):
    '''Per-channel waveform correction: a time shift and an amplitude factor.'''

    # NSLC codes of the channel this correction applies to.
    codes = Tuple.T(4, String.T())
    # Time shift [s]; applied with negated sign to traces (see Dataset).
    delay = Float.T()
    # Amplitude factor; trace samples are divided by it (see Dataset).
    factor = Float.T()

65 

66 

def load_station_corrections(filename):
    '''Load a list of :py:class:`StationCorrection` objects from a YAML file.

    Asserts that every loaded object is a :py:class:`StationCorrection`.
    '''
    corrections = load_all(filename=filename)
    for correction in corrections:
        assert isinstance(correction, StationCorrection)

    return corrections

73 

74 

def dump_station_corrections(station_corrections, filename):
    '''Write *station_corrections* to a YAML file via Guts serialization.'''
    return dump_all(station_corrections, filename=filename)

77 

78 

79class Dataset(object): 

80 

    def __init__(self, event_name=None):
        '''Create an empty dataset, optionally bound to a main event name.'''
        # Known events (pyrocko model.Event objects).
        self.events = []
        # Waveform storage/lookup backend.
        self.squirrel = squirrel.Squirrel()
        # Station metadata keyed by (net, sta, loc).
        self.stations = {}
        # SAC PZ responses, list per NSLC codes.
        self.responses = defaultdict(list)
        # Loaded StationXML documents used for response lookup.
        self.responses_stationxml = []
        # Clipping onset times (numpy arrays) keyed by NSL or NSLC.
        self.clippings = {}
        # Codes tuples to reject (station/channel blacklist).
        self.exclude = set()
        # Include lists; None means "no include filtering configured".
        self.include_nslc = None
        self.include_nsl_xx = None
        self.include = None
        # StationCorrection objects keyed by NSLC codes.
        self.station_corrections = {}
        # NOTE(review): station_factors appears unused in this file — verify.
        self.station_factors = {}
        # Phase/event pick markers (pyrocko marker objects).
        self.pick_markers = []
        # Toggles for applying station corrections to traces.
        self.apply_correction_delays = True
        self.apply_correction_factors = True
        # Workaround: snap traces with displaced sampling to the grid.
        self.apply_displaced_sampling_workaround = False
        # Whether to pad incomplete traces instead of rejecting them.
        self.extend_incomplete = False
        # Granularity of clipping rejection: 'by_nsl' or 'by_nslc'.
        self.clip_handling = 'by_nsl'
        # InSAR scenes and GNSS campaigns.
        self.kite_scenes = []
        self.gnss_campaigns = []
        # Optional synthetic-test scenario replacing real waveforms.
        self.synthetic_test = None
        # Lazily built pick lookup (see get_picks) and waveform cache.
        self._picks = None
        self._cache = {}
        # Name of the dataset's main event (see get_event).
        self._event_name = event_name

106 

107 def close(self): 

108 del self.squirrel 

109 self.squirrel = None 

110 

111 def empty_cache(self): 

112 self._cache = {} 

113 

114 def set_synthetic_test(self, synthetic_test): 

115 self.synthetic_test = synthetic_test 

116 

    def add_stations(
            self,
            stations=None,
            pyrocko_stations_filename=None,
            stationxml_filenames=None):
        '''Register station metadata from objects and/or files.

        :param stations: iterable of pyrocko station objects
        :param pyrocko_stations_filename: stations file in Pyrocko format
        :param stationxml_filenames: list of StationXML files; stations are
            extracted at the main event's origin time
        '''

        if stations is not None:
            for station in stations:
                self.stations[station.nsl()] = station

        if pyrocko_stations_filename is not None:
            logger.debug(
                'Loading stations from file "%s"...' %
                pyrocko_stations_filename)

            for station in model.load_stations(pyrocko_stations_filename):
                self.stations[station.nsl()] = station

        if stationxml_filenames is not None and len(stationxml_filenames) > 0:

            for stationxml_filename in stationxml_filenames:
                # Missing files are silently skipped.
                if not op.exists(stationxml_filename):
                    continue

                logger.debug(
                    'Loading stations from StationXML file "%s"...' %
                    stationxml_filename)

                sx = fs.load_xml(filename=stationxml_filename)
                ev = self.get_event()
                try:
                    stations = sx.get_pyrocko_stations(
                        time=ev.time, sloppy=True)
                except TypeError:
                    # Older Pyrocko versions lack the `sloppy` keyword;
                    # warn and retry without it.
                    logger.warning(
                        'The installed version of Pyrocko does not support '
                        'the keyword argument `sloppy` in method '
                        '`FDSNStationXML.get_pyrocko_stations`. Please '
                        'upgrade Pyrocko.')

                    stations = sx.get_pyrocko_stations(time=ev.time)

                if len(stations) == 0:
                    logger.warning(
                        'No stations found for time %s in file "%s".' % (
                            util.time_to_str(ev.time), stationxml_filename))

                for station in stations:
                    logger.debug('Adding station: %s.%s.%s' % station.nsl())
                    channels = station.get_channels()
                    # Vertical-only stations get mocked horizontal channels
                    # so component projections can be set up later.
                    if len(channels) == 1 and channels[0].name.endswith('Z'):
                        logger.warning(
                            'Station "%s" has vertical component'
                            ' information only, adding mocked channels.'
                            % station.nsl_string())
                        station.add_channel(
                            model.Channel(channels[0].name[:-1] + 'N'))
                        station.add_channel(
                            model.Channel(channels[0].name[:-1] + 'E'))

                    self.stations[station.nsl()] = station

178 

179 def add_events(self, events=None, filename=None): 

180 if events is not None: 

181 self.events.extend(events) 

182 

183 if filename is not None: 

184 logger.debug('Loading events from file "%s"...' % filename) 

185 try: 

186 events = model.load_events(filename) 

187 self.events.extend(events) 

188 logger.info( 

189 'Loading events from %s: %i events found.' % 

190 (filename, len(events))) 

191 except Exception as e: 

192 logger.warning('Could not load events from %s!', filename) 

193 raise e 

194 

195 def add_waveforms(self, paths, regex=None, fileformat='detect', 

196 show_progress=False): 

197 

198 self.squirrel.add(paths) 

199 

200 def add_responses(self, sacpz_dirname=None, stationxml_filenames=None): 

201 if sacpz_dirname: 

202 logger.debug( 

203 'Loading SAC PZ responses from "%s"...' % sacpz_dirname) 

204 for x in enhanced_sacpz.iload_dirname(sacpz_dirname): 

205 self.responses[x.codes].append(x) 

206 

207 if stationxml_filenames: 

208 for stationxml_filename in stationxml_filenames: 

209 if not op.exists(stationxml_filename): 

210 continue 

211 

212 logger.debug( 

213 'Loading StationXML responses from "%s"...' % 

214 stationxml_filename) 

215 

216 self.responses_stationxml.append( 

217 fs.load_xml(filename=stationxml_filename)) 

218 

219 def add_clippings(self, markers_filename): 

220 markers = pmarker.load_markers(markers_filename) 

221 clippings = {} 

222 for marker in markers: 

223 nslc = marker.one_nslc() 

224 nsl = nslc[:3] 

225 if nsl not in clippings: 

226 clippings[nsl] = [] 

227 

228 if nslc not in clippings: 

229 clippings[nslc] = [] 

230 

231 clippings[nsl].append(marker.tmin) 

232 clippings[nslc].append(marker.tmin) 

233 

234 for k, times in clippings.items(): 

235 atimes = num.array(times, dtype=float) 

236 if k not in self.clippings: 

237 self.clippings[k] = atimes 

238 else: 

239 self.clippings[k] = num.concatenate(self.clippings, atimes) 

240 

241 def add_exclude(self, exclude=[], filenames=None): 

242 logger.debug('Loading exclude definitions...') 

243 if filenames: 

244 exclude = list(exclude) 

245 for filename in filenames: 

246 if op.exists(filename): 

247 with open(filename, 'r') as f: 

248 exclude.extend( 

249 s.strip() for s in f.read().splitlines()) 

250 else: 

251 logger.warning('No such exclude file: %s' % filename) 

252 

253 for x in exclude: 

254 if isinstance(x, str): 

255 x = tuple(x.split('.')) 

256 self.exclude.add(x) 

257 

258 def add_include(self, include=[], filenames=None): 

259 logger.debug('Loading included stations...') 

260 if filenames: 

261 include = list(include) 

262 for filename in filenames: 

263 with open(filename, 'r') as f: 

264 include.extend(s.strip() for s in f.read().splitlines()) 

265 

266 if self.include_nslc is None: 

267 self.include_nslc = set() 

268 self.include = set() 

269 self.include_nsl_xx = set() 

270 

271 for x in include: 

272 if isinstance(x, str): 

273 x = tuple(x.split('.')) 

274 if len(x) == 4: 

275 self.include_nslc.add(x) 

276 self.include_nsl_xx.add(x[:3]) 

277 else: 

278 self.include.add(x) 

279 

280 def add_station_corrections(self, filename): 

281 self.station_corrections.update( 

282 (sc.codes, sc) for sc in load_station_corrections(filename)) 

283 

284 def add_picks(self, filename): 

285 self.pick_markers.extend( 

286 pmarker.load_markers(filename)) 

287 

288 self._picks = None 

289 

290 def add_gnss_campaigns(self, paths): 

291 paths = util.select_files( 

292 paths, 

293 include=r'\.yml|\.yaml', 

294 show_progress=False) 

295 

296 for path in paths: 

297 self.add_gnss_campaign(filename=path) 

298 

299 def add_gnss_campaign(self, filename): 

300 from pyrocko.model import gnss # noqa 

301 

302 logger.debug('Loading GNSS campaign from "%s"...' % filename) 

303 

304 campaign = load_all(filename=filename) 

305 self.gnss_campaigns.append(campaign[0]) 

306 

307 def add_kite_scenes(self, paths): 

308 logger.info('Loading kite InSAR scenes...') 

309 paths = util.select_files( 

310 paths, 

311 include=r'\.npz', 

312 show_progress=False) 

313 

314 for path in paths: 

315 self.add_kite_scene(filename=path) 

316 

317 if not self.kite_scenes: 

318 logger.warning('Could not find any kite scenes at %s.' % 

319 quote_paths(self.kite_scene_paths)) 

320 

321 def add_kite_scene(self, filename): 

322 try: 

323 from kite import Scene 

324 except ImportError: 

325 raise ImportError('Module kite could not be imported,' 

326 ' please install from https://pyrocko.org.') 

327 logger.debug('Loading kite scene from "%s"...' % filename) 

328 

329 scene = Scene.load(filename) 

330 scene._log.setLevel(logger.level) 

331 

332 try: 

333 self.get_kite_scene(scene.meta.scene_id) 

334 except NotFound: 

335 self.kite_scenes.append(scene) 

336 else: 

337 raise AttributeError('Kite scene_id not unique for "%s".' 

338 % filename) 

339 

340 def is_excluded(self, obj): 

341 try: 

342 nslc = self.get_nslc(obj) 

343 if nslc in self.exclude: 

344 return True 

345 

346 except InvalidObject: 

347 pass 

348 

349 nsl = self.get_nsl(obj) 

350 return ( 

351 nsl in self.exclude or 

352 nsl[1:2] in self.exclude or 

353 nsl[:2] in self.exclude) 

354 

355 def is_included(self, obj): 

356 if self.include_nslc is None: 

357 return True 

358 

359 nsl = self.get_nsl(obj) 

360 

361 if ( 

362 nsl in self.include or 

363 nsl[1:2] in self.include or 

364 nsl[:2] in self.include): 

365 

366 return True 

367 

368 try: 

369 nslc = self.get_nslc(obj) 

370 if nslc in self.include_nslc: 

371 return True 

372 

373 except InvalidObject: 

374 return nsl in self.include_nsl_xx 

375 

376 def has_clipping(self, nsl_or_nslc, tmin, tmax): 

377 if nsl_or_nslc not in self.clippings: 

378 return False 

379 

380 atimes = self.clippings[nsl_or_nslc] 

381 return num.any(num.logical_and(tmin < atimes, atimes <= tmax)) 

382 

383 def get_nsl(self, obj): 

384 if isinstance(obj, trace.Trace): 

385 net, sta, loc, _ = obj.nslc_id 

386 elif isinstance(obj, model.Station): 

387 net, sta, loc = obj.nsl() 

388 elif isinstance(obj, gf.Target): 

389 net, sta, loc, _ = obj.codes 

390 elif isinstance(obj, tuple) and len(obj) in (3, 4): 

391 net, sta, loc = obj[:3] 

392 else: 

393 raise InvalidObject( 

394 'Cannot get nsl code from given object of type "%s".' 

395 % type(obj)) 

396 

397 return net, sta, loc 

398 

399 def get_nslc(self, obj): 

400 if isinstance(obj, trace.Trace): 

401 return obj.nslc_id 

402 elif isinstance(obj, gf.Target): 

403 return obj.codes 

404 elif isinstance(obj, tuple) and len(obj) == 4: 

405 return obj 

406 else: 

407 raise InvalidObject( 

408 'Cannot get nslc code from given object "%s"' % type(obj)) 

409 

410 def get_tmin_tmax(self, obj): 

411 if isinstance(obj, trace.Trace): 

412 return obj.tmin, obj.tmax 

413 else: 

414 raise InvalidObject( 

415 'Cannot get tmin and tmax from given object of type "%s"' % 

416 type(obj)) 

417 

418 def get_station(self, obj): 

419 if self.is_excluded(obj): 

420 raise NotFound('Station is in exclude list:', self.get_nsl(obj)) 

421 

422 if not self.is_included(obj): 

423 raise NotFound( 

424 'Station is not on include list:', self.get_nsl(obj)) 

425 

426 if isinstance(obj, model.Station): 

427 return obj 

428 

429 net, sta, loc = self.get_nsl(obj) 

430 

431 keys = [(net, sta, loc), (net, sta, ''), ('', sta, '')] 

432 for k in keys: 

433 if k in self.stations: 

434 return self.stations[k] 

435 

436 raise NotFound('No station information:', keys) 

437 

438 def get_stations(self): 

439 return [self.stations[k] for k in sorted(self.stations) 

440 if not self.is_excluded(self.stations[k]) 

441 and self.is_included(self.stations[k])] 

442 

443 def get_kite_scenes(self): 

444 return self.kite_scenes 

445 

446 def get_kite_scene(self, scene_id=None): 

447 if scene_id is None: 

448 if len(self.kite_scenes) == 0: 

449 raise AttributeError('No kite displacements defined.') 

450 return self.kite_scenes[0] 

451 else: 

452 for scene in self.kite_scenes: 

453 if scene.meta.scene_id == scene_id: 

454 return scene 

455 raise NotFound('No kite scene with id "%s" defined.' % scene_id) 

456 

457 def get_gnss_campaigns(self): 

458 return self.gnss_campaigns 

459 

460 def get_gnss_campaign(self, name): 

461 for camp in self.gnss_campaigns: 

462 if camp.name == name: 

463 return camp 

464 raise NotFound('GNSS campaign %s not found!' % name) 

465 

    def get_response(self, obj, quantity='displacement'):
        '''Find the unique instrument response for *obj*.

        Searches SAC PZ responses first, then StationXML documents; the
        returned response converts raw counts to *quantity*.

        :param obj: anything accepted by :py:meth:`get_nslc` /
            :py:meth:`get_tmin_tmax` (typically a trace)
        :param quantity: one of ``'displacement'``, ``'velocity'``,
            ``'acceleration'`` and their ``'rotation_*'`` counterparts
        :raises NotFound: if no response information is loaded, the codes
            are excluded/not included, or if zero or multiple candidate
            responses match
        '''
        if (self.responses is None or len(self.responses) == 0) \
                and (self.responses_stationxml is None
                     or len(self.responses_stationxml) == 0):

            raise NotFound('No response information available.')

        # Units passed to StationXML lookup for unit-aware restitution.
        quantity_to_unit = {
            'displacement': 'M',
            'velocity': 'M/S',
            'acceleration': 'M/S**2',
            'rotation_displacement': 'RAD',
            'rotation_velocity': 'RAD/S',
            'rotation_acceleration': 'RAD/S**2'}

        if self.is_excluded(obj):
            raise NotFound(
                'Response is in exclude list:', self.get_nslc(obj))

        if not self.is_included(obj):
            raise NotFound(
                'Response is not on include list:', self.get_nslc(obj))

        net, sta, loc, cha = self.get_nslc(obj)
        tmin, tmax = self.get_tmin_tmax(obj)

        # Lookup keys, from most to least specific (blank loc, then also
        # blank net); duplicates removed while preserving order.
        keys_x = [
            (net, sta, loc, cha), (net, sta, '', cha), ('', sta, '', cha)]

        keys = []
        for k in keys_x:
            if k not in keys:
                keys.append(k)

        candidates = []
        for k in keys:
            if k in self.responses:
                for x in self.responses[k]:
                    # Response epoch must fully contain the requested span.
                    if x.tmin < tmin and (x.tmax is None or tmax < x.tmax):
                        if quantity in (
                                'displacement', 'rotation_displacement'):
                            candidates.append(x.response)
                        elif quantity in (
                                'velocity', 'rotation_velocity'):
                            # Differentiate once: displacement -> velocity.
                            candidates.append(trace.MultiplyResponse([
                                x.response,
                                trace.DifferentiationResponse()]))
                        elif quantity in (
                                'acceleration', 'rotation_acceleration'):
                            # Differentiate twice for acceleration.
                            candidates.append(trace.MultiplyResponse([
                                x.response,
                                trace.DifferentiationResponse(2)]))
                        else:
                            assert False

        for sx in self.responses_stationxml:
            try:
                candidates.append(
                    sx.get_pyrocko_response(
                        (net, sta, loc, cha),
                        timespan=(tmin, tmax),
                        fake_input_units=quantity_to_unit[quantity]))

            except (fs.NoResponseInformation, fs.MultipleResponseInformation):
                # This document has no usable answer; try the next one.
                pass

        if len(candidates) == 1:
            return candidates[0]

        elif len(candidates) == 0:
            raise NotFound('No response found:', (net, sta, loc, cha))
        else:
            raise NotFound('Multiple responses found:', (net, sta, loc, cha))

539 

    def get_waveform_raw(
            self, obj,
            tmin,
            tmax,
            tpad=0.,
            toffset_noise_extract=0.,
            want_incomplete=False,
            extend_incomplete=False):
        '''Get raw waveform traces for *obj* in ``[tmin-tpad, tmax+tpad]``.

        :param toffset_noise_extract: time shift [s] applied when extracting
            (used to pull real noise from outside the signal window); traces
            are shifted back so their timing matches the requested window
        :param want_incomplete: accept gappy/missing data
        :param extend_incomplete: pad a single incomplete trace to the full
            window by repeating edge samples
        :raises NotFound: if excluded, not included, clipped, or (unless
            *want_incomplete*) missing/gappy
        '''

        net, sta, loc, cha = self.get_nslc(obj)

        if self.is_excluded((net, sta, loc, cha)):
            raise NotFound(
                'Waveform is in exclude list:', (net, sta, loc, cha))

        if not self.is_included((net, sta, loc, cha)):
            raise NotFound(
                'Waveform is not on include list:', (net, sta, loc, cha))

        # Reject windows overlapping clipping markers, at the configured
        # granularity.
        if self.clip_handling == 'by_nsl':
            if self.has_clipping((net, sta, loc), tmin, tmax):
                raise NotFound(
                    'Waveform clipped:', (net, sta, loc))

        elif self.clip_handling == 'by_nslc':
            if self.has_clipping((net, sta, loc, cha), tmin, tmax):
                raise NotFound(
                    'Waveform clipped:', (net, sta, loc, cha))

        trs = self.squirrel.get_waveforms(
            tmin=tmin+toffset_noise_extract-tpad,
            tmax=tmax+toffset_noise_extract+tpad,
            codes=(net, sta, loc, cha),
            want_incomplete=want_incomplete or extend_incomplete)

        if self.apply_displaced_sampling_workaround:
            for tr in trs:
                tr.snap()

        # Undo the noise-extraction offset so trace times match the request.
        if toffset_noise_extract != 0.0:
            for tr in trs:
                tr.shift(-toffset_noise_extract)

        if extend_incomplete and len(trs) == 1:
            trs[0].extend(
                tmin + toffset_noise_extract - tpad,
                tmax + toffset_noise_extract + tpad,
                fillmethod='repeat')

        if not want_incomplete and len(trs) != 1:
            if len(trs) == 0:
                message = 'Waveform missing or incomplete.'
            else:
                message = 'Waveform has gaps.'

            raise NotFound(
                message,
                codes=(net, sta, loc, cha),
                time_range=(
                    tmin + toffset_noise_extract - tpad,
                    tmax + toffset_noise_extract + tpad))

        return trs

603 

604 def get_waveform_restituted( 

605 self, 

606 obj, quantity='displacement', 

607 tmin=None, tmax=None, tpad=0., 

608 tfade=0., freqlimits=None, deltat=None, 

609 toffset_noise_extract=0., 

610 want_incomplete=False, 

611 extend_incomplete=False): 

612 

613 if deltat is not None: 

614 tpad_downsample = deltat * 50. 

615 else: 

616 tpad_downsample = 0. 

617 

618 trs_raw = self.get_waveform_raw( 

619 obj, tmin=tmin, tmax=tmax, tpad=tpad+tfade+tpad_downsample, 

620 toffset_noise_extract=toffset_noise_extract, 

621 want_incomplete=want_incomplete, 

622 extend_incomplete=extend_incomplete) 

623 

624 trs_restituted = [] 

625 for tr in trs_raw: 

626 if deltat is not None: 

627 try: 

628 tr.downsample_to(deltat, snap=True, allow_upsample_max=5) 

629 

630 except util.UnavailableDecimation: 

631 logger.warn('using resample instead of decimation') 

632 tr.resample(deltat) 

633 

634 tr.deltat = deltat 

635 

636 resp = self.get_response(tr, quantity=quantity) 

637 try: 

638 trs_restituted.append( 

639 tr.transfer( 

640 tfade=tfade, freqlimits=freqlimits, 

641 transfer_function=resp, invert=True, demean=True)) 

642 

643 except trace.InfiniteResponse: 

644 raise NotFound( 

645 'Instrument response deconvolution failed ' 

646 '(divide by zero)', tr.nslc_id) 

647 

648 return trs_restituted, trs_raw 

649 

    def _get_projections(
            self, station, backazimuth, source, target, tmin, tmax):
        '''Build component projection matrices for *station*.

        Returns ENU projections and, when a backazimuth is available (given
        directly or computed from *source*/*target*), RTZ projections.

        :raises NotFound: if no projection can be determined
        '''

        # fill in missing channel information (happens when station file
        # does not contain any channel information)
        if not station.get_channels():
            # Work on a copy; the stored station stays unmodified.
            station = copy.deepcopy(station)

            nsl = station.nsl()
            trs = self.squirrel.pile.all(
                tmin=tmin, tmax=tmax,
                trace_selector=lambda tr: tr.nslc_id[:3] == nsl,
                load_data=False)

            # Derive channel names from available waveform headers.
            channels = list(set(tr.channel for tr in trs))
            station.set_channels_by_name(*channels)

        projections = []
        projections.extend(station.guess_projections_to_enu(
            out_channels=('E', 'N', 'Z')))

        # A given source/target pair overrides any explicit backazimuth.
        if source is not None and target is not None:
            backazimuth = source.azibazi_to(target)[1]

        if backazimuth is not None:
            projections.extend(station.guess_projections_to_rtu(
                out_channels=('R', 'T', 'Z'),
                backazimuth=backazimuth))

        if not projections:
            raise NotFound(
                'Cannot determine projection of data components:',
                station.nsl())

        return projections

685 

686 def _get_waveform( 

687 self, 

688 obj, quantity='displacement', 

689 tmin=None, tmax=None, tpad=0., 

690 tfade=0., freqlimits=None, deltat=None, cache=None, 

691 backazimuth=None, 

692 source=None, 

693 target=None, 

694 debug=False): 

695 

696 assert not debug or (debug and cache is None) 

697 

698 if cache is True: 

699 cache = self._cache 

700 

701 _, _, _, channel = self.get_nslc(obj) 

702 station = self.get_station(self.get_nsl(obj)) 

703 

704 nslc = station.nsl() + (channel,) 

705 

706 if self.is_excluded(nslc): 

707 raise NotFound( 

708 'Waveform is excluded:', nslc) 

709 

710 if not self.is_included(nslc): 

711 raise NotFound( 

712 'Waveform is not on include list:', nslc) 

713 

714 assert tmin is not None 

715 assert tmax is not None 

716 

717 tmin = float(tmin) 

718 tmax = float(tmax) 

719 

720 nslc = tuple(nslc) 

721 

722 cache_k = nslc + ( 

723 tmin, tmax, tuple(freqlimits), tfade, deltat, tpad, quantity) 

724 if cache is not None and (nslc + cache_k) in cache: 

725 obj = cache[nslc + cache_k] 

726 if isinstance(obj, Exception): 

727 raise obj 

728 elif obj is None: 

729 raise NotFound('Waveform not found!', nslc) 

730 else: 

731 return obj 

732 

733 syn_test = self.synthetic_test 

734 toffset_noise_extract = 0.0 

735 if syn_test: 

736 if not syn_test.respect_data_availability: 

737 if syn_test.real_noise_scale != 0.0: 

738 raise DatasetError( 

739 'respect_data_availability=False and ' 

740 'addition of real noise cannot be combined.') 

741 

742 tr = syn_test.get_waveform( 

743 nslc, tmin, tmax, 

744 tfade=tfade, 

745 freqlimits=freqlimits) 

746 

747 if cache is not None: 

748 cache[tr.nslc_id + cache_k] = tr 

749 

750 if debug: 

751 return [], [], [], [] 

752 else: 

753 return tr 

754 

755 if syn_test.real_noise_scale != 0.0: 

756 toffset_noise_extract = syn_test.real_noise_toffset 

757 

758 abs_delays = [] 

759 for ocha in 'ENZRT': 

760 sc = self.station_corrections.get(station.nsl() + (channel,), None) 

761 if sc: 

762 abs_delays.append(abs(sc.delay)) 

763 

764 if abs_delays: 

765 abs_delay_max = max(abs_delays) 

766 else: 

767 abs_delay_max = 0.0 

768 

769 projections = self._get_projections( 

770 station, backazimuth, source, target, tmin, tmax) 

771 

772 try: 

773 trs_projected = [] 

774 trs_restituted = [] 

775 trs_raw = [] 

776 exceptions = [] 

777 for matrix, in_channels, out_channels in projections: 

778 deps = trace.project_dependencies( 

779 matrix, in_channels, out_channels) 

780 

781 try: 

782 trs_restituted_group = [] 

783 trs_raw_group = [] 

784 if channel in deps: 

785 for cha in deps[channel]: 

786 trs_restituted_this, trs_raw_this = \ 

787 self.get_waveform_restituted( 

788 station.nsl() + (cha,), 

789 quantity=quantity, 

790 tmin=tmin, tmax=tmax, 

791 tpad=tpad+abs_delay_max, 

792 toffset_noise_extract=toffset_noise_extract, # noqa 

793 tfade=tfade, 

794 freqlimits=freqlimits, 

795 deltat=deltat, 

796 want_incomplete=debug, 

797 extend_incomplete=self.extend_incomplete) 

798 

799 trs_restituted_group.extend(trs_restituted_this) 

800 trs_raw_group.extend(trs_raw_this) 

801 

802 trs_projected.extend( 

803 trace.project( 

804 trs_restituted_group, matrix, 

805 in_channels, out_channels)) 

806 

807 trs_restituted.extend(trs_restituted_group) 

808 trs_raw.extend(trs_raw_group) 

809 

810 except NotFound as e: 

811 exceptions.append((in_channels, out_channels, e)) 

812 

813 if not trs_projected: 

814 err = [] 

815 for (in_channels, out_channels, e) in exceptions: 

816 sin = ', '.join(c.name for c in in_channels) 

817 sout = ', '.join(c.name for c in out_channels) 

818 err.append('(%s) -> (%s): %s' % (sin, sout, e)) 

819 

820 raise NotFound('\n'.join(err)) 

821 

822 for tr in trs_projected: 

823 sc = self.station_corrections.get(tr.nslc_id, None) 

824 if sc: 

825 if self.apply_correction_factors: 

826 tr.ydata /= sc.factor 

827 

828 if self.apply_correction_delays: 

829 tr.shift(-sc.delay) 

830 

831 if tmin is not None and tmax is not None: 

832 tr.chop(tmin, tmax) 

833 

834 if syn_test: 

835 trs_projected_synthetic = [] 

836 for tr in trs_projected: 

837 if tr.channel == channel: 

838 tr_syn = syn_test.get_waveform( 

839 tr.nslc_id, tmin, tmax, 

840 tfade=tfade, freqlimits=freqlimits) 

841 

842 if tr_syn: 

843 if syn_test.real_noise_scale != 0.0: 

844 tr_syn = tr_syn.copy() 

845 tr_noise = tr.copy() 

846 tr_noise.set_ydata( 

847 tr_noise.get_ydata() 

848 * syn_test.real_noise_scale) 

849 

850 tr_syn.add(tr_noise) 

851 

852 trs_projected_synthetic.append(tr_syn) 

853 

854 trs_projected = trs_projected_synthetic 

855 

856 if cache is not None: 

857 for tr in trs_projected: 

858 cache[tr.nslc_id + cache_k] = tr 

859 

860 tr_return = None 

861 for tr in trs_projected: 

862 if tr.channel == channel: 

863 tr_return = tr 

864 

865 if debug: 

866 return trs_projected, trs_restituted, trs_raw, tr_return 

867 

868 elif tr_return: 

869 return tr_return 

870 

871 else: 

872 raise NotFound( 

873 'waveform not available', station.nsl() + (channel,)) 

874 

875 except NotFound: 

876 if cache is not None: 

877 cache[nslc + cache_k] = None 

878 raise 

879 

880 def get_waveform(self, obj, tinc_cache=None, **kwargs): 

881 tmin = kwargs['tmin'] 

882 tmax = kwargs['tmax'] 

883 if tinc_cache is not None: 

884 tmin_r = (math.floor(tmin / tinc_cache) - 1.0) * tinc_cache 

885 tmax_r = (math.ceil(tmax / tinc_cache) + 1.0) * tinc_cache 

886 else: 

887 tmin_r = tmin 

888 tmax_r = tmax 

889 

890 kwargs['tmin'] = tmin_r 

891 kwargs['tmax'] = tmax_r 

892 

893 if kwargs.get('debug', None): 

894 return self._get_waveform(obj, **kwargs) 

895 else: 

896 tr = self._get_waveform(obj, **kwargs) 

897 return tr.chop(tmin, tmax, inplace=False) 

898 

899 def get_events(self, magmin=None, event_names=None): 

900 evs = [] 

901 for ev in self.events: 

902 if ((magmin is None or ev.magnitude >= magmin) and 

903 (event_names is None or ev.name in event_names)): 

904 evs.append(ev) 

905 

906 return evs 

907 

908 def get_event_by_time(self, t, magmin=None): 

909 evs = self.get_events(magmin=magmin) 

910 ev_x = None 

911 for ev in evs: 

912 if ev_x is None or abs(ev.time - t) < abs(ev_x.time - t): 

913 ev_x = ev 

914 

915 if not ev_x: 

916 raise NotFound( 

917 'No event information matching criteria (t=%s, magmin=%s).' % 

918 (t, magmin)) 

919 

920 return ev_x 

921 

922 def get_event(self): 

923 if self._event_name is None: 

924 raise NotFound('No main event selected in dataset!') 

925 

926 for ev in self.events: 

927 if ev.name == self._event_name: 

928 return ev 

929 

930 raise NotFound('No such event: %s' % self._event_name) 

931 

    def get_picks(self):
        '''Return phase picks as a dict keyed by ``(nsl, phase, event)``.

        Built lazily from ``pick_markers``: event markers (and the dataset's
        own events) map event hashes to names; phase markers are then
        associated via their event hash.

        :raises DatasetError: on duplicate event names, unassociated picks
            or duplicate picks
        '''
        if self._picks is None:
            hash_to_name = {}
            names = set()
            for marker in self.pick_markers:
                if isinstance(marker, pmarker.EventMarker):
                    name = marker.get_event().name
                    if name in names:
                        raise DatasetError(
                            'Duplicate event name "%s" in picks.' % name)

                    names.add(name)
                    hash_to_name[marker.get_event_hash()] = name

            # Dataset events may also be referenced by picks.
            for ev in self.events:
                hash_to_name[ev.get_hash()] = ev.name

            picks = {}
            for marker in self.pick_markers:
                if isinstance(marker, pmarker.PhaseMarker):
                    ehash = marker.get_event_hash()

                    nsl = marker.one_nslc()[:3]
                    phasename = marker.get_phasename()

                    if ehash is None or ehash not in hash_to_name:
                        raise DatasetError(
                            'Unassociated pick: %s.%s.%s, %s' %
                            (nsl + (phasename, )))

                    eventname = hash_to_name[ehash]

                    if (nsl, phasename, eventname) in picks:
                        raise DatasetError(
                            'Duplicate pick: %s.%s.%s, %s' %
                            (nsl + (phasename, )))

                    picks[nsl, phasename, eventname] = marker

            self._picks = picks

        return self._picks

974 

975 def get_pick(self, eventname, obj, phasename): 

976 nsl = self.get_nsl(obj) 

977 return self.get_picks().get((nsl, phasename, eventname), None) 

978 

979 

980class DatasetConfig(HasPaths): 

981 ''' Configuration for a Grond `Dataset` object. ''' 

982 

983 stations_path = Path.T( 

984 optional=True, 

985 help='List of files with station coordinates in Pyrocko format.') 

986 stations_stationxml_paths = List.T( 

987 Path.T(), 

988 optional=True, 

989 help='List of files with station coordinates in StationXML format.') 

990 events_path = Path.T( 

991 optional=True, 

992 help='File with hypocenter information and possibly' 

993 ' reference solution') 

994 waveform_paths = List.T( 

995 Path.T(), 

996 optional=True, 

997 help='List of directories with raw waveform data') 

998 clippings_path = Path.T( 

999 optional=True) 

1000 responses_sacpz_path = Path.T( 

1001 optional=True, 

1002 help='List of SACPZ response files for restitution of' 

1003 ' the raw waveform data.') 

1004 responses_stationxml_paths = List.T( 

1005 Path.T(), 

1006 optional=True, 

1007 help='List of StationXML response files for restitution of' 

1008 ' the raw waveform data.') 

1009 station_corrections_path = Path.T( 

1010 optional=True, 

1011 help='File containing station correction informations.') 

1012 apply_correction_factors = Bool.T( 

1013 optional=True, 

1014 default=True, 

1015 help='Apply correction factors from station corrections.') 

1016 apply_correction_delays = Bool.T( 

1017 optional=True, 

1018 default=True, 

1019 help='Apply correction delays from station corrections.') 

1020 apply_displaced_sampling_workaround = Bool.T( 

1021 optional=True, 

1022 default=False, 

1023 help='Work around displaced sampling issues.') 

1024 extend_incomplete = Bool.T( 

1025 default=False, 

1026 help='Extend incomplete seismic traces.') 

1027 picks_paths = List.T( 

1028 Path.T()) 

1029 exclude_paths = List.T( 

1030 Path.T(), 

1031 help='List of text files with excluded stations.') 

1032 exclude = List.T( 

1033 String.T(), 

1034 help='Stations/components to be excluded according to their STA, ' 

1035 'NET.STA, NET.STA.LOC, or NET.STA.LOC.CHA codes.') 

1036 include_paths = List.T( 

1037 Path.T(), 

1038 help='List of text files with included stations.') 

1039 include = List.T( 

1040 String.T(), 

1041 optional=True, 

1042 help='If not None, list of stations/components to include according ' 

1043 'to their STA, NET.STA, NET.STA.LOC, or NET.STA.LOC.CHA codes. ' 

1044 'Note: ''when including on channel level, both, the raw and ' 

1045 'the processed channel codes have to be listed.') 

1046 

1047 synthetic_test = SyntheticTest.T( 

1048 optional=True) 

1049 

1050 kite_scene_paths = List.T( 

1051 Path.T(), 

1052 optional=True, 

1053 help='List of directories for the InSAR scenes.') 

1054 

1055 gnss_campaign_paths = List.T( 

1056 Path.T(), 

1057 optional=True, 

1058 help='List of directories for the GNSS campaign data.') 

1059 

1060 # for backward compatibility 

1061 blacklist = List.T(String.T(), optional=True) 

1062 whitelist = List.T(String.T(), optional=True) 

1063 blacklist_paths = List.T(Path.T(), optional=True) 

1064 whitelist_paths = List.T(Path.T(), optional=True) 

1065 

    def __init__(self, *args, **kwargs):
        '''Initialize config; migrates deprecated black-/whitelist options
        into the exclude/include attributes.'''
        HasPaths.__init__(self, *args, **kwargs)

        # for backward compatibility
        if self.blacklist:
            self.exclude.extend(self.blacklist)
            self.blacklist = None
        if self.whitelist:
            # NOTE(review): `include` is optional and may be None here, in
            # which case this extend would fail — verify intended.
            self.include.extend(self.whitelist)
            self.whitelist = None
        if self.blacklist_paths:
            self.exclude_paths.extend(self.blacklist_paths)
            self.blacklist_paths = None
        if self.whitelist_paths:
            self.include_paths.extend(self.whitelist_paths)
            self.whitelist_paths = None

        # Per-event Dataset instances, built lazily by get_dataset().
        self._ds = {}

1084 

1085 def get_event_names(self): 

1086 logger.info('Loading events ...') 

1087 

1088 def extra(path): 

1089 return expand_template(path, dict( 

1090 event_name='*')) 

1091 

1092 def fp(path): 

1093 return self.expand_path(path, extra=extra) 

1094 

1095 def check_events(events, fn): 

1096 for ev in events: 

1097 if not ev.name: 

1098 logger.warning('Event in %s has no name!', fn) 

1099 return 

1100 if not ev.lat or not ev.lon: 

1101 logger.warning('Event %s has inconsistent coordinates!', 

1102 ev.name) 

1103 if not ev.depth: 

1104 logger.warning('Event %s has no depth!', ev.name) 

1105 if not ev.time: 

1106 logger.warning('Event %s has no time!', ev.name) 

1107 

1108 events = [] 

1109 events_path = fp(self.events_path) 

1110 fns = glob.glob(events_path) 

1111 if not fns: 

1112 raise DatasetError('No event files matching "%s".' % events_path) 

1113 

1114 for fn in fns: 

1115 logger.debug('Loading from file %s' % fn) 

1116 ev = model.load_events(filename=fn) 

1117 check_events(ev, fn) 

1118 

1119 events.extend(ev) 

1120 

1121 event_names = [ev_.name for ev_ in events] 

1122 event_names.sort() 

1123 return event_names 

1124 

1125 def get_dataset(self, event_name): 

1126 if event_name not in self._ds: 

1127 def extra(path): 

1128 return expand_template(path, dict( 

1129 event_name=event_name)) 

1130 

1131 def fp(path): 

1132 p = self.expand_path(path, extra=extra) 

1133 if p is None: 

1134 return None 

1135 

1136 if isinstance(p, list): 

1137 for path in p: 

1138 if not op.exists(path): 

1139 logger.warning('Path %s does not exist.' % path) 

1140 else: 

1141 if not op.exists(p): 

1142 logger.warning('Path %s does not exist.' % p) 

1143 

1144 return p 

1145 

1146 ds = Dataset(event_name) 

1147 try: 

1148 ds.add_events(filename=fp(self.events_path)) 

1149 

1150 ds.add_stations( 

1151 pyrocko_stations_filename=fp(self.stations_path), 

1152 stationxml_filenames=fp(self.stations_stationxml_paths)) 

1153 

1154 if self.waveform_paths: 

1155 ds.add_waveforms(paths=fp(self.waveform_paths)) 

1156 

1157 if self.kite_scene_paths: 

1158 ds.add_kite_scenes(paths=fp(self.kite_scene_paths)) 

1159 

1160 if self.gnss_campaign_paths: 

1161 ds.add_gnss_campaigns(paths=fp(self.gnss_campaign_paths)) 

1162 

1163 if self.clippings_path: 

1164 ds.add_clippings(markers_filename=fp(self.clippings_path)) 

1165 

1166 if self.responses_sacpz_path: 

1167 ds.add_responses( 

1168 sacpz_dirname=fp(self.responses_sacpz_path)) 

1169 

1170 if self.responses_stationxml_paths: 

1171 ds.add_responses( 

1172 stationxml_filenames=fp( 

1173 self.responses_stationxml_paths)) 

1174 

1175 if self.station_corrections_path: 

1176 ds.add_station_corrections( 

1177 filename=fp(self.station_corrections_path)) 

1178 

1179 ds.apply_correction_factors = self.apply_correction_factors 

1180 ds.apply_correction_delays = self.apply_correction_delays 

1181 ds.apply_displaced_sampling_workaround = \ 

1182 self.apply_displaced_sampling_workaround 

1183 ds.extend_incomplete = self.extend_incomplete 

1184 

1185 for picks_path in self.picks_paths: 

1186 ds.add_picks( 

1187 filename=fp(picks_path)) 

1188 

1189 ds.add_exclude(self.exclude) 

1190 ds.add_exclude(filenames=fp(self.exclude_paths)) 

1191 if self.include: 

1192 ds.add_include(self.include) 

1193 if self.include_paths: 

1194 ds.add_include(filenames=fp(self.include_paths)) 

1195 

1196 ds.set_synthetic_test(copy.deepcopy(self.synthetic_test)) 

1197 self._ds[event_name] = ds 

1198 except (FileLoadError, OSError) as e: 

1199 raise DatasetError(str(e)) 

1200 

1201 return self._ds[event_name] 

1202 

1203 

# Public API of this module.
__all__ = [
    'Dataset',
    'DatasetConfig',
    'DatasetError',
    'InvalidObject',
    'NotFound',
    'StationCorrection',
    'load_station_corrections',
    'dump_station_corrections',
]