1import glob 

2import copy 

3import os.path as op 

4import logging 

5import math 

6import numpy as num 

7 

8from collections import defaultdict 

9from pyrocko import util, model, trace, marker as pmarker 

10from pyrocko.io.io_common import FileLoadError 

11from pyrocko.fdsn import enhanced_sacpz, station as fs 

12from pyrocko.guts import (Object, Tuple, String, Float, List, Bool, dump_all, 

13 load_all) 

14from pyrocko import squirrel 

15 

16from pyrocko import gf 

17 

18from .meta import Path, HasPaths, expand_template, GrondError 

19 

20from .synthetic_tests import SyntheticTest 

21 

# Namespace prefix used by guts when (de)serializing objects of this module.
guts_prefix = 'grond'
# Module-level logger; configured by the application.
logger = logging.getLogger('grond.dataset')

24 

25 

def quote_paths(paths):
    '''Return *paths* as a single comma-separated string of quoted paths.'''
    quoted = ['"%s"' % p for p in paths]
    return ', '.join(quoted)

28 

29 

class InvalidObject(Exception):
    '''Raised when an object cannot be interpreted as station/channel codes
    or as a trace-like object (see :py:meth:`Dataset.get_nsl` and
    friends).'''
    pass

32 

33 

class NotFound(Exception):
    '''Raised when a requested dataset item (station, response, waveform,
    event, ...) is not available.

    :param reason: human readable explanation.
    :param codes: optional tuple of codes identifying the item.
    :param time_range: optional ``(tmin, tmax)`` tuple [s].
    '''

    def __init__(self, reason, codes=None, time_range=None):
        self.reason = reason
        self.time_range = time_range
        self.codes = codes

    def __str__(self):
        parts = [self.reason]
        if self.codes:
            parts.append(' (%s)' % '.'.join(self.codes))

        if self.time_range:
            parts.append(' (%s - %s)' % (
                util.time_to_str(self.time_range[0]),
                util.time_to_str(self.time_range[1])))

        return ''.join(parts)

51 

52 

class DatasetError(GrondError):
    '''Raised for user-facing problems with dataset configuration or
    content (e.g. unreadable files, duplicate picks).'''
    pass

55 

56 

class StationCorrection(Object):
    '''Station-specific time shift and amplitude factor.

    Applied in :py:meth:`Dataset._get_waveform`: the trace is shifted by
    ``-delay`` and its samples are divided by ``factor``.
    '''

    # (network, station, location, channel) codes identifying the target.
    codes = Tuple.T(4, String.T())
    # Time shift [s].
    delay = Float.T()
    # Amplitude correction factor.
    factor = Float.T()

61 

62 

def load_station_corrections(filename):
    '''Load a list of :py:class:`StationCorrection` objects from a YAML file.

    :param filename: path to a guts YAML file.
    :returns: list of :py:class:`StationCorrection` objects.
    :raises DatasetError: if the file contains an object of any other type.
    '''
    scs = load_all(filename=filename)
    for sc in scs:
        # Raise instead of `assert`: asserts are stripped under `python -O`
        # and give the user no useful message.
        if not isinstance(sc, StationCorrection):
            raise DatasetError(
                'Invalid object in station corrections file "%s": '
                'expected StationCorrection, got %s.' % (
                    filename, type(sc).__name__))

    return scs

69 

70 

def dump_station_corrections(station_corrections, filename):
    '''Write a list of :py:class:`StationCorrection` objects to a YAML
    file.'''
    return dump_all(station_corrections, filename=filename)

73 

74 

75class Dataset(object): 

76 

    def __init__(self, event_name=None):
        # Known events; the "main" event is selected by name, see
        # get_event().
        self.events = []
        # Waveform access backend.
        self.squirrel = squirrel.Squirrel()
        # (net, sta, loc) -> station object.
        self.stations = {}
        # codes -> list of SAC PZ response entries.
        self.responses = defaultdict(list)
        # Loaded StationXML documents used for response lookup.
        self.responses_stationxml = []
        # nsl/nslc key -> numpy array of clipping marker times [s].
        self.clippings = {}
        # Code tuples to exclude, see is_blacklisted().
        self.blacklist = set()
        # Whitelist sets; None means "everything is allowed", see
        # is_whitelisted().
        self.whitelist_nslc = None
        self.whitelist_nsl_xx = None
        self.whitelist = None
        # codes -> StationCorrection.
        self.station_corrections = {}
        self.station_factors = {}
        # Raw pick markers loaded via add_picks().
        self.pick_markers = []
        self.apply_correction_delays = True
        self.apply_correction_factors = True
        self.apply_displaced_sampling_workaround = False
        self.extend_incomplete = False
        # 'by_nsl' or 'by_nslc': granularity of the clipping check in
        # get_waveform_raw().
        self.clip_handling = 'by_nsl'
        self.kite_scenes = []
        self.gnss_campaigns = []
        self.synthetic_test = None
        # Lazily built pick table, see get_picks().
        self._picks = None
        # Waveform cache used by _get_waveform().
        self._cache = {}
        self._event_name = event_name

102 

    def empty_cache(self):
        '''Drop all cached waveforms (rebinds the internal cache dict).'''
        self._cache = {}

105 

    def set_synthetic_test(self, synthetic_test):
        '''Attach a SyntheticTest (or None); when set, _get_waveform()
        serves synthetic data instead of / mixed with real data.'''
        self.synthetic_test = synthetic_test

108 

    def add_stations(
            self,
            stations=None,
            pyrocko_stations_filename=None,
            stationxml_filenames=None):
        '''Register station metadata, keyed by (net, sta, loc).

        :param stations: iterable of ready-made station objects.
        :param pyrocko_stations_filename: Pyrocko-format station file.
        :param stationxml_filenames: list of StationXML files; channel
            epochs are selected at the main event's time (see get_event()).
        '''

        if stations is not None:
            for station in stations:
                self.stations[station.nsl()] = station

        if pyrocko_stations_filename is not None:
            logger.debug(
                'Loading stations from file "%s"...' %
                pyrocko_stations_filename)

            for station in model.load_stations(pyrocko_stations_filename):
                self.stations[station.nsl()] = station

        if stationxml_filenames is not None and len(stationxml_filenames) > 0:

            for stationxml_filename in stationxml_filenames:
                # Missing files are silently skipped here; get_dataset()
                # warns about non-existing paths beforehand.
                if not op.exists(stationxml_filename):
                    continue

                logger.debug(
                    'Loading stations from StationXML file "%s"...' %
                    stationxml_filename)

                sx = fs.load_xml(filename=stationxml_filename)
                ev = self.get_event()
                stations = sx.get_pyrocko_stations(time=ev.time)
                if len(stations) == 0:
                    logger.warning(
                        'No stations found for time %s in file "%s".' % (
                            util.time_to_str(ev.time), stationxml_filename))

                for station in stations:
                    logger.debug('Adding station: %s.%s.%s' % station.nsl())
                    channels = station.get_channels()
                    if len(channels) == 1 and channels[0].name.endswith('Z'):
                        # Vertical-only stations get mocked horizontal
                        # channels so component projections can be set up
                        # later (see _get_projections()).
                        logger.warning(
                            'Station "%s" has vertical component'
                            ' information only, adding mocked channels.'
                            % station.nsl_string())
                        station.add_channel(
                            model.Channel(channels[0].name[:-1] + 'N'))
                        station.add_channel(
                            model.Channel(channels[0].name[:-1] + 'E'))

                    self.stations[station.nsl()] = station

159 

160 def add_events(self, events=None, filename=None): 

161 if events is not None: 

162 self.events.extend(events) 

163 

164 if filename is not None: 

165 logger.debug('Loading events from file "%s"...' % filename) 

166 try: 

167 events = model.load_events(filename) 

168 self.events.extend(events) 

169 logger.info( 

170 'Loading events from %s: %i events found.' % 

171 (filename, len(events))) 

172 except Exception as e: 

173 logger.warning('Could not load events from %s!', filename) 

174 raise e 

175 

    def add_waveforms(self, paths, regex=None, fileformat='detect',
                      show_progress=False):
        '''Make waveform data under *paths* available via the squirrel
        backend.

        NOTE(review): `regex`, `fileformat` and `show_progress` are accepted
        for interface compatibility but are currently ignored — only the
        paths are passed on to the squirrel. Confirm whether filtering is
        intended to happen here.
        '''
        self.squirrel.add(paths)

180 

181 def add_responses(self, sacpz_dirname=None, stationxml_filenames=None): 

182 if sacpz_dirname: 

183 logger.debug( 

184 'Loading SAC PZ responses from "%s"...' % sacpz_dirname) 

185 for x in enhanced_sacpz.iload_dirname(sacpz_dirname): 

186 self.responses[x.codes].append(x) 

187 

188 if stationxml_filenames: 

189 for stationxml_filename in stationxml_filenames: 

190 if not op.exists(stationxml_filename): 

191 continue 

192 

193 logger.debug( 

194 'Loading StationXML responses from "%s"...' % 

195 stationxml_filename) 

196 

197 self.responses_stationxml.append( 

198 fs.load_xml(filename=stationxml_filename)) 

199 

200 def add_clippings(self, markers_filename): 

201 markers = pmarker.load_markers(markers_filename) 

202 clippings = {} 

203 for marker in markers: 

204 nslc = marker.one_nslc() 

205 nsl = nslc[:3] 

206 if nsl not in clippings: 

207 clippings[nsl] = [] 

208 

209 if nslc not in clippings: 

210 clippings[nslc] = [] 

211 

212 clippings[nsl].append(marker.tmin) 

213 clippings[nslc].append(marker.tmin) 

214 

215 for k, times in clippings.items(): 

216 atimes = num.array(times, dtype=float) 

217 if k not in self.clippings: 

218 self.clippings[k] = atimes 

219 else: 

220 self.clippings[k] = num.concatenate(self.clippings, atimes) 

221 

222 def add_blacklist(self, blacklist=[], filenames=None): 

223 logger.debug('Loading blacklisted stations...') 

224 if filenames: 

225 blacklist = list(blacklist) 

226 for filename in filenames: 

227 if op.exists(filename): 

228 with open(filename, 'r') as f: 

229 blacklist.extend( 

230 s.strip() for s in f.read().splitlines()) 

231 else: 

232 logger.warning('No such blacklist file: %s' % filename) 

233 

234 for x in blacklist: 

235 if isinstance(x, str): 

236 x = tuple(x.split('.')) 

237 self.blacklist.add(x) 

238 

239 def add_whitelist(self, whitelist=[], filenames=None): 

240 logger.debug('Loading whitelisted stations...') 

241 if filenames: 

242 whitelist = list(whitelist) 

243 for filename in filenames: 

244 with open(filename, 'r') as f: 

245 whitelist.extend(s.strip() for s in f.read().splitlines()) 

246 

247 if self.whitelist_nslc is None: 

248 self.whitelist_nslc = set() 

249 self.whitelist = set() 

250 self.whitelist_nsl_xx = set() 

251 

252 for x in whitelist: 

253 if isinstance(x, str): 

254 x = tuple(x.split('.')) 

255 if len(x) == 4: 

256 self.whitelist_nslc.add(x) 

257 self.whitelist_nsl_xx.add(x[:3]) 

258 else: 

259 self.whitelist.add(x) 

260 

    def add_station_corrections(self, filename):
        '''Load station corrections from *filename* and merge them into the
        correction table, keyed by their codes.'''
        self.station_corrections.update(
            (sc.codes, sc) for sc in load_station_corrections(filename))

264 

    def add_picks(self, filename):
        '''Load pick markers from a marker file.'''
        self.pick_markers.extend(
            pmarker.load_markers(filename))

        # Invalidate the lazily built pick lookup table (see get_picks()).
        self._picks = None

270 

271 def add_gnss_campaigns(self, paths): 

272 paths = util.select_files( 

273 paths, 

274 include=r'\.yml|\.yaml', 

275 show_progress=False) 

276 

277 for path in paths: 

278 self.add_gnss_campaign(filename=path) 

279 

    def add_gnss_campaign(self, filename):
        '''Load a single GNSS campaign from a guts YAML file.

        :raises ImportError: if the installed pyrocko lacks the gnss model.
        '''
        try:
            from pyrocko.model import gnss  # noqa
        except ImportError:
            raise ImportError('Module pyrocko.model.gnss not found,'
                              ' please upgrade pyrocko!')
        logger.debug('Loading GNSS campaign from "%s"...' % filename)

        campaign = load_all(filename=filename)
        # Only the first object in the file is used as the campaign.
        self.gnss_campaigns.append(campaign[0])

290 

291 def add_kite_scenes(self, paths): 

292 logger.info('Loading kite InSAR scenes...') 

293 paths = util.select_files( 

294 paths, 

295 include=r'\.npz', 

296 show_progress=False) 

297 

298 for path in paths: 

299 self.add_kite_scene(filename=path) 

300 

301 if not self.kite_scenes: 

302 logger.warning('Could not find any kite scenes at %s.' % 

303 quote_paths(self.kite_scene_paths)) 

304 

    def add_kite_scene(self, filename):
        '''Load one kite scene; scene_ids must be unique in the dataset.

        :raises ImportError: if the `kite` package is not installed.
        :raises AttributeError: if a scene with the same scene_id was
            already added.
        '''
        try:
            from kite import Scene
        except ImportError:
            raise ImportError('Module kite could not be imported,'
                              ' please install from https://pyrocko.org.')
        logger.debug('Loading kite scene from "%s"...' % filename)

        scene = Scene.load(filename)
        scene._log.setLevel(logger.level)

        # get_kite_scene() raising NotFound means the id is still unused,
        # which is the expected (success) path here.
        try:
            self.get_kite_scene(scene.meta.scene_id)
        except NotFound:
            self.kite_scenes.append(scene)
        else:
            raise AttributeError('Kite scene_id not unique for "%s".'
                                 % filename)

323 

324 def is_blacklisted(self, obj): 

325 try: 

326 nslc = self.get_nslc(obj) 

327 if nslc in self.blacklist: 

328 return True 

329 

330 except InvalidObject: 

331 pass 

332 

333 nsl = self.get_nsl(obj) 

334 return ( 

335 nsl in self.blacklist or 

336 nsl[1:2] in self.blacklist or 

337 nsl[:2] in self.blacklist) 

338 

    def is_whitelisted(self, obj):
        '''Check *obj* against the configured whitelists.

        Returns True when no whitelist is configured at all.

        NOTE(review): when channel codes are available but the nslc is not
        whitelisted, the function falls through and implicitly returns
        None (falsy) — callers must rely on truthiness only.
        '''
        if self.whitelist_nslc is None:
            return True

        nsl = self.get_nsl(obj)

        if (
                nsl in self.whitelist or
                nsl[1:2] in self.whitelist or
                nsl[:2] in self.whitelist):

            return True

        try:
            nslc = self.get_nslc(obj)
            if nslc in self.whitelist_nslc:
                return True

        except InvalidObject:
            # No channel info: accept if the station matches the nsl part
            # of any whitelisted 4-tuple.
            return nsl in self.whitelist_nsl_xx

359 

360 def has_clipping(self, nsl_or_nslc, tmin, tmax): 

361 if nsl_or_nslc not in self.clippings: 

362 return False 

363 

364 atimes = self.clippings[nsl_or_nslc] 

365 return num.any(num.logical_and(tmin < atimes, atimes <= tmax)) 

366 

367 def get_nsl(self, obj): 

368 if isinstance(obj, trace.Trace): 

369 net, sta, loc, _ = obj.nslc_id 

370 elif isinstance(obj, model.Station): 

371 net, sta, loc = obj.nsl() 

372 elif isinstance(obj, gf.Target): 

373 net, sta, loc, _ = obj.codes 

374 elif isinstance(obj, tuple) and len(obj) in (3, 4): 

375 net, sta, loc = obj[:3] 

376 else: 

377 raise InvalidObject( 

378 'Cannot get nsl code from given object of type "%s".' 

379 % type(obj)) 

380 

381 return net, sta, loc 

382 

383 def get_nslc(self, obj): 

384 if isinstance(obj, trace.Trace): 

385 return obj.nslc_id 

386 elif isinstance(obj, gf.Target): 

387 return obj.codes 

388 elif isinstance(obj, tuple) and len(obj) == 4: 

389 return obj 

390 else: 

391 raise InvalidObject( 

392 'Cannot get nslc code from given object "%s"' % type(obj)) 

393 

    def get_tmin_tmax(self, obj):
        '''Return the (tmin, tmax) time span of a trace-like object.

        Only pyrocko traces carry a time span; anything else raises
        InvalidObject.
        '''
        if isinstance(obj, trace.Trace):
            return obj.tmin, obj.tmax
        else:
            raise InvalidObject(
                'Cannot get tmin and tmax from given object of type "%s"' %
                type(obj))

401 

402 def get_station(self, obj): 

403 if self.is_blacklisted(obj): 

404 raise NotFound('Station is blacklisted:', self.get_nsl(obj)) 

405 

406 if not self.is_whitelisted(obj): 

407 raise NotFound('Station is not on whitelist:', self.get_nsl(obj)) 

408 

409 if isinstance(obj, model.Station): 

410 return obj 

411 

412 net, sta, loc = self.get_nsl(obj) 

413 

414 keys = [(net, sta, loc), (net, sta, ''), ('', sta, '')] 

415 for k in keys: 

416 if k in self.stations: 

417 return self.stations[k] 

418 

419 raise NotFound('No station information:', keys) 

420 

421 def get_stations(self): 

422 return [self.stations[k] for k in sorted(self.stations) 

423 if not self.is_blacklisted(self.stations[k]) 

424 and self.is_whitelisted(self.stations[k])] 

425 

    def get_kite_scenes(self):
        '''Return the list of loaded kite InSAR scenes.'''
        return self.kite_scenes

428 

429 def get_kite_scene(self, scene_id=None): 

430 if scene_id is None: 

431 if len(self.kite_scenes) == 0: 

432 raise AttributeError('No kite displacements defined.') 

433 return self.kite_scenes[0] 

434 else: 

435 for scene in self.kite_scenes: 

436 if scene.meta.scene_id == scene_id: 

437 return scene 

438 raise NotFound('No kite scene with id "%s" defined.' % scene_id) 

439 

    def get_gnss_campaigns(self):
        '''Return the list of loaded GNSS campaigns.'''
        return self.gnss_campaigns

442 

443 def get_gnss_campaign(self, name): 

444 for camp in self.gnss_campaigns: 

445 if camp.name == name: 

446 return camp 

447 raise NotFound('GNSS campaign %s not found!' % name) 

448 

    def get_response(self, obj, quantity='displacement'):
        '''Find the unique instrument response for *obj*.

        :param obj: anything get_nslc()/get_tmin_tmax() accepts (a trace).
        :param quantity: 'displacement', 'velocity' or 'acceleration';
            SAC PZ responses are chained with differentiation stages,
            StationXML responses use fake input units.
        :raises NotFound: if no response information is loaded, the target
            is black-/not whitelisted, or if zero or more than one matching
            response is found.
        '''
        if (self.responses is None or len(self.responses) == 0) \
                and (self.responses_stationxml is None
                     or len(self.responses_stationxml) == 0):

            raise NotFound('No response information available.')

        # Maps quantity to the fake input unit used for StationXML lookup.
        quantity_to_unit = {
            'displacement': 'M',
            'velocity': 'M/S',
            'acceleration': 'M/S**2'}

        if self.is_blacklisted(obj):
            raise NotFound('Response is blacklisted:', self.get_nslc(obj))

        if not self.is_whitelisted(obj):
            raise NotFound('Response is not on whitelist:', self.get_nslc(obj))

        net, sta, loc, cha = self.get_nslc(obj)
        tmin, tmax = self.get_tmin_tmax(obj)

        # Lookup keys with decreasing specificity; duplicates removed while
        # preserving order.
        keys_x = [
            (net, sta, loc, cha), (net, sta, '', cha), ('', sta, '', cha)]

        keys = []
        for k in keys_x:
            if k not in keys:
                keys.append(k)

        candidates = []
        for k in keys:
            if k in self.responses:
                for x in self.responses[k]:
                    # Response epoch must fully cover the requested span.
                    if x.tmin < tmin and (x.tmax is None or tmax < x.tmax):
                        if quantity == 'displacement':
                            candidates.append(x.response)
                        elif quantity == 'velocity':
                            candidates.append(trace.MultiplyResponse([
                                x.response,
                                trace.DifferentiationResponse()]))
                        elif quantity == 'acceleration':
                            candidates.append(trace.MultiplyResponse([
                                x.response,
                                trace.DifferentiationResponse(2)]))
                        else:
                            assert False

        for sx in self.responses_stationxml:
            try:
                candidates.append(
                    sx.get_pyrocko_response(
                        (net, sta, loc, cha),
                        timespan=(tmin, tmax),
                        fake_input_units=quantity_to_unit[quantity]))

            except (fs.NoResponseInformation, fs.MultipleResponseInformation):
                pass

        # Exactly one candidate is required; ambiguity is an error.
        if len(candidates) == 1:
            return candidates[0]

        elif len(candidates) == 0:
            raise NotFound('No response found:', (net, sta, loc, cha))
        else:
            raise NotFound('Multiple responses found:', (net, sta, loc, cha))

514 

    def get_waveform_raw(
            self, obj,
            tmin,
            tmax,
            tpad=0.,
            toffset_noise_extract=0.,
            want_incomplete=False,
            extend_incomplete=False):
        '''Get raw waveform traces for *obj* in ``[tmin-tpad, tmax+tpad]``.

        :param toffset_noise_extract: extra time shift [s] applied when
            extracting (used to pull real noise from a different time
            window); traces are shifted back by this amount after loading.
        :param want_incomplete: when False, exactly one gapless trace is
            required.
        :param extend_incomplete: when True and exactly one trace was
            found, extend it to the full requested span by repetition.
        :raises NotFound: on blacklist/whitelist rejection, clipped data,
            or missing/gappy data.
        '''
        net, sta, loc, cha = self.get_nslc(obj)

        if self.is_blacklisted((net, sta, loc, cha)):
            raise NotFound(
                'Waveform is blacklisted:', (net, sta, loc, cha))

        if not self.is_whitelisted((net, sta, loc, cha)):
            raise NotFound(
                'Waveform is not on whitelist:', (net, sta, loc, cha))

        # Clipping check granularity depends on configuration.
        if self.clip_handling == 'by_nsl':
            if self.has_clipping((net, sta, loc), tmin, tmax):
                raise NotFound(
                    'Waveform clipped:', (net, sta, loc))

        elif self.clip_handling == 'by_nslc':
            if self.has_clipping((net, sta, loc, cha), tmin, tmax):
                raise NotFound(
                    'Waveform clipped:', (net, sta, loc, cha))

        trs = self.squirrel.get_waveforms(
            tmin=tmin+toffset_noise_extract-tpad,
            tmax=tmax+toffset_noise_extract+tpad,
            codes=(net, sta, loc, cha),
            want_incomplete=want_incomplete or extend_incomplete)

        if self.apply_displaced_sampling_workaround:
            for tr in trs:
                tr.snap()

        # Undo the noise-extraction offset so traces align with the
        # requested window.
        if toffset_noise_extract != 0.0:
            for tr in trs:
                tr.shift(-toffset_noise_extract)

        if extend_incomplete and len(trs) == 1:
            trs[0].extend(
                tmin + toffset_noise_extract - tpad,
                tmax + toffset_noise_extract + tpad,
                fillmethod='repeat')

        if not want_incomplete and len(trs) != 1:
            if len(trs) == 0:
                message = 'Waveform missing or incomplete.'
            else:
                message = 'Waveform has gaps.'

            raise NotFound(
                message,
                codes=(net, sta, loc, cha),
                time_range=(
                    tmin + toffset_noise_extract - tpad,
                    tmax + toffset_noise_extract + tpad))

        return trs

578 

579 def get_waveform_restituted( 

580 self, 

581 obj, quantity='displacement', 

582 tmin=None, tmax=None, tpad=0., 

583 tfade=0., freqlimits=None, deltat=None, 

584 toffset_noise_extract=0., 

585 want_incomplete=False, 

586 extend_incomplete=False): 

587 

588 trs_raw = self.get_waveform_raw( 

589 obj, tmin=tmin, tmax=tmax, tpad=tpad+tfade, 

590 toffset_noise_extract=toffset_noise_extract, 

591 want_incomplete=want_incomplete, 

592 extend_incomplete=extend_incomplete) 

593 

594 trs_restituted = [] 

595 for tr in trs_raw: 

596 if deltat is not None: 

597 try: 

598 tr.downsample_to(deltat, snap=True, allow_upsample_max=5) 

599 

600 except util.UnavailableDecimation: 

601 logger.warn('using resample instead of decimation') 

602 tr.resample(deltat) 

603 

604 tr.deltat = deltat 

605 

606 resp = self.get_response(tr, quantity=quantity) 

607 try: 

608 trs_restituted.append( 

609 tr.transfer( 

610 tfade=tfade, freqlimits=freqlimits, 

611 transfer_function=resp, invert=True, demean=True)) 

612 

613 except trace.InfiniteResponse: 

614 raise NotFound( 

615 'Instrument response deconvolution failed ' 

616 '(divide by zero)', tr.nslc_id) 

617 

618 return trs_restituted, trs_raw 

619 

    def _get_projections(
            self, station, backazimuth, source, target, tmin, tmax):
        '''Collect component projection matrices for *station*.

        Always includes ENU projections; RTZ projections are added when a
        backazimuth is given or can be derived from *source*/*target*.

        :raises NotFound: if no projection can be determined.
        '''
        # fill in missing channel information (happens when station file
        # does not contain any channel information)
        if not station.get_channels():
            station = copy.deepcopy(station)

            nsl = station.nsl()
            # NOTE(review): accesses `self.squirrel.pile` — assumes the
            # squirrel object exposes a pile-like interface; confirm
            # against the installed pyrocko version.
            trs = self.squirrel.pile.all(
                tmin=tmin, tmax=tmax,
                trace_selector=lambda tr: tr.nslc_id[:3] == nsl,
                load_data=False)

            channels = list(set(tr.channel for tr in trs))
            station.set_channels_by_name(*channels)

        projections = []
        projections.extend(station.guess_projections_to_enu(
            out_channels=('E', 'N', 'Z')))

        # source/target geometry takes precedence over an explicitly given
        # backazimuth.
        if source is not None and target is not None:
            backazimuth = source.azibazi_to(target)[1]

        if backazimuth is not None:
            projections.extend(station.guess_projections_to_rtu(
                out_channels=('R', 'T', 'Z'),
                backazimuth=backazimuth))

        if not projections:
            raise NotFound(
                'Cannot determine projection of data components:',
                station.nsl())

        return projections

655 

656 def _get_waveform( 

657 self, 

658 obj, quantity='displacement', 

659 tmin=None, tmax=None, tpad=0., 

660 tfade=0., freqlimits=None, deltat=None, cache=None, 

661 backazimuth=None, 

662 source=None, 

663 target=None, 

664 debug=False): 

665 

666 assert not debug or (debug and cache is None) 

667 

668 if cache is True: 

669 cache = self._cache 

670 

671 _, _, _, channel = self.get_nslc(obj) 

672 station = self.get_station(self.get_nsl(obj)) 

673 

674 nslc = station.nsl() + (channel,) 

675 

676 if self.is_blacklisted(nslc): 

677 raise NotFound( 

678 'Waveform is blacklisted:', nslc) 

679 

680 if not self.is_whitelisted(nslc): 

681 raise NotFound( 

682 'Waveform is not on whitelist:', nslc) 

683 

684 assert tmin is not None 

685 assert tmax is not None 

686 

687 tmin = float(tmin) 

688 tmax = float(tmax) 

689 

690 nslc = tuple(nslc) 

691 

692 cache_k = nslc + ( 

693 tmin, tmax, tuple(freqlimits), tfade, deltat, tpad, quantity) 

694 if cache is not None and (nslc + cache_k) in cache: 

695 obj = cache[nslc + cache_k] 

696 if isinstance(obj, Exception): 

697 raise obj 

698 elif obj is None: 

699 raise NotFound('Waveform not found!', nslc) 

700 else: 

701 return obj 

702 

703 syn_test = self.synthetic_test 

704 toffset_noise_extract = 0.0 

705 if syn_test: 

706 if not syn_test.respect_data_availability: 

707 if syn_test.real_noise_scale != 0.0: 

708 raise DatasetError( 

709 'respect_data_availability=False and ' 

710 'addition of real noise cannot be combined.') 

711 

712 tr = syn_test.get_waveform( 

713 nslc, tmin, tmax, 

714 tfade=tfade, 

715 freqlimits=freqlimits) 

716 

717 if cache is not None: 

718 cache[tr.nslc_id + cache_k] = tr 

719 

720 if debug: 

721 return [], [], [] 

722 else: 

723 return tr 

724 

725 if syn_test.real_noise_scale != 0.0: 

726 toffset_noise_extract = syn_test.real_noise_toffset 

727 

728 abs_delays = [] 

729 for ocha in 'ENZRT': 

730 sc = self.station_corrections.get(station.nsl() + (channel,), None) 

731 if sc: 

732 abs_delays.append(abs(sc.delay)) 

733 

734 if abs_delays: 

735 abs_delay_max = max(abs_delays) 

736 else: 

737 abs_delay_max = 0.0 

738 

739 projections = self._get_projections( 

740 station, backazimuth, source, target, tmin, tmax) 

741 

742 try: 

743 trs_projected = [] 

744 trs_restituted = [] 

745 trs_raw = [] 

746 exceptions = [] 

747 for matrix, in_channels, out_channels in projections: 

748 deps = trace.project_dependencies( 

749 matrix, in_channels, out_channels) 

750 

751 try: 

752 trs_restituted_group = [] 

753 trs_raw_group = [] 

754 if channel in deps: 

755 for cha in deps[channel]: 

756 trs_restituted_this, trs_raw_this = \ 

757 self.get_waveform_restituted( 

758 station.nsl() + (cha,), 

759 quantity=quantity, 

760 tmin=tmin, tmax=tmax, 

761 tpad=tpad+abs_delay_max, 

762 toffset_noise_extract=toffset_noise_extract, # noqa 

763 tfade=tfade, 

764 freqlimits=freqlimits, 

765 deltat=deltat, 

766 want_incomplete=debug, 

767 extend_incomplete=self.extend_incomplete) 

768 

769 trs_restituted_group.extend(trs_restituted_this) 

770 trs_raw_group.extend(trs_raw_this) 

771 

772 trs_projected.extend( 

773 trace.project( 

774 trs_restituted_group, matrix, 

775 in_channels, out_channels)) 

776 

777 trs_restituted.extend(trs_restituted_group) 

778 trs_raw.extend(trs_raw_group) 

779 

780 except NotFound as e: 

781 exceptions.append((in_channels, out_channels, e)) 

782 

783 if not trs_projected: 

784 err = [] 

785 for (in_channels, out_channels, e) in exceptions: 

786 sin = ', '.join(c.name for c in in_channels) 

787 sout = ', '.join(c.name for c in out_channels) 

788 err.append('(%s) -> (%s): %s' % (sin, sout, e)) 

789 

790 raise NotFound('\n'.join(err)) 

791 

792 for tr in trs_projected: 

793 sc = self.station_corrections.get(tr.nslc_id, None) 

794 if sc: 

795 if self.apply_correction_factors: 

796 tr.ydata /= sc.factor 

797 

798 if self.apply_correction_delays: 

799 tr.shift(-sc.delay) 

800 

801 if tmin is not None and tmax is not None: 

802 tr.chop(tmin, tmax) 

803 

804 if syn_test: 

805 trs_projected_synthetic = [] 

806 for tr in trs_projected: 

807 if tr.channel == channel: 

808 tr_syn = syn_test.get_waveform( 

809 tr.nslc_id, tmin, tmax, 

810 tfade=tfade, freqlimits=freqlimits) 

811 

812 if tr_syn: 

813 if syn_test.real_noise_scale != 0.0: 

814 tr_syn = tr_syn.copy() 

815 tr_noise = tr.copy() 

816 tr_noise.set_ydata( 

817 tr_noise.get_ydata() 

818 * syn_test.real_noise_scale) 

819 

820 tr_syn.add(tr_noise) 

821 

822 trs_projected_synthetic.append(tr_syn) 

823 

824 trs_projected = trs_projected_synthetic 

825 

826 if cache is not None: 

827 for tr in trs_projected: 

828 cache[tr.nslc_id + cache_k] = tr 

829 

830 tr_return = None 

831 for tr in trs_projected: 

832 if tr.channel == channel: 

833 tr_return = tr 

834 

835 if debug: 

836 return trs_projected, trs_restituted, trs_raw, tr_return 

837 

838 elif tr_return: 

839 return tr_return 

840 

841 else: 

842 raise NotFound( 

843 'waveform not available', station.nsl() + (channel,)) 

844 

845 except NotFound: 

846 if cache is not None: 

847 cache[nslc + cache_k] = None 

848 raise 

849 

850 def get_waveform(self, obj, tinc_cache=None, **kwargs): 

851 tmin = kwargs['tmin'] 

852 tmax = kwargs['tmax'] 

853 if tinc_cache is not None: 

854 tmin_r = (math.floor(tmin / tinc_cache) - 1.0) * tinc_cache 

855 tmax_r = (math.ceil(tmax / tinc_cache) + 1.0) * tinc_cache 

856 else: 

857 tmin_r = tmin 

858 tmax_r = tmax 

859 

860 kwargs['tmin'] = tmin_r 

861 kwargs['tmax'] = tmax_r 

862 

863 if kwargs.get('debug', None): 

864 return self._get_waveform(obj, **kwargs) 

865 else: 

866 tr = self._get_waveform(obj, **kwargs) 

867 return tr.chop(tmin, tmax, inplace=False) 

868 

869 def get_events(self, magmin=None, event_names=None): 

870 evs = [] 

871 for ev in self.events: 

872 if ((magmin is None or ev.magnitude >= magmin) and 

873 (event_names is None or ev.name in event_names)): 

874 evs.append(ev) 

875 

876 return evs 

877 

878 def get_event_by_time(self, t, magmin=None): 

879 evs = self.get_events(magmin=magmin) 

880 ev_x = None 

881 for ev in evs: 

882 if ev_x is None or abs(ev.time - t) < abs(ev_x.time - t): 

883 ev_x = ev 

884 

885 if not ev_x: 

886 raise NotFound( 

887 'No event information matching criteria (t=%s, magmin=%s).' % 

888 (t, magmin)) 

889 

890 return ev_x 

891 

892 def get_event(self): 

893 if self._event_name is None: 

894 raise NotFound('No main event selected in dataset!') 

895 

896 for ev in self.events: 

897 if ev.name == self._event_name: 

898 return ev 

899 

900 raise NotFound('No such event: %s' % self._event_name) 

901 

    def get_picks(self):
        '''Build (lazily) and return the pick lookup table.

        Keys are ``(nsl, phasename, eventname)`` tuples, values are phase
        markers. Event association works via the event hash stored in the
        markers, resolved against event markers in the pick files and the
        dataset's own events.

        :raises DatasetError: on duplicate event names, unassociated picks
            or duplicate picks.
        '''
        if self._picks is None:
            hash_to_name = {}
            names = set()
            for marker in self.pick_markers:
                if isinstance(marker, pmarker.EventMarker):
                    name = marker.get_event().name
                    if name in names:
                        raise DatasetError(
                            'Duplicate event name "%s" in picks.' % name)

                    names.add(name)
                    hash_to_name[marker.get_event_hash()] = name

            # Dataset events take precedence over equally hashed event
            # markers (later update wins).
            for ev in self.events:
                hash_to_name[ev.get_hash()] = ev.name

            picks = {}
            for marker in self.pick_markers:
                if isinstance(marker, pmarker.PhaseMarker):
                    ehash = marker.get_event_hash()

                    nsl = marker.one_nslc()[:3]
                    phasename = marker.get_phasename()

                    if ehash is None or ehash not in hash_to_name:
                        raise DatasetError(
                            'Unassociated pick: %s.%s.%s, %s' %
                            (nsl + (phasename, )))

                    eventname = hash_to_name[ehash]

                    if (nsl, phasename, eventname) in picks:
                        raise DatasetError(
                            'Duplicate pick: %s.%s.%s, %s' %
                            (nsl + (phasename, )))

                    picks[nsl, phasename, eventname] = marker

            self._picks = picks

        return self._picks

944 

    def get_pick(self, eventname, obj, phasename):
        '''Return the pick marker for (station of *obj*, *phasename*,
        *eventname*), or None if no such pick exists.'''
        nsl = self.get_nsl(obj)
        return self.get_picks().get((nsl, phasename, eventname), None)

948 

949 

class DatasetConfig(HasPaths):
    ''' Configuration for a Grond `Dataset` object. '''

    stations_path = Path.T(
        optional=True,
        help='List of files with station coordinates in Pyrocko format.')
    stations_stationxml_paths = List.T(
        Path.T(),
        optional=True,
        help='List of files with station coordinates in StationXML format.')
    events_path = Path.T(
        optional=True,
        help='File with hypocenter information and possibly'
             ' reference solution')
    waveform_paths = List.T(
        Path.T(),
        optional=True,
        help='List of directories with raw waveform data')
    clippings_path = Path.T(
        optional=True)
    responses_sacpz_path = Path.T(
        optional=True,
        help='List of SACPZ response files for restitution of'
             ' the raw waveform data.')
    responses_stationxml_paths = List.T(
        Path.T(),
        optional=True,
        help='List of StationXML response files for restitution of'
             ' the raw waveform data.')
    station_corrections_path = Path.T(
        optional=True,
        help='File containing station correction informations.')
    apply_correction_factors = Bool.T(
        optional=True,
        default=True,
        help='Apply correction factors from station corrections.')
    apply_correction_delays = Bool.T(
        optional=True,
        default=True,
        help='Apply correction delays from station corrections.')
    apply_displaced_sampling_workaround = Bool.T(
        optional=True,
        default=False,
        help='Work around displaced sampling issues.')
    extend_incomplete = Bool.T(
        default=False,
        help='Extend incomplete seismic traces.')
    picks_paths = List.T(
        Path.T())
    blacklist_paths = List.T(
        Path.T(),
        help='List of text files with blacklisted stations.')
    blacklist = List.T(
        String.T(),
        help='Stations/components to be excluded according to their STA, '
             'NET.STA, NET.STA.LOC, or NET.STA.LOC.CHA codes.')
    whitelist_paths = List.T(
        Path.T(),
        help='List of text files with whitelisted stations.')
    whitelist = List.T(
        String.T(),
        optional=True,
        help='If not None, list of stations/components to include according '
             'to their STA, NET.STA, NET.STA.LOC, or NET.STA.LOC.CHA codes. '
             'Note: ''when whitelisting on channel level, both, the raw and '
             'the processed channel codes have to be listed.')
    synthetic_test = SyntheticTest.T(
        optional=True)

    kite_scene_paths = List.T(
        Path.T(),
        optional=True,
        help='List of directories for the InSAR scenes.')

    gnss_campaign_paths = List.T(
        Path.T(),
        optional=True,
        help='List of directories for the GNSS campaign data.')

    def __init__(self, *args, **kwargs):
        HasPaths.__init__(self, *args, **kwargs)
        # event_name -> cached Dataset instance (see get_dataset()).
        self._ds = {}

    def get_event_names(self):
        '''Return the sorted names of all events found via
        ``events_path`` (with the event_name template expanded to "*").

        :raises DatasetError: when no event file matches.
        '''
        logger.info('Loading events ...')

        def extra(path):
            return expand_template(path, dict(
                event_name='*'))

        def fp(path):
            return self.expand_path(path, extra=extra)

        def check_events(events, fn):
            # Sanity warnings only; nothing is rejected here. Checking of
            # a file stops at the first unnamed event.
            for ev in events:
                if not ev.name:
                    logger.warning('Event in %s has no name!', fn)
                    return
                if not ev.lat or not ev.lon:
                    # NOTE(review): `not ev.lat` also triggers for a
                    # legitimate coordinate of exactly 0.0 — confirm
                    # intended.
                    logger.warning('Event %s has inconsistent coordinates!',
                                   ev.name)
                if not ev.depth:
                    logger.warning('Event %s has no depth!', ev.name)
                if not ev.time:
                    logger.warning('Event %s has no time!', ev.name)

        events = []
        events_path = fp(self.events_path)
        fns = glob.glob(events_path)
        if not fns:
            raise DatasetError('No event files matching "%s".' % events_path)

        for fn in fns:
            logger.debug('Loading from file %s' % fn)
            ev = model.load_events(filename=fn)
            check_events(ev, fn)

            events.extend(ev)

        event_names = [ev_.name for ev_ in events]
        event_names.sort()
        return event_names

    def get_dataset(self, event_name):
        '''Build (or return the cached) :py:class:`Dataset` for
        *event_name*, populated according to this configuration.

        :raises DatasetError: when any configured file cannot be loaded.
        '''
        if event_name not in self._ds:
            def extra(path):
                return expand_template(path, dict(
                    event_name=event_name))

            def fp(path):
                # Expand a configured path (or list of paths) and warn
                # about entries that do not exist on disk.
                p = self.expand_path(path, extra=extra)
                if p is None:
                    return None

                if isinstance(p, list):
                    for path in p:
                        if not op.exists(path):
                            logger.warning('Path %s does not exist.' % path)
                else:
                    if not op.exists(p):
                        logger.warning('Path %s does not exist.' % p)

                return p

            ds = Dataset(event_name)
            try:
                ds.add_events(filename=fp(self.events_path))

                ds.add_stations(
                    pyrocko_stations_filename=fp(self.stations_path),
                    stationxml_filenames=fp(self.stations_stationxml_paths))

                if self.waveform_paths:
                    ds.add_waveforms(paths=fp(self.waveform_paths))

                if self.kite_scene_paths:
                    ds.add_kite_scenes(paths=fp(self.kite_scene_paths))

                if self.gnss_campaign_paths:
                    ds.add_gnss_campaigns(paths=fp(self.gnss_campaign_paths))

                if self.clippings_path:
                    ds.add_clippings(markers_filename=fp(self.clippings_path))

                if self.responses_sacpz_path:
                    ds.add_responses(
                        sacpz_dirname=fp(self.responses_sacpz_path))

                if self.responses_stationxml_paths:
                    ds.add_responses(
                        stationxml_filenames=fp(
                            self.responses_stationxml_paths))

                if self.station_corrections_path:
                    ds.add_station_corrections(
                        filename=fp(self.station_corrections_path))

                ds.apply_correction_factors = self.apply_correction_factors
                ds.apply_correction_delays = self.apply_correction_delays
                ds.apply_displaced_sampling_workaround = \
                    self.apply_displaced_sampling_workaround
                ds.extend_incomplete = self.extend_incomplete

                for picks_path in self.picks_paths:
                    ds.add_picks(
                        filename=fp(picks_path))

                ds.add_blacklist(self.blacklist)
                ds.add_blacklist(filenames=fp(self.blacklist_paths))
                if self.whitelist:
                    ds.add_whitelist(self.whitelist)
                if self.whitelist_paths:
                    ds.add_whitelist(filenames=fp(self.whitelist_paths))

                # Deep-copied so each dataset owns its synthetic test state.
                ds.set_synthetic_test(copy.deepcopy(self.synthetic_test))
                self._ds[event_name] = ds
            except (FileLoadError, OSError) as e:
                raise DatasetError(str(e))

        return self._ds[event_name]

1150 

1151 

# Public API of this module.
__all__ = '''
    Dataset
    DatasetConfig
    DatasetError
    InvalidObject
    NotFound
    StationCorrection
    load_station_corrections
    dump_station_corrections
'''.split()