import glob
import copy
import os.path as op
import logging
import math
import numpy as num

from collections import defaultdict
from pyrocko import util, pile, model, config, trace, \
    marker as pmarker
from pyrocko.io.io_common import FileLoadError
from pyrocko.fdsn import enhanced_sacpz, station as fs
from pyrocko.guts import (Object, Tuple, String, Float, List, Bool, dump_all,
                          load_all)

from pyrocko import gf

from .meta import Path, HasPaths, expand_template, GrondError

from .synthetic_tests import SyntheticTest

guts_prefix = 'grond'
logger = logging.getLogger('grond.dataset')

def quote_paths(paths):
    return ', '.join('"%s"' % path for path in paths)

class InvalidObject(Exception):
    pass


class NotFound(Exception):
    def __init__(self, reason, codes=None, time_range=None):
        self.reason = reason
        self.time_range = time_range
        self.codes = codes

    def __str__(self):
        s = self.reason
        if self.codes:
            s += ' (%s)' % '.'.join(self.codes)

        if self.time_range:
            s += ' (%s - %s)' % (
                util.time_to_str(self.time_range[0]),
                util.time_to_str(self.time_range[1]))

        return s

class DatasetError(GrondError):
    pass


class StationCorrection(Object):
    codes = Tuple.T(4, String.T())
    delay = Float.T()
    factor = Float.T()

def load_station_corrections(filename):
    scs = load_all(filename=filename)
    for sc in scs:
        assert isinstance(sc, StationCorrection)

    return scs


def dump_station_corrections(station_corrections, filename):
    return dump_all(station_corrections, filename=filename)
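# Illustrative usage sketch (not part of the module): station corrections
# are guts objects, so they round-trip through YAML. The file name below is
# hypothetical.
#
#     sc = StationCorrection(
#         codes=('GE', 'APE', '', 'BHZ'), delay=0.15, factor=1.2)
#     dump_station_corrections([sc], 'corrections.yaml')
#     assert load_station_corrections('corrections.yaml')[0].delay == 0.15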

class Dataset(object):

    def __init__(self, event_name=None):
        self.events = []
        self.pile = pile.Pile()
        self.stations = {}
        self.responses = defaultdict(list)
        self.responses_stationxml = []
        self.clippings = {}
        self.blacklist = set()
        self.whitelist_nslc = None
        self.whitelist_nsl_xx = None
        self.whitelist = None
        self.station_corrections = {}
        self.station_factors = {}
        self.pick_markers = []
        self.apply_correction_delays = True
        self.apply_correction_factors = True
        self.apply_displaced_sampling_workaround = False
        self.extend_incomplete = False
        self.clip_handling = 'by_nsl'
        self.kite_scenes = []
        self.gnss_campaigns = []
        self.synthetic_test = None
        self._picks = None
        self._cache = {}
        self._event_name = event_name
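    # Typical assembly, as an illustrative sketch (file names below are
    # hypothetical; all methods used exist in this class):
    #
    #     ds = Dataset(event_name='ev_01')
    #     ds.add_events(filename='events.txt')
    #     ds.add_stations(pyrocko_stations_filename='stations.txt')
    #     ds.add_waveforms(paths=['data/'])
    #     ds.add_responses(stationxml_filenames=['responses.xml'])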

    def empty_cache(self):
        self._cache = {}

    def set_synthetic_test(self, synthetic_test):
        self.synthetic_test = synthetic_test

    def add_stations(
            self,
            stations=None,
            pyrocko_stations_filename=None,
            stationxml_filenames=None):

        if stations is not None:
            for station in stations:
                self.stations[station.nsl()] = station

        if pyrocko_stations_filename is not None:
            logger.debug(
                'Loading stations from file "%s"...' %
                pyrocko_stations_filename)

            for station in model.load_stations(pyrocko_stations_filename):
                self.stations[station.nsl()] = station

        if stationxml_filenames is not None and len(stationxml_filenames) > 0:

            for stationxml_filename in stationxml_filenames:
                if not op.exists(stationxml_filename):
                    continue

                logger.debug(
                    'Loading stations from StationXML file "%s"...' %
                    stationxml_filename)

                sx = fs.load_xml(filename=stationxml_filename)
                ev = self.get_event()
                try:
                    stations = sx.get_pyrocko_stations(
                        time=ev.time, sloppy=True)
                except TypeError:
                    logger.warning(
                        'The installed version of Pyrocko does not support '
                        'the keyword argument `sloppy` in method '
                        '`FDSNStationXML.get_pyrocko_stations`. Please '
                        'upgrade Pyrocko.')

                    stations = sx.get_pyrocko_stations(time=ev.time)

                if len(stations) == 0:
                    logger.warning(
                        'No stations found for time %s in file "%s".' % (
                            util.time_to_str(ev.time), stationxml_filename))

                for station in stations:
                    logger.debug('Adding station: %s.%s.%s' % station.nsl())
                    channels = station.get_channels()
                    if len(channels) == 1 and channels[0].name.endswith('Z'):
                        logger.warning(
                            'Station "%s" has vertical component'
                            ' information only, adding mocked channels.'
                            % station.nsl_string())
                        station.add_channel(
                            model.Channel(channels[0].name[:-1] + 'N'))
                        station.add_channel(
                            model.Channel(channels[0].name[:-1] + 'E'))

                    self.stations[station.nsl()] = station

    def add_events(self, events=None, filename=None):
        if events is not None:
            self.events.extend(events)

        if filename is not None:
            logger.debug('Loading events from file "%s"...' % filename)
            try:
                events = model.load_events(filename)
                self.events.extend(events)
                logger.info(
                    'Loading events from %s: %i events found.' %
                    (filename, len(events)))
            except Exception:
                logger.warning('Could not load events from %s!', filename)
                raise

    def add_waveforms(self, paths, regex=None, fileformat='detect',
                      show_progress=False):
        cachedirname = config.config().cache_dir

        logger.debug('Selecting waveform files %s...' % quote_paths(paths))
        fns = util.select_files(paths, include=regex,
                                show_progress=show_progress)
        cache = pile.get_cache(cachedirname)
        logger.debug('Scanning waveform files %s...' % quote_paths(paths))
        self.pile.load_files(sorted(fns), cache=cache,
                             fileformat=fileformat,
                             show_progress=show_progress)

    def add_responses(self, sacpz_dirname=None, stationxml_filenames=None):
        if sacpz_dirname:
            logger.debug(
                'Loading SAC PZ responses from "%s"...' % sacpz_dirname)
            for x in enhanced_sacpz.iload_dirname(sacpz_dirname):
                self.responses[x.codes].append(x)

        if stationxml_filenames:
            for stationxml_filename in stationxml_filenames:
                if not op.exists(stationxml_filename):
                    continue

                logger.debug(
                    'Loading StationXML responses from "%s"...' %
                    stationxml_filename)

                self.responses_stationxml.append(
                    fs.load_xml(filename=stationxml_filename))

    def add_clippings(self, markers_filename):
        markers = pmarker.load_markers(markers_filename)
        clippings = {}
        for marker in markers:
            nslc = marker.one_nslc()
            nsl = nslc[:3]
            if nsl not in clippings:
                clippings[nsl] = []

            if nslc not in clippings:
                clippings[nslc] = []

            clippings[nsl].append(marker.tmin)
            clippings[nslc].append(marker.tmin)

        for k, times in clippings.items():
            atimes = num.array(times, dtype=float)
            if k not in self.clippings:
                self.clippings[k] = atimes
            else:
                self.clippings[k] = num.concatenate(
                    (self.clippings[k], atimes))

    def add_blacklist(self, blacklist=[], filenames=None):
        logger.debug('Loading blacklisted stations...')
        if filenames:
            blacklist = list(blacklist)
            for filename in filenames:
                if op.exists(filename):
                    with open(filename, 'r') as f:
                        blacklist.extend(
                            s.strip() for s in f.read().splitlines())
                else:
                    logger.warning('No such blacklist file: %s' % filename)

        for x in blacklist:
            if isinstance(x, str):
                x = tuple(x.split('.'))
            self.blacklist.add(x)

    def add_whitelist(self, whitelist=[], filenames=None):
        logger.debug('Loading whitelisted stations...')
        if filenames:
            whitelist = list(whitelist)
            for filename in filenames:
                with open(filename, 'r') as f:
                    whitelist.extend(s.strip() for s in f.read().splitlines())

        if self.whitelist_nslc is None:
            self.whitelist_nslc = set()
            self.whitelist = set()
            self.whitelist_nsl_xx = set()

        for x in whitelist:
            if isinstance(x, str):
                x = tuple(x.split('.'))
            if len(x) == 4:
                self.whitelist_nslc.add(x)
                self.whitelist_nsl_xx.add(x[:3])
            else:
                self.whitelist.add(x)
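    # Entries may be given at different levels of specificity; string
    # entries are split on '.' (illustrative values):
    #
    #     ds.add_blacklist(['BAD'])             # by station code
    #     ds.add_blacklist(['GE.BAD'])          # by network.station
    #     ds.add_whitelist(['GE.APE.', 'GE.APE..BHZ'])
    #
    # Four-part codes match on channel level, shorter ones on station
    # level.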

    def add_station_corrections(self, filename):
        self.station_corrections.update(
            (sc.codes, sc) for sc in load_station_corrections(filename))

    def add_picks(self, filename):
        self.pick_markers.extend(
            pmarker.load_markers(filename))

        self._picks = None

    def add_gnss_campaigns(self, paths):
        paths = util.select_files(
            paths,
            include=r'\.yml|\.yaml',
            show_progress=False)

        for path in paths:
            self.add_gnss_campaign(filename=path)

    def add_gnss_campaign(self, filename):
        try:
            from pyrocko.model import gnss  # noqa
        except ImportError:
            raise ImportError(
                'Module pyrocko.model.gnss not found. Please upgrade Pyrocko.')

        logger.debug('Loading GNSS campaign from "%s"...' % filename)

        campaign = load_all(filename=filename)
        self.gnss_campaigns.append(campaign[0])

    def add_kite_scenes(self, paths):
        logger.info('Loading kite InSAR scenes...')
        fns = util.select_files(
            paths,
            include=r'\.npz',
            show_progress=False)

        for fn in fns:
            self.add_kite_scene(filename=fn)

        if not self.kite_scenes:
            logger.warning('Could not find any kite scenes at %s.' %
                           quote_paths(paths))

    def add_kite_scene(self, filename):
        try:
            from kite import Scene
        except ImportError:
            raise ImportError('Module kite could not be imported,'
                              ' please install from https://pyrocko.org.')
        logger.debug('Loading kite scene from "%s"...' % filename)

        scene = Scene.load(filename)
        scene._log.setLevel(logger.level)

        try:
            self.get_kite_scene(scene.meta.scene_id)
        except NotFound:
            self.kite_scenes.append(scene)
        else:
            raise AttributeError('Kite scene_id not unique for "%s".'
                                 % filename)

    def is_blacklisted(self, obj):
        try:
            nslc = self.get_nslc(obj)
            if nslc in self.blacklist:
                return True

        except InvalidObject:
            pass

        nsl = self.get_nsl(obj)
        return (
            nsl in self.blacklist or
            nsl[1:2] in self.blacklist or
            nsl[:2] in self.blacklist)

    def is_whitelisted(self, obj):
        if self.whitelist_nslc is None:
            return True

        nsl = self.get_nsl(obj)

        if (
                nsl in self.whitelist or
                nsl[1:2] in self.whitelist or
                nsl[:2] in self.whitelist):

            return True

        try:
            nslc = self.get_nslc(obj)
            if nslc in self.whitelist_nslc:
                return True

        except InvalidObject:
            return nsl in self.whitelist_nsl_xx

        return False

    def has_clipping(self, nsl_or_nslc, tmin, tmax):
        if nsl_or_nslc not in self.clippings:
            return False

        atimes = self.clippings[nsl_or_nslc]
        return num.any(num.logical_and(tmin < atimes, atimes <= tmax))
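    # The clipping test is half-open: a clip time t counts for a window
    # when tmin < t <= tmax. Illustrative check with made-up numbers:
    #
    #     ds.clippings[('GE', 'APE', '')] = num.array([10.0])
    #     ds.has_clipping(('GE', 'APE', ''), 5.0, 10.0)   # -> True
    #     ds.has_clipping(('GE', 'APE', ''), 10.0, 20.0)  # -> False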

    def get_nsl(self, obj):
        if isinstance(obj, trace.Trace):
            net, sta, loc, _ = obj.nslc_id
        elif isinstance(obj, model.Station):
            net, sta, loc = obj.nsl()
        elif isinstance(obj, gf.Target):
            net, sta, loc, _ = obj.codes
        elif isinstance(obj, tuple) and len(obj) in (3, 4):
            net, sta, loc = obj[:3]
        else:
            raise InvalidObject(
                'Cannot get nsl code from given object of type "%s".'
                % type(obj))

        return net, sta, loc

    def get_nslc(self, obj):
        if isinstance(obj, trace.Trace):
            return obj.nslc_id
        elif isinstance(obj, gf.Target):
            return obj.codes
        elif isinstance(obj, tuple) and len(obj) == 4:
            return obj
        else:
            raise InvalidObject(
                'Cannot get nslc code from given object of type "%s".'
                % type(obj))
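    # Pyrocko traces, gf.Target objects and plain tuples are all normalized
    # to (net, sta, loc) or (net, sta, loc, cha), e.g.:
    #
    #     ds.get_nsl(('GE', 'APE', '', 'BHZ'))   # -> ('GE', 'APE', '')
    #     ds.get_nslc(('GE', 'APE', '', 'BHZ'))  # -> ('GE', 'APE', '', 'BHZ')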

    def get_tmin_tmax(self, obj):
        if isinstance(obj, trace.Trace):
            return obj.tmin, obj.tmax
        else:
            raise InvalidObject(
                'Cannot get tmin and tmax from given object of type "%s"' %
                type(obj))

    def get_station(self, obj):
        if self.is_blacklisted(obj):
            raise NotFound('Station is blacklisted:', self.get_nsl(obj))

        if not self.is_whitelisted(obj):
            raise NotFound('Station is not on whitelist:', self.get_nsl(obj))

        if isinstance(obj, model.Station):
            return obj

        net, sta, loc = self.get_nsl(obj)

        keys = [(net, sta, loc), (net, sta, ''), ('', sta, '')]
        for k in keys:
            if k in self.stations:
                return self.stations[k]

        raise NotFound('No station information:', keys)

    def get_stations(self):
        return [self.stations[k] for k in sorted(self.stations)
                if not self.is_blacklisted(self.stations[k])
                and self.is_whitelisted(self.stations[k])]

    def get_kite_scenes(self):
        return self.kite_scenes

    def get_kite_scene(self, scene_id=None):
        if scene_id is None:
            if len(self.kite_scenes) == 0:
                raise AttributeError('No kite displacements defined.')
            return self.kite_scenes[0]
        else:
            for scene in self.kite_scenes:
                if scene.meta.scene_id == scene_id:
                    return scene
            raise NotFound('No kite scene with id "%s" defined.' % scene_id)

    def get_gnss_campaigns(self):
        return self.gnss_campaigns

    def get_gnss_campaign(self, name):
        for camp in self.gnss_campaigns:
            if camp.name == name:
                return camp
        raise NotFound('GNSS campaign %s not found!' % name)

    def get_response(self, obj, quantity='displacement'):
        if (self.responses is None or len(self.responses) == 0) \
                and (self.responses_stationxml is None
                     or len(self.responses_stationxml) == 0):

            raise NotFound('No response information available.')

        quantity_to_unit = {
            'displacement': 'M',
            'velocity': 'M/S',
            'acceleration': 'M/S**2'}

        if self.is_blacklisted(obj):
            raise NotFound('Response is blacklisted:', self.get_nslc(obj))

        if not self.is_whitelisted(obj):
            raise NotFound('Response is not on whitelist:', self.get_nslc(obj))

        net, sta, loc, cha = self.get_nslc(obj)
        tmin, tmax = self.get_tmin_tmax(obj)

        keys_x = [
            (net, sta, loc, cha), (net, sta, '', cha), ('', sta, '', cha)]

        keys = []
        for k in keys_x:
            if k not in keys:
                keys.append(k)

        candidates = []
        for k in keys:
            if k in self.responses:
                for x in self.responses[k]:
                    if x.tmin < tmin and (x.tmax is None or tmax < x.tmax):
                        if quantity == 'displacement':
                            candidates.append(x.response)
                        elif quantity == 'velocity':
                            candidates.append(trace.MultiplyResponse([
                                x.response,
                                trace.DifferentiationResponse()]))
                        elif quantity == 'acceleration':
                            candidates.append(trace.MultiplyResponse([
                                x.response,
                                trace.DifferentiationResponse(2)]))
                        else:
                            assert False

        for sx in self.responses_stationxml:
            try:
                candidates.append(
                    sx.get_pyrocko_response(
                        (net, sta, loc, cha),
                        timespan=(tmin, tmax),
                        fake_input_units=quantity_to_unit[quantity]))

            except (fs.NoResponseInformation, fs.MultipleResponseInformation):
                pass

        if len(candidates) == 1:
            return candidates[0]

        elif len(candidates) == 0:
            raise NotFound('No response found:', (net, sta, loc, cha))
        else:
            raise NotFound('Multiple responses found:', (net, sta, loc, cha))
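    # How quantities map to responses, as a sketch of the logic above: a
    # SAC PZ response is stored for displacement; velocity and acceleration
    # are derived by chaining one or two differentiations onto it, e.g. for
    # velocity:
    #
    #     resp_vel = trace.MultiplyResponse([
    #         x.response, trace.DifferentiationResponse()])
    #
    # StationXML responses are instead requested with fake input units
    # ('M', 'M/S' or 'M/S**2') so the unit conversion happens inside
    # Pyrocko.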

    def get_waveform_raw(
            self, obj,
            tmin,
            tmax,
            tpad=0.,
            toffset_noise_extract=0.,
            want_incomplete=False,
            extend_incomplete=False):

        net, sta, loc, cha = self.get_nslc(obj)

        if self.is_blacklisted((net, sta, loc, cha)):
            raise NotFound(
                'Waveform is blacklisted:', (net, sta, loc, cha))

        if not self.is_whitelisted((net, sta, loc, cha)):
            raise NotFound(
                'Waveform is not on whitelist:', (net, sta, loc, cha))

        if self.clip_handling == 'by_nsl':
            if self.has_clipping((net, sta, loc), tmin, tmax):
                raise NotFound(
                    'Waveform clipped:', (net, sta, loc))

        elif self.clip_handling == 'by_nslc':
            if self.has_clipping((net, sta, loc, cha), tmin, tmax):
                raise NotFound(
                    'Waveform clipped:', (net, sta, loc, cha))

        trs = self.pile.all(
            tmin=tmin+toffset_noise_extract,
            tmax=tmax+toffset_noise_extract,
            tpad=tpad,
            trace_selector=lambda tr: tr.nslc_id == (net, sta, loc, cha),
            want_incomplete=want_incomplete or extend_incomplete)

        if self.apply_displaced_sampling_workaround:
            for tr in trs:
                tr.snap()

        if toffset_noise_extract != 0.0:
            for tr in trs:
                tr.shift(-toffset_noise_extract)

        if extend_incomplete and len(trs) == 1:
            trs[0].extend(
                tmin + toffset_noise_extract - tpad,
                tmax + toffset_noise_extract + tpad,
                fillmethod='repeat')

        if not want_incomplete and len(trs) != 1:
            if len(trs) == 0:
                message = 'Waveform missing or incomplete.'
            else:
                message = 'Waveform has gaps.'

            raise NotFound(
                message,
                codes=(net, sta, loc, cha),
                time_range=(
                    tmin + toffset_noise_extract - tpad,
                    tmax + toffset_noise_extract + tpad))

        return trs

    def get_waveform_restituted(
            self,
            obj, quantity='displacement',
            tmin=None, tmax=None, tpad=0.,
            tfade=0., freqlimits=None, deltat=None,
            toffset_noise_extract=0.,
            want_incomplete=False,
            extend_incomplete=False):

        if deltat is not None:
            tpad_downsample = deltat * 50.
        else:
            tpad_downsample = 0.

        trs_raw = self.get_waveform_raw(
            obj, tmin=tmin, tmax=tmax, tpad=tpad+tfade+tpad_downsample,
            toffset_noise_extract=toffset_noise_extract,
            want_incomplete=want_incomplete,
            extend_incomplete=extend_incomplete)

        trs_restituted = []
        for tr in trs_raw:
            if deltat is not None:
                tr.downsample_to(deltat, snap=True, allow_upsample_max=5)
                tr.deltat = deltat

            resp = self.get_response(tr, quantity=quantity)
            try:
                trs_restituted.append(
                    tr.transfer(
                        tfade=tfade, freqlimits=freqlimits,
                        transfer_function=resp, invert=True, demean=True))

            except trace.InfiniteResponse:
                raise NotFound(
                    'Instrument response deconvolution failed '
                    '(divide by zero)', tr.nslc_id)

        return trs_restituted, trs_raw
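    # Restitution sketch (illustrative; codes and window are hypothetical):
    # raw counts are deconvolved to the requested quantity within the given
    # frequency band.
    #
    #     trs_rest, trs_raw = ds.get_waveform_restituted(
    #         ('GE', 'APE', '', 'BHZ'),
    #         quantity='displacement',
    #         tmin=t0, tmax=t0 + 600.,
    #         tfade=60., freqlimits=(0.01, 0.02, 1.0, 2.0))
    #
    # `freqlimits` is the four-corner frequency taper handed to
    # `trace.Trace.transfer`; `tfade` tapers the trace ends in time.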

    def _get_projections(
            self, station, backazimuth, source, target, tmin, tmax):

        # fill in missing channel information (happens when station file
        # does not contain any channel information)
        if not station.get_channels():
            station = copy.deepcopy(station)

            nsl = station.nsl()
            trs = self.pile.all(
                tmin=tmin, tmax=tmax,
                trace_selector=lambda tr: tr.nslc_id[:3] == nsl,
                load_data=False)

            channels = list(set(tr.channel for tr in trs))
            station.set_channels_by_name(*channels)

        projections = []
        projections.extend(station.guess_projections_to_enu(
            out_channels=('E', 'N', 'Z')))

        if source is not None and target is not None:
            backazimuth = source.azibazi_to(target)[1]

        if backazimuth is not None:
            projections.extend(station.guess_projections_to_rtu(
                out_channels=('R', 'T', 'Z'),
                backazimuth=backazimuth))

        if not projections:
            raise NotFound(
                'Cannot determine projection of data components:',
                station.nsl())

        return projections

    def _get_waveform(
            self,
            obj, quantity='displacement',
            tmin=None, tmax=None, tpad=0.,
            tfade=0., freqlimits=None, deltat=None, cache=None,
            backazimuth=None,
            source=None,
            target=None,
            debug=False):

        assert not debug or (debug and cache is None)

        if cache is True:
            cache = self._cache

        _, _, _, channel = self.get_nslc(obj)
        station = self.get_station(self.get_nsl(obj))

        nslc = station.nsl() + (channel,)

        if self.is_blacklisted(nslc):
            raise NotFound(
                'Waveform is blacklisted:', nslc)

        if not self.is_whitelisted(nslc):
            raise NotFound(
                'Waveform is not on whitelist:', nslc)

        assert tmin is not None
        assert tmax is not None

        tmin = float(tmin)
        tmax = float(tmax)

        nslc = tuple(nslc)

        cache_k = (
            tmin, tmax, tuple(freqlimits), tfade, deltat, tpad, quantity)
        if cache is not None and (nslc + cache_k) in cache:
            obj = cache[nslc + cache_k]
            if isinstance(obj, Exception):
                raise obj
            elif obj is None:
                raise NotFound('Waveform not found!', nslc)
            else:
                return obj

        syn_test = self.synthetic_test
        toffset_noise_extract = 0.0
        if syn_test:
            if not syn_test.respect_data_availability:
                if syn_test.real_noise_scale != 0.0:
                    raise DatasetError(
                        'respect_data_availability=False and '
                        'addition of real noise cannot be combined.')

                tr = syn_test.get_waveform(
                    nslc, tmin, tmax,
                    tfade=tfade,
                    freqlimits=freqlimits)

                if cache is not None:
                    cache[tr.nslc_id + cache_k] = tr

                if debug:
                    return [], [], [], tr
                else:
                    return tr

            if syn_test.real_noise_scale != 0.0:
                toffset_noise_extract = syn_test.real_noise_toffset

        abs_delays = []
        for ocha in 'ENZRT':
            sc = self.station_corrections.get(station.nsl() + (ocha,), None)
            if sc:
                abs_delays.append(abs(sc.delay))

        if abs_delays:
            abs_delay_max = max(abs_delays)
        else:
            abs_delay_max = 0.0

        projections = self._get_projections(
            station, backazimuth, source, target, tmin, tmax)

        try:
            trs_projected = []
            trs_restituted = []
            trs_raw = []
            exceptions = []
            for matrix, in_channels, out_channels in projections:
                deps = trace.project_dependencies(
                    matrix, in_channels, out_channels)

                try:
                    trs_restituted_group = []
                    trs_raw_group = []
                    if channel in deps:
                        for cha in deps[channel]:
                            trs_restituted_this, trs_raw_this = \
                                self.get_waveform_restituted(
                                    station.nsl() + (cha,),
                                    quantity=quantity,
                                    tmin=tmin, tmax=tmax,
                                    tpad=tpad+abs_delay_max,
                                    toffset_noise_extract=toffset_noise_extract,  # noqa
                                    tfade=tfade,
                                    freqlimits=freqlimits,
                                    deltat=deltat,
                                    want_incomplete=debug,
                                    extend_incomplete=self.extend_incomplete)

                            trs_restituted_group.extend(trs_restituted_this)
                            trs_raw_group.extend(trs_raw_this)

                        trs_projected.extend(
                            trace.project(
                                trs_restituted_group, matrix,
                                in_channels, out_channels))

                        trs_restituted.extend(trs_restituted_group)
                        trs_raw.extend(trs_raw_group)

                except NotFound as e:
                    exceptions.append((in_channels, out_channels, e))

            if not trs_projected:
                err = []
                for (in_channels, out_channels, e) in exceptions:
                    sin = ', '.join(c.name for c in in_channels)
                    sout = ', '.join(c.name for c in out_channels)
                    err.append('(%s) -> (%s): %s' % (sin, sout, e))

                raise NotFound('\n'.join(err))

            for tr in trs_projected:
                sc = self.station_corrections.get(tr.nslc_id, None)
                if sc:
                    if self.apply_correction_factors:
                        tr.ydata /= sc.factor

                    if self.apply_correction_delays:
                        tr.shift(-sc.delay)

                if tmin is not None and tmax is not None:
                    tr.chop(tmin, tmax)

            if syn_test:
                trs_projected_synthetic = []
                for tr in trs_projected:
                    if tr.channel == channel:
                        tr_syn = syn_test.get_waveform(
                            tr.nslc_id, tmin, tmax,
                            tfade=tfade, freqlimits=freqlimits)

                        if tr_syn:
                            if syn_test.real_noise_scale != 0.0:
                                tr_syn = tr_syn.copy()
                                tr_noise = tr.copy()
                                tr_noise.set_ydata(
                                    tr_noise.get_ydata()
                                    * syn_test.real_noise_scale)

                                tr_syn.add(tr_noise)

                            trs_projected_synthetic.append(tr_syn)

                trs_projected = trs_projected_synthetic

            if cache is not None:
                for tr in trs_projected:
                    cache[tr.nslc_id + cache_k] = tr

            tr_return = None
            for tr in trs_projected:
                if tr.channel == channel:
                    tr_return = tr

            if debug:
                return trs_projected, trs_restituted, trs_raw, tr_return

            elif tr_return:
                return tr_return

            else:
                raise NotFound(
                    'waveform not available', station.nsl() + (channel,))

        except NotFound:
            if cache is not None:
                cache[nslc + cache_k] = None
            raise

    def get_waveform(self, obj, tinc_cache=None, **kwargs):
        tmin = kwargs['tmin']
        tmax = kwargs['tmax']
        if tinc_cache is not None:
            tmin_r = (math.floor(tmin / tinc_cache) - 1.0) * tinc_cache
            tmax_r = (math.ceil(tmax / tinc_cache) + 1.0) * tinc_cache
        else:
            tmin_r = tmin
            tmax_r = tmax

        kwargs['tmin'] = tmin_r
        kwargs['tmax'] = tmax_r

        if kwargs.get('debug', None):
            return self._get_waveform(obj, **kwargs)
        else:
            tr = self._get_waveform(obj, **kwargs)
            return tr.chop(tmin, tmax, inplace=False)
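    # Cache-window rounding, worked through with illustrative numbers: for
    # tinc_cache=100. and a request tmin=432., tmax=518., the extraction
    # window becomes
    #
    #     tmin_r = (math.floor(432. / 100.) - 1.) * 100.  # -> 300.
    #     tmax_r = (math.ceil(518. / 100.) + 1.) * 100.   # -> 700.
    #
    # so nearby requests map onto the same cached window and the final
    # chop() trims the result back to the requested span.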

    def get_events(self, magmin=None, event_names=None):
        evs = []
        for ev in self.events:
            if ((magmin is None or ev.magnitude >= magmin) and
                    (event_names is None or ev.name in event_names)):
                evs.append(ev)

        return evs

    def get_event_by_time(self, t, magmin=None):
        evs = self.get_events(magmin=magmin)
        ev_x = None
        for ev in evs:
            if ev_x is None or abs(ev.time - t) < abs(ev_x.time - t):
                ev_x = ev

        if not ev_x:
            raise NotFound(
                'No event information matching criteria (t=%s, magmin=%s).' %
                (t, magmin))

        return ev_x

    def get_event(self):
        if self._event_name is None:
            raise NotFound('No main event selected in dataset!')

        for ev in self.events:
            if ev.name == self._event_name:
                return ev

        raise NotFound('No such event: %s' % self._event_name)

    def get_picks(self):
        if self._picks is None:
            hash_to_name = {}
            names = set()
            for marker in self.pick_markers:
                if isinstance(marker, pmarker.EventMarker):
                    name = marker.get_event().name
                    if name in names:
                        raise DatasetError(
                            'Duplicate event name "%s" in picks.' % name)

                    names.add(name)
                    hash_to_name[marker.get_event_hash()] = name

            for ev in self.events:
                hash_to_name[ev.get_hash()] = ev.name

            picks = {}
            for marker in self.pick_markers:
                if isinstance(marker, pmarker.PhaseMarker):
                    ehash = marker.get_event_hash()

                    nsl = marker.one_nslc()[:3]
                    phasename = marker.get_phasename()

                    if ehash is None or ehash not in hash_to_name:
                        raise DatasetError(
                            'Unassociated pick: %s.%s.%s, %s' %
                            (nsl + (phasename, )))

                    eventname = hash_to_name[ehash]

                    if (nsl, phasename, eventname) in picks:
                        raise DatasetError(
                            'Duplicate pick: %s.%s.%s, %s' %
                            (nsl + (phasename, )))

                    picks[nsl, phasename, eventname] = marker

            self._picks = picks

        return self._picks

    def get_pick(self, eventname, obj, phasename):
        nsl = self.get_nsl(obj)
        return self.get_picks().get((nsl, phasename, eventname), None)
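# Pick lookup sketch (illustrative values): picks are keyed by
# ((net, sta, loc), phasename, eventname), so a P pick for station GE.APE
# associated with event 'ev_01' is retrieved as
#
#     marker = ds.get_pick('ev_01', ('GE', 'APE', ''), 'P')
#
# which returns None when no matching phase marker was loaded.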

class DatasetConfig(HasPaths):
    ''' Configuration for a Grond `Dataset` object. '''

    stations_path = Path.T(
        optional=True,
        help='File with station coordinates in Pyrocko format.')
    stations_stationxml_paths = List.T(
        Path.T(),
        optional=True,
        help='List of files with station coordinates in StationXML format.')
    events_path = Path.T(
        optional=True,
        help='File with hypocenter information and possibly a'
             ' reference solution.')
    waveform_paths = List.T(
        Path.T(),
        optional=True,
        help='List of directories with raw waveform data.')
    clippings_path = Path.T(
        optional=True)
    responses_sacpz_path = Path.T(
        optional=True,
        help='Directory with SAC PZ response files for restitution of'
             ' the raw waveform data.')
    responses_stationxml_paths = List.T(
        Path.T(),
        optional=True,
        help='List of StationXML response files for restitution of'
             ' the raw waveform data.')
    station_corrections_path = Path.T(
        optional=True,
        help='File containing station correction information.')
    apply_correction_factors = Bool.T(
        optional=True,
        default=True,
        help='Apply correction factors from station corrections.')
    apply_correction_delays = Bool.T(
        optional=True,
        default=True,
        help='Apply correction delays from station corrections.')
    apply_displaced_sampling_workaround = Bool.T(
        optional=True,
        default=False,
        help='Work around displaced sampling issues.')
    extend_incomplete = Bool.T(
        default=False,
        help='Extend incomplete seismic traces.')
    picks_paths = List.T(
        Path.T())
    blacklist_paths = List.T(
        Path.T(),
        help='List of text files with blacklisted stations.')
    blacklist = List.T(
        String.T(),
        help='Stations/components to be excluded according to their STA, '
             'NET.STA, NET.STA.LOC, or NET.STA.LOC.CHA codes.')
    whitelist_paths = List.T(
        Path.T(),
        help='List of text files with whitelisted stations.')
    whitelist = List.T(
        String.T(),
        optional=True,
        help='If not None, list of stations/components to include according '
             'to their STA, NET.STA, NET.STA.LOC, or NET.STA.LOC.CHA codes. '
             'Note: when whitelisting on channel level, both the raw and '
             'the processed channel codes have to be listed.')
    synthetic_test = SyntheticTest.T(
        optional=True)

    kite_scene_paths = List.T(
        Path.T(),
        optional=True,
        help='List of directories for the InSAR scenes.')

    gnss_campaign_paths = List.T(
        Path.T(),
        optional=True,
        help='List of directories for the GNSS campaign data.')

    def __init__(self, *args, **kwargs):
        HasPaths.__init__(self, *args, **kwargs)
        self._ds = {}

    def get_event_names(self):
        logger.info('Loading events ...')

        def extra(path):
            return expand_template(path, dict(
                event_name='*'))

        def fp(path):
            return self.expand_path(path, extra=extra)

        def check_events(events, fn):
            for ev in events:
                if not ev.name:
                    logger.warning('Event in %s has no name!', fn)
                    return
                if not ev.lat or not ev.lon:
                    logger.warning('Event %s has missing or zero coordinates!',
                                   ev.name)
                if not ev.depth:
                    logger.warning('Event %s has no depth!', ev.name)
                if not ev.time:
                    logger.warning('Event %s has no time!', ev.name)

        events = []
        events_path = fp(self.events_path)
        fns = glob.glob(events_path)
        if not fns:
            raise DatasetError('No event files matching "%s".' % events_path)

        for fn in fns:
            logger.debug('Loading from file %s' % fn)
            ev = model.load_events(filename=fn)
            check_events(ev, fn)

            events.extend(ev)

        event_names = [ev_.name for ev_ in events]
        event_names.sort()
        return event_names

    def get_dataset(self, event_name):
        if event_name not in self._ds:
            def extra(path):
                return expand_template(path, dict(
                    event_name=event_name))

            def fp(path):
                p = self.expand_path(path, extra=extra)
                if p is None:
                    return None

                if isinstance(p, list):
                    for path in p:
                        if not op.exists(path):
                            logger.warning('Path %s does not exist.' % path)
                else:
                    if not op.exists(p):
                        logger.warning('Path %s does not exist.' % p)

                return p

            ds = Dataset(event_name)
            try:
                ds.add_events(filename=fp(self.events_path))

                ds.add_stations(
                    pyrocko_stations_filename=fp(self.stations_path),
                    stationxml_filenames=fp(self.stations_stationxml_paths))

                if self.waveform_paths:
                    ds.add_waveforms(paths=fp(self.waveform_paths))

                if self.kite_scene_paths:
                    ds.add_kite_scenes(paths=fp(self.kite_scene_paths))

                if self.gnss_campaign_paths:
                    ds.add_gnss_campaigns(paths=fp(self.gnss_campaign_paths))

                if self.clippings_path:
                    ds.add_clippings(markers_filename=fp(self.clippings_path))

                if self.responses_sacpz_path:
                    ds.add_responses(
                        sacpz_dirname=fp(self.responses_sacpz_path))

                if self.responses_stationxml_paths:
                    ds.add_responses(
                        stationxml_filenames=fp(
                            self.responses_stationxml_paths))

                if self.station_corrections_path:
                    ds.add_station_corrections(
                        filename=fp(self.station_corrections_path))

                ds.apply_correction_factors = self.apply_correction_factors
                ds.apply_correction_delays = self.apply_correction_delays
                ds.apply_displaced_sampling_workaround = \
                    self.apply_displaced_sampling_workaround
                ds.extend_incomplete = self.extend_incomplete

                for picks_path in self.picks_paths:
                    ds.add_picks(
                        filename=fp(picks_path))

                ds.add_blacklist(self.blacklist)
                ds.add_blacklist(filenames=fp(self.blacklist_paths))
                if self.whitelist:
                    ds.add_whitelist(self.whitelist)
                if self.whitelist_paths:
                    ds.add_whitelist(filenames=fp(self.whitelist_paths))

                ds.set_synthetic_test(copy.deepcopy(self.synthetic_test))
                self._ds[event_name] = ds
            except (FileLoadError, OSError) as e:
                raise DatasetError(str(e))

        return self._ds[event_name]
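# Sketch of how this maps to a Grond configuration file (YAML via guts;
# the paths and the `${event_name}` placeholder below are illustrative):
#
#     dataset_config: !grond.DatasetConfig
#       events_path: 'data/events/${event_name}/event.txt'
#       stations_path: 'data/events/${event_name}/stations.txt'
#       waveform_paths: ['data/events/${event_name}/waveforms']
#       blacklist: ['GE.BAD']
#
# `get_event_names()` expands the template with event_name='*' to discover
# events; `get_dataset(event_name)` builds and caches one Dataset per event.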

__all__ = '''
    Dataset
    DatasetConfig
    DatasetError
    InvalidObject
    NotFound
    StationCorrection
    load_station_corrections
    dump_station_corrections
'''.split()