Coverage for /usr/local/lib/python3.11/dist-packages/grond/dataset.py: 72%

677 statements  

« prev     ^ index     » next       coverage.py v6.5.0, created at 2023-10-26 18:23 +0000

1import glob 

2import copy 

3import os.path as op 

4import logging 

5import math 

6import numpy as num 

7 

8from collections import defaultdict 

9from pyrocko import util, pile, model, config, trace, \ 

10 marker as pmarker 

11from pyrocko.io.io_common import FileLoadError 

12from pyrocko.fdsn import enhanced_sacpz, station as fs 

13from pyrocko.guts import (Object, Tuple, String, Float, List, Bool, dump_all, 

14 load_all) 

15 

16from pyrocko import gf 

17 

18from .meta import Path, HasPaths, expand_template, GrondError 

19 

20from .synthetic_tests import SyntheticTest 

21 

# Prefix used for guts serialization of objects defined in this module.
guts_prefix = 'grond'
# Module-level logger for dataset loading/diagnostic messages.
logger = logging.getLogger('grond.dataset')

24 

25 

def quote_paths(paths):
    '''Return *paths* joined into one comma-separated string of quoted paths.'''
    quoted = ['"%s"' % path for path in paths]
    return ', '.join(quoted)

28 

29 

class InvalidObject(Exception):
    '''Raised when an object of unsupported type is passed to a code helper.'''

32 

33 

class NotFound(Exception):
    '''Raised when a requested dataset item (station, response, waveform,
    scene, event, ...) is unavailable.

    Carries an optional tuple of codes and an optional ``(tmin, tmax)``
    time range for a more informative message.
    '''

    def __init__(self, reason, codes=None, time_range=None):
        self.reason = reason
        self.codes = codes
        self.time_range = time_range

    def __str__(self):
        parts = [self.reason]
        if self.codes:
            parts.append('(%s)' % '.'.join(self.codes))

        if self.time_range:
            parts.append('(%s - %s)' % (
                util.time_to_str(self.time_range[0]),
                util.time_to_str(self.time_range[1])))

        return ' '.join(parts)

51 

52 

class DatasetError(GrondError):
    '''Raised on errors while assembling or configuring a dataset.'''

55 

56 

class StationCorrection(Object):
    '''Static time and amplitude correction for a single channel.

    ``codes`` identifies the (net, sta, loc, cha) channel the correction
    applies to, ``delay`` is a time shift [s] and ``factor`` an amplitude
    scaling factor.
    '''
    codes = Tuple.T(4, String.T())
    delay = Float.T()
    factor = Float.T()

61 

62 

def load_station_corrections(filename):
    '''Load a list of :py:class:`StationCorrection` objects from a YAML file.

    :param filename: path of the YAML file to read
    :returns: list of :py:class:`StationCorrection` objects
    :raises TypeError: if the file contains an object of any other type

    Previously an ``assert`` was used for the type check; that is silently
    stripped when Python runs with ``-O``, so raise explicitly instead.
    '''
    scs = load_all(filename=filename)
    for sc in scs:
        if not isinstance(sc, StationCorrection):
            raise TypeError(
                'File "%s" contains unexpected object of type "%s".'
                % (filename, type(sc).__name__))

    return scs

69 

70 

def dump_station_corrections(station_corrections, filename):
    '''Write the given station corrections to a YAML file *filename*.'''
    return dump_all(station_corrections, filename=filename)

73 

74 

75class Dataset(object): 

76 

    def __init__(self, event_name=None):
        '''Create an empty dataset, optionally bound to a main event name.

        :param event_name: name of the dataset's main event (used by
            :py:meth:`get_event`); may be ``None``.
        '''
        # Loaded event catalog and waveform archive.
        self.events = []
        self.pile = pile.Pile()
        # Station metadata keyed by (net, sta, loc).
        self.stations = {}
        # Instrument responses: SAC PZ entries per channel codes, plus
        # whole StationXML documents.
        self.responses = defaultdict(list)
        self.responses_stationxml = []
        # Clipping onset times per codes, see add_clippings()/has_clipping().
        self.clippings = {}
        # Station/channel selection.
        self.blacklist = set()
        self.whitelist_nslc = None
        self.whitelist_nsl_xx = None
        self.whitelist = None
        # Station corrections keyed by channel codes.
        self.station_corrections = {}
        self.station_factors = {}
        self.pick_markers = []
        # Behaviour flags, typically overridden from DatasetConfig.
        self.apply_correction_delays = True
        self.apply_correction_factors = True
        self.apply_displaced_sampling_workaround = False
        self.extend_incomplete = False
        self.clip_handling = 'by_nsl'
        # Non-seismic datasets.
        self.kite_scenes = []
        self.gnss_campaigns = []
        self.synthetic_test = None
        # Lazily built pick lookup and processed-waveform cache.
        self._picks = None
        self._cache = {}
        self._event_name = event_name

102 

103 def empty_cache(self): 

104 self._cache = {} 

105 

106 def set_synthetic_test(self, synthetic_test): 

107 self.synthetic_test = synthetic_test 

108 

    def add_stations(
            self,
            stations=None,
            pyrocko_stations_filename=None,
            stationxml_filenames=None):
        '''Add station metadata to the dataset.

        :param stations: iterable of pyrocko station objects to add directly
        :param pyrocko_stations_filename: stations file in Pyrocko format
        :param stationxml_filenames: list of StationXML files; stations are
            extracted at the main event's origin time

        Stations are stored keyed by their (net, sta, loc) codes; later
        additions overwrite earlier ones with the same codes.
        '''

        if stations is not None:
            for station in stations:
                self.stations[station.nsl()] = station

        if pyrocko_stations_filename is not None:
            logger.debug(
                'Loading stations from file "%s"...' %
                pyrocko_stations_filename)

            for station in model.load_stations(pyrocko_stations_filename):
                self.stations[station.nsl()] = station

        if stationxml_filenames is not None and len(stationxml_filenames) > 0:

            for stationxml_filename in stationxml_filenames:
                # Missing files are silently skipped here; DatasetConfig
                # already warns about non-existing paths.
                if not op.exists(stationxml_filename):
                    continue

                logger.debug(
                    'Loading stations from StationXML file "%s"...' %
                    stationxml_filename)

                sx = fs.load_xml(filename=stationxml_filename)
                ev = self.get_event()
                stations = sx.get_pyrocko_stations(time=ev.time)
                if len(stations) == 0:
                    logger.warning(
                        'No stations found for time %s in file "%s".' % (
                            util.time_to_str(ev.time), stationxml_filename))

                for station in stations:
                    logger.debug('Adding station: %s.%s.%s' % station.nsl())
                    channels = station.get_channels()
                    # Vertical-only stations get mocked horizontal channels
                    # so that projections can still be set up later.
                    if len(channels) == 1 and channels[0].name.endswith('Z'):
                        logger.warning(
                            'Station "%s" has vertical component'
                            ' information only, adding mocked channels.'
                            % station.nsl_string())
                        station.add_channel(
                            model.Channel(channels[0].name[:-1] + 'N'))
                        station.add_channel(
                            model.Channel(channels[0].name[:-1] + 'E'))

                    self.stations[station.nsl()] = station

159 

160 def add_events(self, events=None, filename=None): 

161 if events is not None: 

162 self.events.extend(events) 

163 

164 if filename is not None: 

165 logger.debug('Loading events from file "%s"...' % filename) 

166 try: 

167 events = model.load_events(filename) 

168 self.events.extend(events) 

169 logger.info( 

170 'Loading events from %s: %i events found.' % 

171 (filename, len(events))) 

172 except Exception as e: 

173 logger.warning('Could not load events from %s!', filename) 

174 raise e 

175 

    def add_waveforms(self, paths, regex=None, fileformat='detect',
                      show_progress=False):
        '''Scan *paths* for waveform files and add them to the dataset pile.

        :param paths: directories/files to search
        :param regex: optional filename filter passed to file selection
        :param fileformat: waveform file format, ``'detect'`` auto-detects
        :param show_progress: show progress bars while selecting/scanning
        '''
        # File meta data is cached in pyrocko's global cache directory so
        # re-scans of unchanged files are fast.
        cachedirname = config.config().cache_dir

        logger.debug('Selecting waveform files %s...' % quote_paths(paths))
        fns = util.select_files(paths, include=regex,
                                show_progress=show_progress)
        cache = pile.get_cache(cachedirname)
        logger.debug('Scanning waveform files %s...' % quote_paths(paths))
        self.pile.load_files(sorted(fns), cache=cache,
                             fileformat=fileformat,
                             show_progress=show_progress)

188 

    def add_responses(self, sacpz_dirname=None, stationxml_filenames=None):
        '''Add instrument response information to the dataset.

        :param sacpz_dirname: directory with SAC pole-zero files
        :param stationxml_filenames: list of StationXML files
        '''
        if sacpz_dirname:
            logger.debug(
                'Loading SAC PZ responses from "%s"...' % sacpz_dirname)
            for x in enhanced_sacpz.iload_dirname(sacpz_dirname):
                self.responses[x.codes].append(x)

        if stationxml_filenames:
            for stationxml_filename in stationxml_filenames:
                # Missing files are skipped; DatasetConfig warns about them.
                if not op.exists(stationxml_filename):
                    continue

                logger.debug(
                    'Loading StationXML responses from "%s"...' %
                    stationxml_filename)

                self.responses_stationxml.append(
                    fs.load_xml(filename=stationxml_filename))

207 

208 def add_clippings(self, markers_filename): 

209 markers = pmarker.load_markers(markers_filename) 

210 clippings = {} 

211 for marker in markers: 

212 nslc = marker.one_nslc() 

213 nsl = nslc[:3] 

214 if nsl not in clippings: 

215 clippings[nsl] = [] 

216 

217 if nslc not in clippings: 

218 clippings[nslc] = [] 

219 

220 clippings[nsl].append(marker.tmin) 

221 clippings[nslc].append(marker.tmin) 

222 

223 for k, times in clippings.items(): 

224 atimes = num.array(times, dtype=float) 

225 if k not in self.clippings: 

226 self.clippings[k] = atimes 

227 else: 

228 self.clippings[k] = num.concatenate(self.clippings, atimes) 

229 

230 def add_blacklist(self, blacklist=[], filenames=None): 

231 logger.debug('Loading blacklisted stations...') 

232 if filenames: 

233 blacklist = list(blacklist) 

234 for filename in filenames: 

235 if op.exists(filename): 

236 with open(filename, 'r') as f: 

237 blacklist.extend( 

238 s.strip() for s in f.read().splitlines()) 

239 else: 

240 logger.warning('No such blacklist file: %s' % filename) 

241 

242 for x in blacklist: 

243 if isinstance(x, str): 

244 x = tuple(x.split('.')) 

245 self.blacklist.add(x) 

246 

247 def add_whitelist(self, whitelist=[], filenames=None): 

248 logger.debug('Loading whitelisted stations...') 

249 if filenames: 

250 whitelist = list(whitelist) 

251 for filename in filenames: 

252 with open(filename, 'r') as f: 

253 whitelist.extend(s.strip() for s in f.read().splitlines()) 

254 

255 if self.whitelist_nslc is None: 

256 self.whitelist_nslc = set() 

257 self.whitelist = set() 

258 self.whitelist_nsl_xx = set() 

259 

260 for x in whitelist: 

261 if isinstance(x, str): 

262 x = tuple(x.split('.')) 

263 if len(x) == 4: 

264 self.whitelist_nslc.add(x) 

265 self.whitelist_nsl_xx.add(x[:3]) 

266 else: 

267 self.whitelist.add(x) 

268 

269 def add_station_corrections(self, filename): 

270 self.station_corrections.update( 

271 (sc.codes, sc) for sc in load_station_corrections(filename)) 

272 

273 def add_picks(self, filename): 

274 self.pick_markers.extend( 

275 pmarker.load_markers(filename)) 

276 

277 self._picks = None 

278 

279 def add_gnss_campaigns(self, paths): 

280 paths = util.select_files( 

281 paths, 

282 include=r'\.yml|\.yaml', 

283 show_progress=False) 

284 

285 for path in paths: 

286 self.add_gnss_campaign(filename=path) 

287 

288 def add_gnss_campaign(self, filename): 

289 try: 

290 from pyrocko.model import gnss # noqa 

291 except ImportError: 

292 raise ImportError('Module pyrocko.model.gnss not found,' 

293 ' please upgrade pyrocko!') 

294 logger.debug('Loading GNSS campaign from "%s"...' % filename) 

295 

296 campaign = load_all(filename=filename) 

297 self.gnss_campaigns.append(campaign[0]) 

298 

299 def add_kite_scenes(self, paths): 

300 logger.info('Loading kite InSAR scenes...') 

301 paths = util.select_files( 

302 paths, 

303 include=r'\.npz', 

304 show_progress=False) 

305 

306 for path in paths: 

307 self.add_kite_scene(filename=path) 

308 

309 if not self.kite_scenes: 

310 logger.warning('Could not find any kite scenes at %s.' % 

311 quote_paths(self.kite_scene_paths)) 

312 

    def add_kite_scene(self, filename):
        '''Load a single kite scene from *filename*.

        :raises ImportError: if the optional ``kite`` package is missing
        :raises AttributeError: if a scene with the same ``scene_id`` was
            already added
        '''
        try:
            from kite import Scene
        except ImportError:
            raise ImportError('Module kite could not be imported,'
                              ' please install from https://pyrocko.org.')
        logger.debug('Loading kite scene from "%s"...' % filename)

        scene = Scene.load(filename)
        # Align the scene's own logger verbosity with ours.
        scene._log.setLevel(logger.level)

        # get_kite_scene() raising NotFound means the id is still unused.
        try:
            self.get_kite_scene(scene.meta.scene_id)
        except NotFound:
            self.kite_scenes.append(scene)
        else:
            raise AttributeError('Kite scene_id not unique for "%s".'
                                 % filename)

331 

332 def is_blacklisted(self, obj): 

333 try: 

334 nslc = self.get_nslc(obj) 

335 if nslc in self.blacklist: 

336 return True 

337 

338 except InvalidObject: 

339 pass 

340 

341 nsl = self.get_nsl(obj) 

342 return ( 

343 nsl in self.blacklist or 

344 nsl[1:2] in self.blacklist or 

345 nsl[:2] in self.blacklist) 

346 

347 def is_whitelisted(self, obj): 

348 if self.whitelist_nslc is None: 

349 return True 

350 

351 nsl = self.get_nsl(obj) 

352 

353 if ( 

354 nsl in self.whitelist or 

355 nsl[1:2] in self.whitelist or 

356 nsl[:2] in self.whitelist): 

357 

358 return True 

359 

360 try: 

361 nslc = self.get_nslc(obj) 

362 if nslc in self.whitelist_nslc: 

363 return True 

364 

365 except InvalidObject: 

366 return nsl in self.whitelist_nsl_xx 

367 

368 def has_clipping(self, nsl_or_nslc, tmin, tmax): 

369 if nsl_or_nslc not in self.clippings: 

370 return False 

371 

372 atimes = self.clippings[nsl_or_nslc] 

373 return num.any(num.logical_and(tmin < atimes, atimes <= tmax)) 

374 

    def get_nsl(self, obj):
        '''Extract (network, station, location) codes from *obj*.

        Accepts pyrocko traces, stations, GF targets and plain 3- or
        4-tuples of codes.

        :raises InvalidObject: if codes cannot be extracted from *obj*
        '''
        if isinstance(obj, trace.Trace):
            net, sta, loc, _ = obj.nslc_id
        elif isinstance(obj, model.Station):
            net, sta, loc = obj.nsl()
        elif isinstance(obj, gf.Target):
            net, sta, loc, _ = obj.codes
        elif isinstance(obj, tuple) and len(obj) in (3, 4):
            net, sta, loc = obj[:3]
        else:
            raise InvalidObject(
                'Cannot get nsl code from given object of type "%s".'
                % type(obj))

        return net, sta, loc

390 

    def get_nslc(self, obj):
        '''Extract (network, station, location, channel) codes from *obj*.

        Accepts pyrocko traces, GF targets and plain 4-tuples of codes.
        Unlike :py:meth:`get_nsl`, this fails for objects without a
        channel (e.g. stations, 3-tuples).

        :raises InvalidObject: if NSLC codes cannot be extracted
        '''
        if isinstance(obj, trace.Trace):
            return obj.nslc_id
        elif isinstance(obj, gf.Target):
            return obj.codes
        elif isinstance(obj, tuple) and len(obj) == 4:
            return obj
        else:
            raise InvalidObject(
                'Cannot get nslc code from given object "%s"' % type(obj))

401 

    def get_tmin_tmax(self, obj):
        '''Return the time span ``(tmin, tmax)`` of *obj*.

        Currently only pyrocko traces are supported.

        :raises InvalidObject: for any other object type
        '''
        if isinstance(obj, trace.Trace):
            return obj.tmin, obj.tmax
        else:
            raise InvalidObject(
                'Cannot get tmin and tmax from given object of type "%s"' %
                type(obj))

409 

    def get_station(self, obj):
        '''Return station metadata matching *obj*.

        Lookup falls back from exact (net, sta, loc) to (net, sta, '')
        and finally ('', sta, '').

        :raises NotFound: if the station is blacklisted, not whitelisted,
            or no matching metadata is available
        '''
        if self.is_blacklisted(obj):
            raise NotFound('Station is blacklisted:', self.get_nsl(obj))

        if not self.is_whitelisted(obj):
            raise NotFound('Station is not on whitelist:', self.get_nsl(obj))

        if isinstance(obj, model.Station):
            return obj

        net, sta, loc = self.get_nsl(obj)

        keys = [(net, sta, loc), (net, sta, ''), ('', sta, '')]
        for k in keys:
            if k in self.stations:
                return self.stations[k]

        raise NotFound('No station information:', keys)

428 

429 def get_stations(self): 

430 return [self.stations[k] for k in sorted(self.stations) 

431 if not self.is_blacklisted(self.stations[k]) 

432 and self.is_whitelisted(self.stations[k])] 

433 

434 def get_kite_scenes(self): 

435 return self.kite_scenes 

436 

437 def get_kite_scene(self, scene_id=None): 

438 if scene_id is None: 

439 if len(self.kite_scenes) == 0: 

440 raise AttributeError('No kite displacements defined.') 

441 return self.kite_scenes[0] 

442 else: 

443 for scene in self.kite_scenes: 

444 if scene.meta.scene_id == scene_id: 

445 return scene 

446 raise NotFound('No kite scene with id "%s" defined.' % scene_id) 

447 

448 def get_gnss_campaigns(self): 

449 return self.gnss_campaigns 

450 

451 def get_gnss_campaign(self, name): 

452 for camp in self.gnss_campaigns: 

453 if camp.name == name: 

454 return camp 

455 raise NotFound('GNSS campaign %s not found!' % name) 

456 

    def get_response(self, obj, quantity='displacement'):
        '''Get the instrument response for the channel identified by *obj*.

        :param obj: trace, target or NSLC tuple carrying channel codes and
            (via :py:meth:`get_tmin_tmax`) a time span
        :param quantity: ``'displacement'``, ``'velocity'`` or
            ``'acceleration'``; SAC PZ responses are differentiated
            accordingly
        :raises NotFound: if no response information is available, the
            channel is excluded, or zero/multiple matching responses are
            found
        '''
        if (self.responses is None or len(self.responses) == 0) \
                and (self.responses_stationxml is None
                     or len(self.responses_stationxml) == 0):

            raise NotFound('No response information available.')

        # Map quantity to the input units requested from StationXML.
        quantity_to_unit = {
            'displacement': 'M',
            'velocity': 'M/S',
            'acceleration': 'M/S**2'}

        if self.is_blacklisted(obj):
            raise NotFound('Response is blacklisted:', self.get_nslc(obj))

        if not self.is_whitelisted(obj):
            raise NotFound('Response is not on whitelist:', self.get_nslc(obj))

        net, sta, loc, cha = self.get_nslc(obj)
        tmin, tmax = self.get_tmin_tmax(obj)

        # Lookup falls back from exact codes to wildcarded net/loc.
        keys_x = [
            (net, sta, loc, cha), (net, sta, '', cha), ('', sta, '', cha)]

        # De-duplicate while keeping fallback order.
        keys = []
        for k in keys_x:
            if k not in keys:
                keys.append(k)

        candidates = []
        for k in keys:
            if k in self.responses:
                for x in self.responses[k]:
                    # Only accept responses fully covering the time span.
                    if x.tmin < tmin and (x.tmax is None or tmax < x.tmax):
                        if quantity == 'displacement':
                            candidates.append(x.response)
                        elif quantity == 'velocity':
                            candidates.append(trace.MultiplyResponse([
                                x.response,
                                trace.DifferentiationResponse()]))
                        elif quantity == 'acceleration':
                            candidates.append(trace.MultiplyResponse([
                                x.response,
                                trace.DifferentiationResponse(2)]))
                        else:
                            assert False

        for sx in self.responses_stationxml:
            try:
                candidates.append(
                    sx.get_pyrocko_response(
                        (net, sta, loc, cha),
                        timespan=(tmin, tmax),
                        fake_input_units=quantity_to_unit[quantity]))

            except (fs.NoResponseInformation, fs.MultipleResponseInformation):
                pass

        # Exactly one match is required; anything else is ambiguous or
        # missing.
        if len(candidates) == 1:
            return candidates[0]

        elif len(candidates) == 0:
            raise NotFound('No response found:', (net, sta, loc, cha))
        else:
            raise NotFound('Multiple responses found:', (net, sta, loc, cha))

522 

    def get_waveform_raw(
            self, obj,
            tmin,
            tmax,
            tpad=0.,
            toffset_noise_extract=0.,
            want_incomplete=False,
            extend_incomplete=False):
        '''Extract raw waveform traces for the channel identified by *obj*.

        :param tmin: begin of requested time window
        :param tmax: end of requested time window
        :param tpad: padding added on both sides of the window
        :param toffset_noise_extract: time shift applied for real-noise
            extraction; traces are shifted back after extraction
        :param want_incomplete: accept gappy/missing data without raising
        :param extend_incomplete: extend a single incomplete trace to the
            padded window by repeating edge samples
        :returns: list of traces
        :raises NotFound: if the channel is excluded, clipped, or the data
            is missing/gappy while complete data was requested
        '''

        net, sta, loc, cha = self.get_nslc(obj)

        if self.is_blacklisted((net, sta, loc, cha)):
            raise NotFound(
                'Waveform is blacklisted:', (net, sta, loc, cha))

        if not self.is_whitelisted((net, sta, loc, cha)):
            raise NotFound(
                'Waveform is not on whitelist:', (net, sta, loc, cha))

        if self.clip_handling == 'by_nsl':
            if self.has_clipping((net, sta, loc), tmin, tmax):
                raise NotFound(
                    'Waveform clipped:', (net, sta, loc))

        elif self.clip_handling == 'by_nslc':
            if self.has_clipping((net, sta, loc, cha), tmin, tmax):
                raise NotFound(
                    'Waveform clipped:', (net, sta, loc, cha))

        trs = self.pile.all(
            tmin=tmin+toffset_noise_extract,
            tmax=tmax+toffset_noise_extract,
            tpad=tpad,
            trace_selector=lambda tr: tr.nslc_id == (net, sta, loc, cha),
            want_incomplete=want_incomplete or extend_incomplete)

        if self.apply_displaced_sampling_workaround:
            for tr in trs:
                tr.snap()

        # Undo the noise-extraction offset so returned traces are in the
        # requested time frame.
        if toffset_noise_extract != 0.0:
            for tr in trs:
                tr.shift(-toffset_noise_extract)

        if extend_incomplete and len(trs) == 1:
            trs[0].extend(
                tmin + toffset_noise_extract - tpad,
                tmax + toffset_noise_extract + tpad,
                fillmethod='repeat')

        if not want_incomplete and len(trs) != 1:
            if len(trs) == 0:
                message = 'Waveform missing or incomplete.'
            else:
                message = 'Waveform has gaps.'

            raise NotFound(
                message,
                codes=(net, sta, loc, cha),
                time_range=(
                    tmin + toffset_noise_extract - tpad,
                    tmax + toffset_noise_extract + tpad))

        return trs

587 

    def get_waveform_restituted(
            self,
            obj, quantity='displacement',
            tmin=None, tmax=None, tpad=0.,
            tfade=0., freqlimits=None, deltat=None,
            toffset_noise_extract=0.,
            want_incomplete=False,
            extend_incomplete=False):
        '''Get raw traces plus instrument-corrected (restituted) versions.

        :param quantity: physical quantity after restitution
        :param tfade: taper length for the transfer-function deconvolution
        :param freqlimits: frequency band for the deconvolution
        :param deltat: if given, traces are downsampled to this sampling
            interval before restitution
        :returns: tuple ``(trs_restituted, trs_raw)``
        :raises NotFound: if data is unavailable or deconvolution hits an
            infinite response
        '''

        if deltat is not None:
            # Extra padding to absorb downsampling edge effects.
            tpad_downsample = deltat * 50.
        else:
            tpad_downsample = 0.

        trs_raw = self.get_waveform_raw(
            obj, tmin=tmin, tmax=tmax, tpad=tpad+tfade+tpad_downsample,
            toffset_noise_extract=toffset_noise_extract,
            want_incomplete=want_incomplete,
            extend_incomplete=extend_incomplete)

        trs_restituted = []
        for tr in trs_raw:
            if deltat is not None:
                tr.downsample_to(deltat, snap=True, allow_upsample_max=5)
                # Force the exact target deltat to avoid float drift.
                tr.deltat = deltat

            resp = self.get_response(tr, quantity=quantity)
            try:
                trs_restituted.append(
                    tr.transfer(
                        tfade=tfade, freqlimits=freqlimits,
                        transfer_function=resp, invert=True, demean=True))

            except trace.InfiniteResponse:
                raise NotFound(
                    'Instrument response deconvolution failed '
                    '(divide by zero)', tr.nslc_id)

        return trs_restituted, trs_raw

627 

    def _get_projections(
            self, station, backazimuth, source, target, tmin, tmax):
        '''Collect component projection matrices (ENZ and, when a
        backazimuth is available, RTZ) for *station*.

        :raises NotFound: if no projection can be determined
        '''

        # fill in missing channel information (happens when station file
        # does not contain any channel information)
        if not station.get_channels():
            station = copy.deepcopy(station)

            nsl = station.nsl()
            trs = self.pile.all(
                tmin=tmin, tmax=tmax,
                trace_selector=lambda tr: tr.nslc_id[:3] == nsl,
                load_data=False)

            channels = list(set(tr.channel for tr in trs))
            station.set_channels_by_name(*channels)

        projections = []
        projections.extend(station.guess_projections_to_enu(
            out_channels=('E', 'N', 'Z')))

        # An explicit source/target pair overrides a given backazimuth.
        if source is not None and target is not None:
            backazimuth = source.azibazi_to(target)[1]

        if backazimuth is not None:
            projections.extend(station.guess_projections_to_rtu(
                out_channels=('R', 'T', 'Z'),
                backazimuth=backazimuth))

        if not projections:
            raise NotFound(
                'Cannot determine projection of data components:',
                station.nsl())

        return projections

663 

664 def _get_waveform( 

665 self, 

666 obj, quantity='displacement', 

667 tmin=None, tmax=None, tpad=0., 

668 tfade=0., freqlimits=None, deltat=None, cache=None, 

669 backazimuth=None, 

670 source=None, 

671 target=None, 

672 debug=False): 

673 

674 assert not debug or (debug and cache is None) 

675 

676 if cache is True: 

677 cache = self._cache 

678 

679 _, _, _, channel = self.get_nslc(obj) 

680 station = self.get_station(self.get_nsl(obj)) 

681 

682 nslc = station.nsl() + (channel,) 

683 

684 if self.is_blacklisted(nslc): 

685 raise NotFound( 

686 'Waveform is blacklisted:', nslc) 

687 

688 if not self.is_whitelisted(nslc): 

689 raise NotFound( 

690 'Waveform is not on whitelist:', nslc) 

691 

692 assert tmin is not None 

693 assert tmax is not None 

694 

695 tmin = float(tmin) 

696 tmax = float(tmax) 

697 

698 nslc = tuple(nslc) 

699 

700 cache_k = nslc + ( 

701 tmin, tmax, tuple(freqlimits), tfade, deltat, tpad, quantity) 

702 if cache is not None and (nslc + cache_k) in cache: 

703 obj = cache[nslc + cache_k] 

704 if isinstance(obj, Exception): 

705 raise obj 

706 elif obj is None: 

707 raise NotFound('Waveform not found!', nslc) 

708 else: 

709 return obj 

710 

711 syn_test = self.synthetic_test 

712 toffset_noise_extract = 0.0 

713 if syn_test: 

714 if not syn_test.respect_data_availability: 

715 if syn_test.real_noise_scale != 0.0: 

716 raise DatasetError( 

717 'respect_data_availability=False and ' 

718 'addition of real noise cannot be combined.') 

719 

720 tr = syn_test.get_waveform( 

721 nslc, tmin, tmax, 

722 tfade=tfade, 

723 freqlimits=freqlimits) 

724 

725 if cache is not None: 

726 cache[tr.nslc_id + cache_k] = tr 

727 

728 if debug: 

729 return [], [], [] 

730 else: 

731 return tr 

732 

733 if syn_test.real_noise_scale != 0.0: 

734 toffset_noise_extract = syn_test.real_noise_toffset 

735 

736 abs_delays = [] 

737 for ocha in 'ENZRT': 

738 sc = self.station_corrections.get(station.nsl() + (channel,), None) 

739 if sc: 

740 abs_delays.append(abs(sc.delay)) 

741 

742 if abs_delays: 

743 abs_delay_max = max(abs_delays) 

744 else: 

745 abs_delay_max = 0.0 

746 

747 projections = self._get_projections( 

748 station, backazimuth, source, target, tmin, tmax) 

749 

750 try: 

751 trs_projected = [] 

752 trs_restituted = [] 

753 trs_raw = [] 

754 exceptions = [] 

755 for matrix, in_channels, out_channels in projections: 

756 deps = trace.project_dependencies( 

757 matrix, in_channels, out_channels) 

758 

759 try: 

760 trs_restituted_group = [] 

761 trs_raw_group = [] 

762 if channel in deps: 

763 for cha in deps[channel]: 

764 trs_restituted_this, trs_raw_this = \ 

765 self.get_waveform_restituted( 

766 station.nsl() + (cha,), 

767 quantity=quantity, 

768 tmin=tmin, tmax=tmax, 

769 tpad=tpad+abs_delay_max, 

770 toffset_noise_extract=toffset_noise_extract, # noqa 

771 tfade=tfade, 

772 freqlimits=freqlimits, 

773 deltat=deltat, 

774 want_incomplete=debug, 

775 extend_incomplete=self.extend_incomplete) 

776 

777 trs_restituted_group.extend(trs_restituted_this) 

778 trs_raw_group.extend(trs_raw_this) 

779 

780 trs_projected.extend( 

781 trace.project( 

782 trs_restituted_group, matrix, 

783 in_channels, out_channels)) 

784 

785 trs_restituted.extend(trs_restituted_group) 

786 trs_raw.extend(trs_raw_group) 

787 

788 except NotFound as e: 

789 exceptions.append((in_channels, out_channels, e)) 

790 

791 if not trs_projected: 

792 err = [] 

793 for (in_channels, out_channels, e) in exceptions: 

794 sin = ', '.join(c.name for c in in_channels) 

795 sout = ', '.join(c.name for c in out_channels) 

796 err.append('(%s) -> (%s): %s' % (sin, sout, e)) 

797 

798 raise NotFound('\n'.join(err)) 

799 

800 for tr in trs_projected: 

801 sc = self.station_corrections.get(tr.nslc_id, None) 

802 if sc: 

803 if self.apply_correction_factors: 

804 tr.ydata /= sc.factor 

805 

806 if self.apply_correction_delays: 

807 tr.shift(-sc.delay) 

808 

809 if tmin is not None and tmax is not None: 

810 tr.chop(tmin, tmax) 

811 

812 if syn_test: 

813 trs_projected_synthetic = [] 

814 for tr in trs_projected: 

815 if tr.channel == channel: 

816 tr_syn = syn_test.get_waveform( 

817 tr.nslc_id, tmin, tmax, 

818 tfade=tfade, freqlimits=freqlimits) 

819 

820 if tr_syn: 

821 if syn_test.real_noise_scale != 0.0: 

822 tr_syn = tr_syn.copy() 

823 tr_noise = tr.copy() 

824 tr_noise.set_ydata( 

825 tr_noise.get_ydata() 

826 * syn_test.real_noise_scale) 

827 

828 tr_syn.add(tr_noise) 

829 

830 trs_projected_synthetic.append(tr_syn) 

831 

832 trs_projected = trs_projected_synthetic 

833 

834 if cache is not None: 

835 for tr in trs_projected: 

836 cache[tr.nslc_id + cache_k] = tr 

837 

838 tr_return = None 

839 for tr in trs_projected: 

840 if tr.channel == channel: 

841 tr_return = tr 

842 

843 if debug: 

844 return trs_projected, trs_restituted, trs_raw, tr_return 

845 

846 elif tr_return: 

847 return tr_return 

848 

849 else: 

850 raise NotFound( 

851 'waveform not available', station.nsl() + (channel,)) 

852 

853 except NotFound: 

854 if cache is not None: 

855 cache[nslc + cache_k] = None 

856 raise 

857 

858 def get_waveform(self, obj, tinc_cache=None, **kwargs): 

859 tmin = kwargs['tmin'] 

860 tmax = kwargs['tmax'] 

861 if tinc_cache is not None: 

862 tmin_r = (math.floor(tmin / tinc_cache) - 1.0) * tinc_cache 

863 tmax_r = (math.ceil(tmax / tinc_cache) + 1.0) * tinc_cache 

864 else: 

865 tmin_r = tmin 

866 tmax_r = tmax 

867 

868 kwargs['tmin'] = tmin_r 

869 kwargs['tmax'] = tmax_r 

870 

871 if kwargs.get('debug', None): 

872 return self._get_waveform(obj, **kwargs) 

873 else: 

874 tr = self._get_waveform(obj, **kwargs) 

875 return tr.chop(tmin, tmax, inplace=False) 

876 

877 def get_events(self, magmin=None, event_names=None): 

878 evs = [] 

879 for ev in self.events: 

880 if ((magmin is None or ev.magnitude >= magmin) and 

881 (event_names is None or ev.name in event_names)): 

882 evs.append(ev) 

883 

884 return evs 

885 

886 def get_event_by_time(self, t, magmin=None): 

887 evs = self.get_events(magmin=magmin) 

888 ev_x = None 

889 for ev in evs: 

890 if ev_x is None or abs(ev.time - t) < abs(ev_x.time - t): 

891 ev_x = ev 

892 

893 if not ev_x: 

894 raise NotFound( 

895 'No event information matching criteria (t=%s, magmin=%s).' % 

896 (t, magmin)) 

897 

898 return ev_x 

899 

900 def get_event(self): 

901 if self._event_name is None: 

902 raise NotFound('No main event selected in dataset!') 

903 

904 for ev in self.events: 

905 if ev.name == self._event_name: 

906 return ev 

907 

908 raise NotFound('No such event: %s' % self._event_name) 

909 

    def get_picks(self):
        '''Return phase picks as a dict keyed by (nsl, phasename, eventname).

        The lookup table is built lazily from the loaded pick markers and
        cached until :py:meth:`add_picks` invalidates it.

        :raises DatasetError: on duplicate event names, unassociated picks
            or duplicate picks
        '''
        if self._picks is None:
            # Map event hashes to event names, from event markers in the
            # pick files and from the dataset's own events.
            hash_to_name = {}
            names = set()
            for marker in self.pick_markers:
                if isinstance(marker, pmarker.EventMarker):
                    name = marker.get_event().name
                    if name in names:
                        raise DatasetError(
                            'Duplicate event name "%s" in picks.' % name)

                    names.add(name)
                    hash_to_name[marker.get_event_hash()] = name

            for ev in self.events:
                hash_to_name[ev.get_hash()] = ev.name

            picks = {}
            for marker in self.pick_markers:
                if isinstance(marker, pmarker.PhaseMarker):
                    ehash = marker.get_event_hash()

                    nsl = marker.one_nslc()[:3]
                    phasename = marker.get_phasename()

                    if ehash is None or ehash not in hash_to_name:
                        raise DatasetError(
                            'Unassociated pick: %s.%s.%s, %s' %
                            (nsl + (phasename, )))

                    eventname = hash_to_name[ehash]

                    if (nsl, phasename, eventname) in picks:
                        raise DatasetError(
                            'Duplicate pick: %s.%s.%s, %s' %
                            (nsl + (phasename, )))

                    picks[nsl, phasename, eventname] = marker

            self._picks = picks

        return self._picks

952 

953 def get_pick(self, eventname, obj, phasename): 

954 nsl = self.get_nsl(obj) 

955 return self.get_picks().get((nsl, phasename, eventname), None) 

956 

957 

class DatasetConfig(HasPaths):
    ''' Configuration for a Grond `Dataset` object. '''

    stations_path = Path.T(
        optional=True,
        help='List of files with station coordinates in Pyrocko format.')
    stations_stationxml_paths = List.T(
        Path.T(),
        optional=True,
        help='List of files with station coordinates in StationXML format.')
    events_path = Path.T(
        optional=True,
        help='File with hypocenter information and possibly'
             ' reference solution')
    waveform_paths = List.T(
        Path.T(),
        optional=True,
        help='List of directories with raw waveform data')
    clippings_path = Path.T(
        optional=True)
    responses_sacpz_path = Path.T(
        optional=True,
        help='List of SACPZ response files for restitution of'
             ' the raw waveform data.')
    responses_stationxml_paths = List.T(
        Path.T(),
        optional=True,
        help='List of StationXML response files for restitution of'
             ' the raw waveform data.')
    station_corrections_path = Path.T(
        optional=True,
        help='File containing station correction informations.')
    apply_correction_factors = Bool.T(
        optional=True,
        default=True,
        help='Apply correction factors from station corrections.')
    apply_correction_delays = Bool.T(
        optional=True,
        default=True,
        help='Apply correction delays from station corrections.')
    apply_displaced_sampling_workaround = Bool.T(
        optional=True,
        default=False,
        help='Work around displaced sampling issues.')
    extend_incomplete = Bool.T(
        default=False,
        help='Extend incomplete seismic traces.')
    picks_paths = List.T(
        Path.T())
    blacklist_paths = List.T(
        Path.T(),
        help='List of text files with blacklisted stations.')
    blacklist = List.T(
        String.T(),
        help='Stations/components to be excluded according to their STA, '
             'NET.STA, NET.STA.LOC, or NET.STA.LOC.CHA codes.')
    whitelist_paths = List.T(
        Path.T(),
        help='List of text files with whitelisted stations.')
    whitelist = List.T(
        String.T(),
        optional=True,
        help='If not None, list of stations/components to include according '
             'to their STA, NET.STA, NET.STA.LOC, or NET.STA.LOC.CHA codes. '
             'Note: ''when whitelisting on channel level, both, the raw and '
             'the processed channel codes have to be listed.')
    synthetic_test = SyntheticTest.T(
        optional=True)

    kite_scene_paths = List.T(
        Path.T(),
        optional=True,
        help='List of directories for the InSAR scenes.')

    gnss_campaign_paths = List.T(
        Path.T(),
        optional=True,
        help='List of directories for the GNSS campaign data.')

    def __init__(self, *args, **kwargs):
        HasPaths.__init__(self, *args, **kwargs)
        # Cache of per-event Dataset instances built by get_dataset().
        self._ds = {}

    def get_event_names(self):
        '''Return sorted names of all events found at ``events_path``.

        The ``${event_name}`` template in the path is expanded with a
        glob wildcard to find all event files.

        :raises DatasetError: if no event files match
        '''
        logger.info('Loading events ...')

        def extra(path):
            return expand_template(path, dict(
                event_name='*'))

        def fp(path):
            return self.expand_path(path, extra=extra)

        def check_events(events, fn):
            # Warn about suspicious event entries; an unnamed event stops
            # further checks for that file.
            for ev in events:
                if not ev.name:
                    logger.warning('Event in %s has no name!', fn)
                    return
                if not ev.lat or not ev.lon:
                    # NOTE(review): zero lat/lon also triggers this warning
                    # (falsy check) — presumably acceptable; confirm.
                    logger.warning('Event %s has inconsistent coordinates!',
                                   ev.name)
                if not ev.depth:
                    logger.warning('Event %s has no depth!', ev.name)
                if not ev.time:
                    logger.warning('Event %s has no time!', ev.name)

        events = []
        events_path = fp(self.events_path)
        fns = glob.glob(events_path)
        if not fns:
            raise DatasetError('No event files matching "%s".' % events_path)

        for fn in fns:
            logger.debug('Loading from file %s' % fn)
            ev = model.load_events(filename=fn)
            check_events(ev, fn)

            events.extend(ev)

        event_names = [ev_.name for ev_ in events]
        event_names.sort()
        return event_names

    def get_dataset(self, event_name):
        '''Build (or return the cached) :py:class:`Dataset` for
        *event_name*.

        All configured paths are expanded with the event name and the
        referenced data is loaded into a new dataset on first request.

        :raises DatasetError: if any referenced file cannot be loaded
        '''
        if event_name not in self._ds:
            def extra(path):
                return expand_template(path, dict(
                    event_name=event_name))

            def fp(path):
                # Expand path template(s) and warn about missing paths.
                p = self.expand_path(path, extra=extra)
                if p is None:
                    return None

                if isinstance(p, list):
                    for path in p:
                        if not op.exists(path):
                            logger.warning('Path %s does not exist.' % path)
                else:
                    if not op.exists(p):
                        logger.warning('Path %s does not exist.' % p)

                return p

            ds = Dataset(event_name)
            try:
                ds.add_events(filename=fp(self.events_path))

                ds.add_stations(
                    pyrocko_stations_filename=fp(self.stations_path),
                    stationxml_filenames=fp(self.stations_stationxml_paths))

                if self.waveform_paths:
                    ds.add_waveforms(paths=fp(self.waveform_paths))

                if self.kite_scene_paths:
                    ds.add_kite_scenes(paths=fp(self.kite_scene_paths))

                if self.gnss_campaign_paths:
                    ds.add_gnss_campaigns(paths=fp(self.gnss_campaign_paths))

                if self.clippings_path:
                    ds.add_clippings(markers_filename=fp(self.clippings_path))

                if self.responses_sacpz_path:
                    ds.add_responses(
                        sacpz_dirname=fp(self.responses_sacpz_path))

                if self.responses_stationxml_paths:
                    ds.add_responses(
                        stationxml_filenames=fp(
                            self.responses_stationxml_paths))

                if self.station_corrections_path:
                    ds.add_station_corrections(
                        filename=fp(self.station_corrections_path))

                ds.apply_correction_factors = self.apply_correction_factors
                ds.apply_correction_delays = self.apply_correction_delays
                ds.apply_displaced_sampling_workaround = \
                    self.apply_displaced_sampling_workaround
                ds.extend_incomplete = self.extend_incomplete

                for picks_path in self.picks_paths:
                    ds.add_picks(
                        filename=fp(picks_path))

                ds.add_blacklist(self.blacklist)
                ds.add_blacklist(filenames=fp(self.blacklist_paths))
                if self.whitelist:
                    ds.add_whitelist(self.whitelist)
                if self.whitelist_paths:
                    ds.add_whitelist(filenames=fp(self.whitelist_paths))

                ds.set_synthetic_test(copy.deepcopy(self.synthetic_test))
                self._ds[event_name] = ds
            except (FileLoadError, OSError) as e:
                raise DatasetError(str(e))

        return self._ds[event_name]

1158 

1159 

# Public API of this module.
__all__ = '''
    Dataset
    DatasetConfig
    DatasetError
    InvalidObject
    NotFound
    StationCorrection
    load_station_corrections
    dump_station_corrections
'''.split()