1import glob 

2import copy 

3import os.path as op 

4import logging 

5import math 

6import numpy as num 

7 

8from collections import defaultdict 

9from pyrocko import util, pile, model, config, trace, \ 

10 marker as pmarker 

11from pyrocko.io.io_common import FileLoadError 

12from pyrocko.fdsn import enhanced_sacpz, station as fs 

13from pyrocko.guts import (Object, Tuple, String, Float, List, Bool, dump_all, 

14 load_all) 

15 

16from pyrocko import gf 

17 

18from .meta import Path, HasPaths, expand_template, GrondError 

19 

20from .synthetic_tests import SyntheticTest 

21 

# Prefix used for guts serialization of objects defined in this module.
guts_prefix = 'grond'

# Module-level logger for dataset loading and access messages.
logger = logging.getLogger('grond.dataset')

24 

25 

def quote_paths(paths):
    '''Return the given paths as a comma-separated, double-quoted string.'''
    quoted = ['"%s"' % p for p in paths]
    return ', '.join(quoted)

28 

29 

class InvalidObject(Exception):
    '''Raised when station/channel codes cannot be derived from an object.'''

32 

33 

class NotFound(Exception):
    '''Raised when a requested dataset item is not available.

    :param reason: human-readable description of what is missing
    :param codes: offending codes; a single codes tuple of strings or a
        list of such tuples (callers like ``get_station`` pass a list of
        candidate keys)
    :param time_range: optional ``(tmin, tmax)`` of the failed request
    '''

    def __init__(self, reason, codes=None, time_range=None):
        self.reason = reason
        self.time_range = time_range
        self.codes = codes

    def __str__(self):
        s = self.reason
        if self.codes:
            s += ' (%s)' % self._str_codes(self.codes)

        if self.time_range:
            s += ' (%s - %s)' % (
                util.time_to_str(self.time_range[0]),
                util.time_to_str(self.time_range[1]))

        return s

    @staticmethod
    def _str_codes(codes):
        # Bug fix: callers pass either one codes tuple of strings or a
        # list of such tuples; '.'.join over a list of tuples raised
        # TypeError while formatting the exception message.
        if isinstance(codes[0], (tuple, list)):
            return ', '.join('.'.join(c) for c in codes)

        return '.'.join(codes)

51 

52 

class DatasetError(GrondError):
    '''Raised for errors in dataset configuration or loading.'''

55 

56 

class StationCorrection(Object):
    '''Static time shift and amplitude factor for one station/channel.'''

    # (net, sta, loc, cha) codes the correction applies to.
    codes = Tuple.T(4, String.T())
    # Time shift [s]; subtracted from observed traces when corrections
    # are applied (see Dataset._get_waveform).
    delay = Float.T()
    # Amplitude factor; observed trace amplitudes are divided by it.
    factor = Float.T()

61 

62 

def load_station_corrections(filename):
    '''Load a list of :py:class:`StationCorrection` objects from a file.

    :param filename: path to the corrections file (guts YAML)
    :returns: list of StationCorrection objects
    :raises DatasetError: if the file contains foreign objects
    '''
    scs = load_all(filename=filename)
    for sc in scs:
        # Validate with an explicit exception rather than an assert, so
        # the check survives ``python -O`` and gives a helpful message.
        if not isinstance(sc, StationCorrection):
            raise DatasetError(
                'Invalid object in station corrections file "%s".' % filename)

    return scs

69 

70 

def dump_station_corrections(station_corrections, filename):
    '''Write station corrections to a file in guts YAML format.

    :param station_corrections: iterable of StationCorrection objects
    :param filename: output path
    '''
    return dump_all(station_corrections, filename=filename)

73 

74 

class Dataset(object):
    '''Container for all data belonging to the optimization of one event:
    stations, reference events, waveforms, responses, InSAR scenes and
    GNSS campaigns, plus access policies (black/whitelists, corrections,
    clippings).'''

    def __init__(self, event_name=None):
        # Reference events (catalog solutions) available to this dataset.
        self.events = []
        # Waveform archive access (lazy, file-backed).
        self.pile = pile.Pile()
        # Station lookup, keyed by (net, sta, loc) tuples.
        self.stations = {}
        # Instrument responses from SAC PZ files, grouped by codes.
        self.responses = defaultdict(list)
        # Instrument responses from StationXML documents.
        self.responses_stationxml = []
        # Clipping onset times (numpy arrays) per nsl/nslc key.
        self.clippings = {}
        # Station/channel codes to be excluded.
        self.blacklist = set()
        # Whitelist sets; None means "no whitelist configured".
        self.whitelist_nslc = None
        self.whitelist_nsl_xx = None
        self.whitelist = None
        # Station corrections keyed by nslc codes.
        self.station_corrections = {}
        self.station_factors = {}
        # Phase pick markers loaded from marker files.
        self.pick_markers = []
        self.apply_correction_delays = True
        self.apply_correction_factors = True
        self.apply_displaced_sampling_workaround = False
        self.extend_incomplete = False
        # Granularity at which clippings are checked: 'by_nsl' or 'by_nslc'.
        self.clip_handling = 'by_nsl'
        self.kite_scenes = []
        self.gnss_campaigns = []
        # Optional SyntheticTest object replacing real waveforms.
        self.synthetic_test = None
        # Cache for picks grouped by (nsl, phasename, eventname).
        self._picks = None
        # Cache for processed waveforms, keyed by codes plus parameters.
        self._cache = {}
        # Name of the main event of this dataset.
        self._event_name = event_name

102 

103 def empty_cache(self): 

104 self._cache = {} 

105 

106 def set_synthetic_test(self, synthetic_test): 

107 self.synthetic_test = synthetic_test 

108 

    def add_stations(
            self,
            stations=None,
            pyrocko_stations_filename=None,
            stationxml_filenames=None):
        '''Add station metadata from objects and/or files.

        :param stations: list of pyrocko station objects to add directly
        :param pyrocko_stations_filename: stations file in Pyrocko format
        :param stationxml_filenames: list of StationXML file paths;
            non-existing files are silently skipped
        '''

        if stations is not None:
            for station in stations:
                self.stations[station.nsl()] = station

        if pyrocko_stations_filename is not None:
            logger.debug(
                'Loading stations from file "%s"...' %
                pyrocko_stations_filename)

            for station in model.load_stations(pyrocko_stations_filename):
                self.stations[station.nsl()] = station

        if stationxml_filenames is not None and len(stationxml_filenames) > 0:

            for stationxml_filename in stationxml_filenames:
                if not op.exists(stationxml_filename):
                    continue

                logger.debug(
                    'Loading stations from StationXML file "%s"...' %
                    stationxml_filename)

                sx = fs.load_xml(filename=stationxml_filename)
                ev = self.get_event()
                # Station epochs are selected at the main event's time.
                stations = sx.get_pyrocko_stations(time=ev.time)
                if len(stations) == 0:
                    logger.warning(
                        'No stations found for time %s in file "%s".' % (
                            util.time_to_str(ev.time), stationxml_filename))

                for station in stations:
                    logger.debug('Adding station: %s.%s.%s' % station.nsl())
                    channels = station.get_channels()
                    if len(channels) == 1 and channels[0].name.endswith('Z'):
                        # Some providers ship vertical-only channel
                        # metadata; fabricate N and E channels so that
                        # horizontal projections can be set up later.
                        logger.warning(
                            'Station "%s" has vertical component'
                            ' information only, adding mocked channels.'
                            % station.nsl_string())
                        station.add_channel(
                            model.Channel(channels[0].name[:-1] + 'N'))
                        station.add_channel(
                            model.Channel(channels[0].name[:-1] + 'E'))

                    self.stations[station.nsl()] = station

159 

160 def add_events(self, events=None, filename=None): 

161 if events is not None: 

162 self.events.extend(events) 

163 

164 if filename is not None: 

165 logger.debug('Loading events from file "%s"...' % filename) 

166 try: 

167 events = model.load_events(filename) 

168 self.events.extend(events) 

169 logger.info( 

170 'Loading events from %s: %i events found.' % 

171 (filename, len(events))) 

172 except Exception as e: 

173 logger.warning('Could not load events from %s!', filename) 

174 raise e 

175 

    def add_waveforms(self, paths, regex=None, fileformat='detect',
                      show_progress=False):
        '''Scan paths for waveform files and add them to the pile.

        :param paths: list of files/directories to search
        :param regex: optional filename inclusion pattern
        :param fileformat: waveform file format, 'detect' to auto-detect
        :param show_progress: show progress bars while scanning
        '''
        # Scan results are cached on disk in pyrocko's global cache dir.
        cachedirname = config.config().cache_dir

        logger.debug('Selecting waveform files %s...' % quote_paths(paths))
        fns = util.select_files(paths, include=regex,
                                show_progress=show_progress)
        cache = pile.get_cache(cachedirname)
        logger.debug('Scanning waveform files %s...' % quote_paths(paths))
        self.pile.load_files(sorted(fns), cache=cache,
                             fileformat=fileformat,
                             show_progress=show_progress)

188 

    def add_responses(self, sacpz_dirname=None, stationxml_filenames=None):
        '''Add instrument response information.

        :param sacpz_dirname: directory with enhanced SAC PZ files
        :param stationxml_filenames: list of StationXML file paths;
            non-existing files are silently skipped
        '''
        if sacpz_dirname:
            logger.debug(
                'Loading SAC PZ responses from "%s"...' % sacpz_dirname)
            for x in enhanced_sacpz.iload_dirname(sacpz_dirname):
                self.responses[x.codes].append(x)

        if stationxml_filenames:
            for stationxml_filename in stationxml_filenames:
                if not op.exists(stationxml_filename):
                    continue

                logger.debug(
                    'Loading StationXML responses from "%s"...' %
                    stationxml_filename)

                self.responses_stationxml.append(
                    fs.load_xml(filename=stationxml_filename))

207 

208 def add_clippings(self, markers_filename): 

209 markers = pmarker.load_markers(markers_filename) 

210 clippings = {} 

211 for marker in markers: 

212 nslc = marker.one_nslc() 

213 nsl = nslc[:3] 

214 if nsl not in clippings: 

215 clippings[nsl] = [] 

216 

217 if nslc not in clippings: 

218 clippings[nslc] = [] 

219 

220 clippings[nsl].append(marker.tmin) 

221 clippings[nslc].append(marker.tmin) 

222 

223 for k, times in clippings.items(): 

224 atimes = num.array(times, dtype=float) 

225 if k not in self.clippings: 

226 self.clippings[k] = atimes 

227 else: 

228 self.clippings[k] = num.concatenate(self.clippings, atimes) 

229 

230 def add_blacklist(self, blacklist=[], filenames=None): 

231 logger.debug('Loading blacklisted stations...') 

232 if filenames: 

233 blacklist = list(blacklist) 

234 for filename in filenames: 

235 if op.exists(filename): 

236 with open(filename, 'r') as f: 

237 blacklist.extend( 

238 s.strip() for s in f.read().splitlines()) 

239 else: 

240 logger.warning('No such blacklist file: %s' % filename) 

241 

242 for x in blacklist: 

243 if isinstance(x, str): 

244 x = tuple(x.split('.')) 

245 self.blacklist.add(x) 

246 

247 def add_whitelist(self, whitelist=[], filenames=None): 

248 logger.debug('Loading whitelisted stations...') 

249 if filenames: 

250 whitelist = list(whitelist) 

251 for filename in filenames: 

252 with open(filename, 'r') as f: 

253 whitelist.extend(s.strip() for s in f.read().splitlines()) 

254 

255 if self.whitelist_nslc is None: 

256 self.whitelist_nslc = set() 

257 self.whitelist = set() 

258 self.whitelist_nsl_xx = set() 

259 

260 for x in whitelist: 

261 if isinstance(x, str): 

262 x = tuple(x.split('.')) 

263 if len(x) == 4: 

264 self.whitelist_nslc.add(x) 

265 self.whitelist_nsl_xx.add(x[:3]) 

266 else: 

267 self.whitelist.add(x) 

268 

269 def add_station_corrections(self, filename): 

270 self.station_corrections.update( 

271 (sc.codes, sc) for sc in load_station_corrections(filename)) 

272 

273 def add_picks(self, filename): 

274 self.pick_markers.extend( 

275 pmarker.load_markers(filename)) 

276 

277 self._picks = None 

278 

279 def add_gnss_campaigns(self, paths): 

280 paths = util.select_files( 

281 paths, 

282 include=r'\.yml|\.yaml', 

283 show_progress=False) 

284 

285 for path in paths: 

286 self.add_gnss_campaign(filename=path) 

287 

    def add_gnss_campaign(self, filename):
        '''Load a single GNSS campaign from a guts YAML file.

        :raises ImportError: if the installed pyrocko lacks the gnss model
        '''
        try:
            from pyrocko.model import gnss  # noqa
        except ImportError:
            raise ImportError('Module pyrocko.model.gnss not found,'
                              ' please upgrade pyrocko!')
        logger.debug('Loading GNSS campaign from "%s"...' % filename)

        campaign = load_all(filename=filename)
        # NOTE(review): only the first object in the file is used; an
        # empty file would raise IndexError here -- confirm files always
        # contain exactly one campaign.
        self.gnss_campaigns.append(campaign[0])

298 

299 def add_kite_scenes(self, paths): 

300 logger.info('Loading kite InSAR scenes...') 

301 paths = util.select_files( 

302 paths, 

303 include=r'\.npz', 

304 show_progress=False) 

305 

306 for path in paths: 

307 self.add_kite_scene(filename=path) 

308 

309 if not self.kite_scenes: 

310 logger.warning('Could not find any kite scenes at %s.' % 

311 quote_paths(self.kite_scene_paths)) 

312 

    def add_kite_scene(self, filename):
        '''Load a single kite InSAR scene from file.

        :raises ImportError: if the kite module is not installed
        :raises AttributeError: if a scene with the same scene_id exists
        '''
        try:
            from kite import Scene
        except ImportError:
            raise ImportError('Module kite could not be imported,'
                              ' please install from https://pyrocko.org.')
        logger.debug('Loading kite scene from "%s"...' % filename)

        scene = Scene.load(filename)
        scene._log.setLevel(logger.level)

        # Enforce unique scene ids: the lookup must fail (NotFound) for
        # the new scene to be accepted.
        try:
            self.get_kite_scene(scene.meta.scene_id)
        except NotFound:
            self.kite_scenes.append(scene)
        else:
            raise AttributeError('Kite scene_id not unique for "%s".'
                                 % filename)

331 

332 def is_blacklisted(self, obj): 

333 try: 

334 nslc = self.get_nslc(obj) 

335 if nslc in self.blacklist: 

336 return True 

337 

338 except InvalidObject: 

339 pass 

340 

341 nsl = self.get_nsl(obj) 

342 return ( 

343 nsl in self.blacklist or 

344 nsl[1:2] in self.blacklist or 

345 nsl[:2] in self.blacklist) 

346 

347 def is_whitelisted(self, obj): 

348 if self.whitelist_nslc is None: 

349 return True 

350 

351 nsl = self.get_nsl(obj) 

352 

353 if ( 

354 nsl in self.whitelist or 

355 nsl[1:2] in self.whitelist or 

356 nsl[:2] in self.whitelist): 

357 

358 return True 

359 

360 try: 

361 nslc = self.get_nslc(obj) 

362 if nslc in self.whitelist_nslc: 

363 return True 

364 

365 except InvalidObject: 

366 return nsl in self.whitelist_nsl_xx 

367 

368 def has_clipping(self, nsl_or_nslc, tmin, tmax): 

369 if nsl_or_nslc not in self.clippings: 

370 return False 

371 

372 atimes = self.clippings[nsl_or_nslc] 

373 return num.any(num.logical_and(tmin < atimes, atimes <= tmax)) 

374 

375 def get_nsl(self, obj): 

376 if isinstance(obj, trace.Trace): 

377 net, sta, loc, _ = obj.nslc_id 

378 elif isinstance(obj, model.Station): 

379 net, sta, loc = obj.nsl() 

380 elif isinstance(obj, gf.Target): 

381 net, sta, loc, _ = obj.codes 

382 elif isinstance(obj, tuple) and len(obj) in (3, 4): 

383 net, sta, loc = obj[:3] 

384 else: 

385 raise InvalidObject( 

386 'Cannot get nsl code from given object of type "%s".' 

387 % type(obj)) 

388 

389 return net, sta, loc 

390 

391 def get_nslc(self, obj): 

392 if isinstance(obj, trace.Trace): 

393 return obj.nslc_id 

394 elif isinstance(obj, gf.Target): 

395 return obj.codes 

396 elif isinstance(obj, tuple) and len(obj) == 4: 

397 return obj 

398 else: 

399 raise InvalidObject( 

400 'Cannot get nslc code from given object "%s"' % type(obj)) 

401 

402 def get_tmin_tmax(self, obj): 

403 if isinstance(obj, trace.Trace): 

404 return obj.tmin, obj.tmax 

405 else: 

406 raise InvalidObject( 

407 'Cannot get tmin and tmax from given object of type "%s"' % 

408 type(obj)) 

409 

410 def get_station(self, obj): 

411 if self.is_blacklisted(obj): 

412 raise NotFound('Station is blacklisted:', self.get_nsl(obj)) 

413 

414 if not self.is_whitelisted(obj): 

415 raise NotFound('Station is not on whitelist:', self.get_nsl(obj)) 

416 

417 if isinstance(obj, model.Station): 

418 return obj 

419 

420 net, sta, loc = self.get_nsl(obj) 

421 

422 keys = [(net, sta, loc), (net, sta, ''), ('', sta, '')] 

423 for k in keys: 

424 if k in self.stations: 

425 return self.stations[k] 

426 

427 raise NotFound('No station information:', keys) 

428 

429 def get_stations(self): 

430 return [self.stations[k] for k in sorted(self.stations) 

431 if not self.is_blacklisted(self.stations[k]) 

432 and self.is_whitelisted(self.stations[k])] 

433 

434 def get_kite_scenes(self): 

435 return self.kite_scenes 

436 

437 def get_kite_scene(self, scene_id=None): 

438 if scene_id is None: 

439 if len(self.kite_scenes) == 0: 

440 raise AttributeError('No kite displacements defined.') 

441 return self.kite_scenes[0] 

442 else: 

443 for scene in self.kite_scenes: 

444 if scene.meta.scene_id == scene_id: 

445 return scene 

446 raise NotFound('No kite scene with id "%s" defined.' % scene_id) 

447 

448 def get_gnss_campaigns(self): 

449 return self.gnss_campaigns 

450 

451 def get_gnss_campaign(self, name): 

452 for camp in self.gnss_campaigns: 

453 if camp.name == name: 

454 return camp 

455 raise NotFound('GNSS campaign %s not found!' % name) 

456 

    def get_response(self, obj, quantity='displacement'):
        '''Get the instrument response for the given object's channel.

        SAC PZ responses are searched first, then StationXML documents.
        For 'velocity'/'acceleration' the SAC PZ displacement response is
        combined with one/two differentiations.

        :raises NotFound: if zero or multiple matching responses exist
        '''
        if (self.responses is None or len(self.responses) == 0) \
                and (self.responses_stationxml is None
                     or len(self.responses_stationxml) == 0):

            raise NotFound('No response information available.')

        quantity_to_unit = {
            'displacement': 'M',
            'velocity': 'M/S',
            'acceleration': 'M/S**2'}

        if self.is_blacklisted(obj):
            raise NotFound('Response is blacklisted:', self.get_nslc(obj))

        if not self.is_whitelisted(obj):
            raise NotFound('Response is not on whitelist:', self.get_nslc(obj))

        net, sta, loc, cha = self.get_nslc(obj)
        tmin, tmax = self.get_tmin_tmax(obj)

        # Candidate lookup keys, from most to least specific.
        keys_x = [
            (net, sta, loc, cha), (net, sta, '', cha), ('', sta, '', cha)]

        # Deduplicate while preserving order (keys coincide when loc/net
        # are already empty).
        keys = []
        for k in keys_x:
            if k not in keys:
                keys.append(k)

        candidates = []
        for k in keys:
            if k in self.responses:
                for x in self.responses[k]:
                    # The response epoch must fully cover the request span.
                    if x.tmin < tmin and (x.tmax is None or tmax < x.tmax):
                        if quantity == 'displacement':
                            candidates.append(x.response)
                        elif quantity == 'velocity':
                            # Differentiate once:
                            # displacement -> velocity.
                            candidates.append(trace.MultiplyResponse([
                                x.response,
                                trace.DifferentiationResponse()]))
                        elif quantity == 'acceleration':
                            # Differentiate twice:
                            # displacement -> acceleration.
                            candidates.append(trace.MultiplyResponse([
                                x.response,
                                trace.DifferentiationResponse(2)]))
                        else:
                            assert False

        for sx in self.responses_stationxml:
            try:
                candidates.append(
                    sx.get_pyrocko_response(
                        (net, sta, loc, cha),
                        timespan=(tmin, tmax),
                        fake_input_units=quantity_to_unit[quantity]))

            except (fs.NoResponseInformation, fs.MultipleResponseInformation):
                pass

        if len(candidates) == 1:
            return candidates[0]

        elif len(candidates) == 0:
            raise NotFound('No response found:', (net, sta, loc, cha))
        else:
            raise NotFound('Multiple responses found:', (net, sta, loc, cha))

522 

    def get_waveform_raw(
            self, obj,
            tmin,
            tmax,
            tpad=0.,
            toffset_noise_extract=0.,
            want_incomplete=False,
            extend_incomplete=False):
        '''Get raw waveform traces for [tmin-tpad, tmax+tpad].

        :param toffset_noise_extract: shift applied to the extraction
            window so that real noise can be taken from outside the
            signal window (synthetic tests); traces are shifted back
        :param want_incomplete: accept gappy/partial data
        :param extend_incomplete: repair a single incomplete trace by
            repeating edge samples
        :raises NotFound: if the waveform is blacklisted, not
            whitelisted, clipped, missing or gappy
        '''

        net, sta, loc, cha = self.get_nslc(obj)

        if self.is_blacklisted((net, sta, loc, cha)):
            raise NotFound(
                'Waveform is blacklisted:', (net, sta, loc, cha))

        if not self.is_whitelisted((net, sta, loc, cha)):
            raise NotFound(
                'Waveform is not on whitelist:', (net, sta, loc, cha))

        # Clippings may be registered per station (nsl) or per channel
        # (nslc), depending on self.clip_handling.
        if self.clip_handling == 'by_nsl':
            if self.has_clipping((net, sta, loc), tmin, tmax):
                raise NotFound(
                    'Waveform clipped:', (net, sta, loc))

        elif self.clip_handling == 'by_nslc':
            if self.has_clipping((net, sta, loc, cha), tmin, tmax):
                raise NotFound(
                    'Waveform clipped:', (net, sta, loc, cha))

        trs = self.pile.all(
            tmin=tmin+toffset_noise_extract,
            tmax=tmax+toffset_noise_extract,
            tpad=tpad,
            trace_selector=lambda tr: tr.nslc_id == (net, sta, loc, cha),
            want_incomplete=want_incomplete or extend_incomplete)

        if self.apply_displaced_sampling_workaround:
            for tr in trs:
                tr.snap()

        # Undo the noise-extraction offset so returned traces are aligned
        # with the requested window.
        if toffset_noise_extract != 0.0:
            for tr in trs:
                tr.shift(-toffset_noise_extract)

        if extend_incomplete and len(trs) == 1:
            # NOTE(review): the extend bounds still include
            # toffset_noise_extract although the trace was already
            # shifted back above -- confirm this is intended.
            trs[0].extend(
                tmin + toffset_noise_extract - tpad,
                tmax + toffset_noise_extract + tpad,
                fillmethod='repeat')

        if not want_incomplete and len(trs) != 1:
            if len(trs) == 0:
                message = 'Waveform missing or incomplete.'
            else:
                message = 'Waveform has gaps.'

            raise NotFound(
                message,
                codes=(net, sta, loc, cha),
                time_range=(
                    tmin + toffset_noise_extract - tpad,
                    tmax + toffset_noise_extract + tpad))

        return trs

587 

    def get_waveform_restituted(
            self,
            obj, quantity='displacement',
            tmin=None, tmax=None, tpad=0.,
            tfade=0., freqlimits=None, deltat=None,
            toffset_noise_extract=0.,
            want_incomplete=False,
            extend_incomplete=False):
        '''Get raw traces and their instrument-corrected counterparts.

        Raw data is optionally downsampled, then deconvolved with the
        instrument response for the requested *quantity*.

        :returns: tuple ``(trs_restituted, trs_raw)``
        :raises NotFound: if data or response is unavailable, or if the
            deconvolution is singular
        '''

        trs_raw = self.get_waveform_raw(
            obj, tmin=tmin, tmax=tmax, tpad=tpad+tfade,
            toffset_noise_extract=toffset_noise_extract,
            want_incomplete=want_incomplete,
            extend_incomplete=extend_incomplete)

        trs_restituted = []
        for tr in trs_raw:
            if deltat is not None:
                tr.downsample_to(deltat, snap=True, allow_upsample_max=5)
                # Force the exact target sampling interval to avoid
                # floating-point drift after downsampling.
                tr.deltat = deltat

            resp = self.get_response(tr, quantity=quantity)
            try:
                trs_restituted.append(
                    tr.transfer(
                        tfade=tfade, freqlimits=freqlimits,
                        transfer_function=resp, invert=True, demean=True))

            except trace.InfiniteResponse:
                raise NotFound(
                    'Instrument response deconvolution failed '
                    '(divide by zero)', tr.nslc_id)

        return trs_restituted, trs_raw

622 

    def _get_projections(
            self, station, backazimuth, source, target, tmin, tmax):
        '''Determine channel projections for a station.

        :returns: list of ``(matrix, in_channels, out_channels)``
            projections to ENZ and, when a backazimuth is available, RTZ
        :raises NotFound: if no projection can be determined
        '''

        # fill in missing channel information (happens when station file
        # does not contain any channel information)
        if not station.get_channels():
            station = copy.deepcopy(station)

            nsl = station.nsl()
            # Guess the available channels from the waveform headers at
            # hand (no data is loaded).
            trs = self.pile.all(
                tmin=tmin, tmax=tmax,
                trace_selector=lambda tr: tr.nslc_id[:3] == nsl,
                load_data=False)

            channels = list(set(tr.channel for tr in trs))
            station.set_channels_by_name(*channels)

        projections = []
        projections.extend(station.guess_projections_to_enu(
            out_channels=('E', 'N', 'Z')))

        # An explicit source/target geometry overrides a given backazimuth.
        if source is not None and target is not None:
            backazimuth = source.azibazi_to(target)[1]

        if backazimuth is not None:
            projections.extend(station.guess_projections_to_rtu(
                out_channels=('R', 'T', 'Z'),
                backazimuth=backazimuth))

        if not projections:
            raise NotFound(
                'Cannot determine projection of data components:',
                station.nsl())

        return projections

658 

659 def _get_waveform( 

660 self, 

661 obj, quantity='displacement', 

662 tmin=None, tmax=None, tpad=0., 

663 tfade=0., freqlimits=None, deltat=None, cache=None, 

664 backazimuth=None, 

665 source=None, 

666 target=None, 

667 debug=False): 

668 

669 assert not debug or (debug and cache is None) 

670 

671 if cache is True: 

672 cache = self._cache 

673 

674 _, _, _, channel = self.get_nslc(obj) 

675 station = self.get_station(self.get_nsl(obj)) 

676 

677 nslc = station.nsl() + (channel,) 

678 

679 if self.is_blacklisted(nslc): 

680 raise NotFound( 

681 'Waveform is blacklisted:', nslc) 

682 

683 if not self.is_whitelisted(nslc): 

684 raise NotFound( 

685 'Waveform is not on whitelist:', nslc) 

686 

687 assert tmin is not None 

688 assert tmax is not None 

689 

690 tmin = float(tmin) 

691 tmax = float(tmax) 

692 

693 nslc = tuple(nslc) 

694 

695 cache_k = nslc + ( 

696 tmin, tmax, tuple(freqlimits), tfade, deltat, tpad, quantity) 

697 if cache is not None and (nslc + cache_k) in cache: 

698 obj = cache[nslc + cache_k] 

699 if isinstance(obj, Exception): 

700 raise obj 

701 elif obj is None: 

702 raise NotFound('Waveform not found!', nslc) 

703 else: 

704 return obj 

705 

706 syn_test = self.synthetic_test 

707 toffset_noise_extract = 0.0 

708 if syn_test: 

709 if not syn_test.respect_data_availability: 

710 if syn_test.real_noise_scale != 0.0: 

711 raise DatasetError( 

712 'respect_data_availability=False and ' 

713 'addition of real noise cannot be combined.') 

714 

715 tr = syn_test.get_waveform( 

716 nslc, tmin, tmax, 

717 tfade=tfade, 

718 freqlimits=freqlimits) 

719 

720 if cache is not None: 

721 cache[tr.nslc_id + cache_k] = tr 

722 

723 if debug: 

724 return [], [], [] 

725 else: 

726 return tr 

727 

728 if syn_test.real_noise_scale != 0.0: 

729 toffset_noise_extract = syn_test.real_noise_toffset 

730 

731 abs_delays = [] 

732 for ocha in 'ENZRT': 

733 sc = self.station_corrections.get(station.nsl() + (channel,), None) 

734 if sc: 

735 abs_delays.append(abs(sc.delay)) 

736 

737 if abs_delays: 

738 abs_delay_max = max(abs_delays) 

739 else: 

740 abs_delay_max = 0.0 

741 

742 projections = self._get_projections( 

743 station, backazimuth, source, target, tmin, tmax) 

744 

745 try: 

746 trs_projected = [] 

747 trs_restituted = [] 

748 trs_raw = [] 

749 exceptions = [] 

750 for matrix, in_channels, out_channels in projections: 

751 deps = trace.project_dependencies( 

752 matrix, in_channels, out_channels) 

753 

754 try: 

755 trs_restituted_group = [] 

756 trs_raw_group = [] 

757 if channel in deps: 

758 for cha in deps[channel]: 

759 trs_restituted_this, trs_raw_this = \ 

760 self.get_waveform_restituted( 

761 station.nsl() + (cha,), 

762 quantity=quantity, 

763 tmin=tmin, tmax=tmax, 

764 tpad=tpad+abs_delay_max, 

765 toffset_noise_extract=toffset_noise_extract, # noqa 

766 tfade=tfade, 

767 freqlimits=freqlimits, 

768 deltat=deltat, 

769 want_incomplete=debug, 

770 extend_incomplete=self.extend_incomplete) 

771 

772 trs_restituted_group.extend(trs_restituted_this) 

773 trs_raw_group.extend(trs_raw_this) 

774 

775 trs_projected.extend( 

776 trace.project( 

777 trs_restituted_group, matrix, 

778 in_channels, out_channels)) 

779 

780 trs_restituted.extend(trs_restituted_group) 

781 trs_raw.extend(trs_raw_group) 

782 

783 except NotFound as e: 

784 exceptions.append((in_channels, out_channels, e)) 

785 

786 if not trs_projected: 

787 err = [] 

788 for (in_channels, out_channels, e) in exceptions: 

789 sin = ', '.join(c.name for c in in_channels) 

790 sout = ', '.join(c.name for c in out_channels) 

791 err.append('(%s) -> (%s): %s' % (sin, sout, e)) 

792 

793 raise NotFound('\n'.join(err)) 

794 

795 for tr in trs_projected: 

796 sc = self.station_corrections.get(tr.nslc_id, None) 

797 if sc: 

798 if self.apply_correction_factors: 

799 tr.ydata /= sc.factor 

800 

801 if self.apply_correction_delays: 

802 tr.shift(-sc.delay) 

803 

804 if tmin is not None and tmax is not None: 

805 tr.chop(tmin, tmax) 

806 

807 if syn_test: 

808 trs_projected_synthetic = [] 

809 for tr in trs_projected: 

810 if tr.channel == channel: 

811 tr_syn = syn_test.get_waveform( 

812 tr.nslc_id, tmin, tmax, 

813 tfade=tfade, freqlimits=freqlimits) 

814 

815 if tr_syn: 

816 if syn_test.real_noise_scale != 0.0: 

817 tr_syn = tr_syn.copy() 

818 tr_noise = tr.copy() 

819 tr_noise.set_ydata( 

820 tr_noise.get_ydata() 

821 * syn_test.real_noise_scale) 

822 

823 tr_syn.add(tr_noise) 

824 

825 trs_projected_synthetic.append(tr_syn) 

826 

827 trs_projected = trs_projected_synthetic 

828 

829 if cache is not None: 

830 for tr in trs_projected: 

831 cache[tr.nslc_id + cache_k] = tr 

832 

833 tr_return = None 

834 for tr in trs_projected: 

835 if tr.channel == channel: 

836 tr_return = tr 

837 

838 if debug: 

839 return trs_projected, trs_restituted, trs_raw, tr_return 

840 

841 elif tr_return: 

842 return tr_return 

843 

844 else: 

845 raise NotFound( 

846 'waveform not available', station.nsl() + (channel,)) 

847 

848 except NotFound: 

849 if cache is not None: 

850 cache[nslc + cache_k] = None 

851 raise 

852 

853 def get_waveform(self, obj, tinc_cache=None, **kwargs): 

854 tmin = kwargs['tmin'] 

855 tmax = kwargs['tmax'] 

856 if tinc_cache is not None: 

857 tmin_r = (math.floor(tmin / tinc_cache) - 1.0) * tinc_cache 

858 tmax_r = (math.ceil(tmax / tinc_cache) + 1.0) * tinc_cache 

859 else: 

860 tmin_r = tmin 

861 tmax_r = tmax 

862 

863 kwargs['tmin'] = tmin_r 

864 kwargs['tmax'] = tmax_r 

865 

866 if kwargs.get('debug', None): 

867 return self._get_waveform(obj, **kwargs) 

868 else: 

869 tr = self._get_waveform(obj, **kwargs) 

870 return tr.chop(tmin, tmax, inplace=False) 

871 

872 def get_events(self, magmin=None, event_names=None): 

873 evs = [] 

874 for ev in self.events: 

875 if ((magmin is None or ev.magnitude >= magmin) and 

876 (event_names is None or ev.name in event_names)): 

877 evs.append(ev) 

878 

879 return evs 

880 

881 def get_event_by_time(self, t, magmin=None): 

882 evs = self.get_events(magmin=magmin) 

883 ev_x = None 

884 for ev in evs: 

885 if ev_x is None or abs(ev.time - t) < abs(ev_x.time - t): 

886 ev_x = ev 

887 

888 if not ev_x: 

889 raise NotFound( 

890 'No event information matching criteria (t=%s, magmin=%s).' % 

891 (t, magmin)) 

892 

893 return ev_x 

894 

895 def get_event(self): 

896 if self._event_name is None: 

897 raise NotFound('No main event selected in dataset!') 

898 

899 for ev in self.events: 

900 if ev.name == self._event_name: 

901 return ev 

902 

903 raise NotFound('No such event: %s' % self._event_name) 

904 

    def get_picks(self):
        '''Return phase picks keyed by ``(nsl, phasename, eventname)``.

        Built lazily from ``self.pick_markers`` and cached; each
        PhaseMarker is associated with an event via its event hash.

        :raises DatasetError: on duplicate event names, duplicate picks
            or picks that cannot be associated with an event
        '''
        if self._picks is None:
            # Map event hashes to names, from event markers in the pick
            # files and from the dataset's own events.
            hash_to_name = {}
            names = set()
            for marker in self.pick_markers:
                if isinstance(marker, pmarker.EventMarker):
                    name = marker.get_event().name
                    if name in names:
                        raise DatasetError(
                            'Duplicate event name "%s" in picks.' % name)

                    names.add(name)
                    hash_to_name[marker.get_event_hash()] = name

            for ev in self.events:
                hash_to_name[ev.get_hash()] = ev.name

            picks = {}
            for marker in self.pick_markers:
                if isinstance(marker, pmarker.PhaseMarker):
                    ehash = marker.get_event_hash()

                    nsl = marker.one_nslc()[:3]
                    phasename = marker.get_phasename()

                    if ehash is None or ehash not in hash_to_name:
                        raise DatasetError(
                            'Unassociated pick: %s.%s.%s, %s' %
                            (nsl + (phasename, )))

                    eventname = hash_to_name[ehash]

                    if (nsl, phasename, eventname) in picks:
                        raise DatasetError(
                            'Duplicate pick: %s.%s.%s, %s' %
                            (nsl + (phasename, )))

                    picks[nsl, phasename, eventname] = marker

            self._picks = picks

        return self._picks

947 

948 def get_pick(self, eventname, obj, phasename): 

949 nsl = self.get_nsl(obj) 

950 return self.get_picks().get((nsl, phasename, eventname), None) 

951 

952 

class DatasetConfig(HasPaths):
    '''Configuration for a Grond `Dataset` object.

    Paths may contain the ``${event_name}`` template which is expanded
    per event (see :py:meth:`get_dataset`).
    '''

    stations_path = Path.T(
        optional=True,
        help='List of files with station coordinates in Pyrocko format.')
    stations_stationxml_paths = List.T(
        Path.T(),
        optional=True,
        help='List of files with station coordinates in StationXML format.')
    events_path = Path.T(
        optional=True,
        help='File with hypocenter information and possibly'
             ' reference solution')
    waveform_paths = List.T(
        Path.T(),
        optional=True,
        help='List of directories with raw waveform data')
    clippings_path = Path.T(
        optional=True)
    responses_sacpz_path = Path.T(
        optional=True,
        help='List of SACPZ response files for restitution of'
             ' the raw waveform data.')
    responses_stationxml_paths = List.T(
        Path.T(),
        optional=True,
        help='List of StationXML response files for restitution of'
             ' the raw waveform data.')
    station_corrections_path = Path.T(
        optional=True,
        help='File containing station correction informations.')
    apply_correction_factors = Bool.T(
        optional=True,
        default=True,
        help='Apply correction factors from station corrections.')
    apply_correction_delays = Bool.T(
        optional=True,
        default=True,
        help='Apply correction delays from station corrections.')
    apply_displaced_sampling_workaround = Bool.T(
        optional=True,
        default=False,
        help='Work around displaced sampling issues.')
    extend_incomplete = Bool.T(
        default=False,
        help='Extend incomplete seismic traces.')
    picks_paths = List.T(
        Path.T())
    blacklist_paths = List.T(
        Path.T(),
        help='List of text files with blacklisted stations.')
    blacklist = List.T(
        String.T(),
        help='Stations/components to be excluded according to their STA, '
             'NET.STA, NET.STA.LOC, or NET.STA.LOC.CHA codes.')
    whitelist_paths = List.T(
        Path.T(),
        help='List of text files with whitelisted stations.')
    whitelist = List.T(
        String.T(),
        optional=True,
        help='If not None, list of stations/components to include according '
             'to their STA, NET.STA, NET.STA.LOC, or NET.STA.LOC.CHA codes. '
             'Note: ''when whitelisting on channel level, both, the raw and '
             'the processed channel codes have to be listed.')
    synthetic_test = SyntheticTest.T(
        optional=True)

    kite_scene_paths = List.T(
        Path.T(),
        optional=True,
        help='List of directories for the InSAR scenes.')

    gnss_campaign_paths = List.T(
        Path.T(),
        optional=True,
        help='List of directories for the GNSS campaign data.')

1031 

    def __init__(self, *args, **kwargs):
        HasPaths.__init__(self, *args, **kwargs)
        # Cache of Dataset objects, keyed by event name (see get_dataset).
        self._ds = {}

1035 

1036 def get_event_names(self): 

1037 logger.info('Loading events ...') 

1038 

1039 def extra(path): 

1040 return expand_template(path, dict( 

1041 event_name='*')) 

1042 

1043 def fp(path): 

1044 return self.expand_path(path, extra=extra) 

1045 

1046 def check_events(events, fn): 

1047 for ev in events: 

1048 if not ev.name: 

1049 logger.warning('Event in %s has no name!', fn) 

1050 return 

1051 if not ev.lat or not ev.lon: 

1052 logger.warning('Event %s has inconsistent coordinates!', 

1053 ev.name) 

1054 if not ev.depth: 

1055 logger.warning('Event %s has no depth!', ev.name) 

1056 if not ev.time: 

1057 logger.warning('Event %s has no time!', ev.name) 

1058 

1059 events = [] 

1060 events_path = fp(self.events_path) 

1061 fns = glob.glob(events_path) 

1062 if not fns: 

1063 raise DatasetError('No event files matching "%s".' % events_path) 

1064 

1065 for fn in fns: 

1066 logger.debug('Loading from file %s' % fn) 

1067 ev = model.load_events(filename=fn) 

1068 check_events(ev, fn) 

1069 

1070 events.extend(ev) 

1071 

1072 event_names = [ev_.name for ev_ in events] 

1073 event_names.sort() 

1074 return event_names 

1075 

    def get_dataset(self, event_name):
        '''Build (or return the cached) Dataset for the given event.

        Path templates are expanded with the event name, all configured
        data is loaded and the configured options are applied.

        :raises DatasetError: if any input file cannot be loaded
        '''
        if event_name not in self._ds:
            def extra(path):
                return expand_template(path, dict(
                    event_name=event_name))

            def fp(path):
                # Expand path template(s); warn about missing paths but
                # keep going -- loading errors surface later.
                p = self.expand_path(path, extra=extra)
                if p is None:
                    return None

                if isinstance(p, list):
                    for path in p:
                        if not op.exists(path):
                            logger.warning('Path %s does not exist.' % path)
                else:
                    if not op.exists(p):
                        logger.warning('Path %s does not exist.' % p)

                return p

            ds = Dataset(event_name)
            try:
                ds.add_events(filename=fp(self.events_path))

                ds.add_stations(
                    pyrocko_stations_filename=fp(self.stations_path),
                    stationxml_filenames=fp(self.stations_stationxml_paths))

                if self.waveform_paths:
                    ds.add_waveforms(paths=fp(self.waveform_paths))

                if self.kite_scene_paths:
                    ds.add_kite_scenes(paths=fp(self.kite_scene_paths))

                if self.gnss_campaign_paths:
                    ds.add_gnss_campaigns(paths=fp(self.gnss_campaign_paths))

                if self.clippings_path:
                    ds.add_clippings(markers_filename=fp(self.clippings_path))

                if self.responses_sacpz_path:
                    ds.add_responses(
                        sacpz_dirname=fp(self.responses_sacpz_path))

                if self.responses_stationxml_paths:
                    ds.add_responses(
                        stationxml_filenames=fp(
                            self.responses_stationxml_paths))

                if self.station_corrections_path:
                    ds.add_station_corrections(
                        filename=fp(self.station_corrections_path))

                ds.apply_correction_factors = self.apply_correction_factors
                ds.apply_correction_delays = self.apply_correction_delays
                ds.apply_displaced_sampling_workaround = \
                    self.apply_displaced_sampling_workaround
                ds.extend_incomplete = self.extend_incomplete

                for picks_path in self.picks_paths:
                    ds.add_picks(
                        filename=fp(picks_path))

                ds.add_blacklist(self.blacklist)
                ds.add_blacklist(filenames=fp(self.blacklist_paths))
                if self.whitelist:
                    ds.add_whitelist(self.whitelist)
                if self.whitelist_paths:
                    ds.add_whitelist(filenames=fp(self.whitelist_paths))

                # Deep-copy so per-event state in the synthetic test does
                # not leak between datasets.
                ds.set_synthetic_test(copy.deepcopy(self.synthetic_test))
                self._ds[event_name] = ds
            except (FileLoadError, OSError) as e:
                raise DatasetError(str(e))

        return self._ds[event_name]

1153 

1154 

# Public API of this module.
__all__ = '''
    Dataset
    DatasetConfig
    DatasetError
    InvalidObject
    NotFound
    StationCorrection
    load_station_corrections
    dump_station_corrections
'''.split()