Coverage for /usr/local/lib/python3.11/dist-packages/grond/dataset.py: 72%

697 statements  

« prev     ^ index     » next       coverage.py v6.5.0, created at 2024-11-27 15:15 +0000

1# https://pyrocko.org/grond - GPLv3 

2# 

3# The Grond Developers, 21st Century 

4 

5import glob 

6import copy 

7import os.path as op 

8import logging 

9import math 

10import numpy as num 

11 

12from collections import defaultdict 

13from pyrocko import util, model, trace, marker as pmarker 

14from pyrocko.io.io_common import FileLoadError 

15from pyrocko.fdsn import enhanced_sacpz, station as fs 

16from pyrocko.guts import (Object, Tuple, String, Float, List, Bool, dump_all, 

17 load_all) 

18from pyrocko import squirrel 

19 

20from pyrocko import gf 

21 

22from .meta import Path, HasPaths, expand_template, GrondError 

23 

24from .synthetic_tests import SyntheticTest 

25 

# Guts serialization prefix for objects defined in this module.
guts_prefix = 'grond'
# Module-level logger; messages appear on the 'grond.dataset' channel.
logger = logging.getLogger('grond.dataset')

28 

29 

def quote_paths(paths):
    '''Return *paths* as a single comma-separated string of quoted paths.'''
    quoted = ['"%s"' % p for p in paths]
    return ', '.join(quoted)

32 

33 

class InvalidObject(Exception):
    '''Raised when an object cannot be mapped to station/channel codes.'''
    pass

36 

37 

class NotFound(Exception):
    '''Raised when a requested dataset item is unavailable.

    :param reason: human-readable description of what is missing
    :param codes: optional station/channel codes tuple related to the failure
    :param time_range: optional ``(tmin, tmax)`` tuple
    '''

    def __init__(self, reason, codes=None, time_range=None):
        self.reason = reason
        self.time_range = time_range
        self.codes = codes

    def __str__(self):
        parts = [self.reason]
        if self.codes:
            parts.append(' (%s)' % '.'.join(self.codes))

        if self.time_range:
            parts.append(' (%s - %s)' % (
                util.time_to_str(self.time_range[0]),
                util.time_to_str(self.time_range[1])))

        return ''.join(parts)

55 

56 

class DatasetError(GrondError):
    '''Raised for dataset configuration and consistency problems.'''
    pass

59 

60 

class StationCorrection(Object):
    '''Per-channel waveform correction: a time shift and an amplitude factor.'''

    # NSLC codes of the channel this correction applies to.
    codes = Tuple.T(4, String.T())
    # Time shift [s]; applied with negated sign to traces (see Dataset).
    delay = Float.T()
    # Amplitude factor; trace samples are divided by it (see Dataset).
    factor = Float.T()

65 

66 

def load_station_corrections(filename):
    '''Load a list of :py:class:`StationCorrection` objects from a YAML file.

    Asserts that every loaded object is a :py:class:`StationCorrection`.
    '''
    corrections = load_all(filename=filename)
    for correction in corrections:
        assert isinstance(correction, StationCorrection)

    return corrections

73 

74 

def dump_station_corrections(station_corrections, filename):
    '''Write *station_corrections* to a YAML file via Guts serialization.'''
    return dump_all(station_corrections, filename=filename)

77 

78 

79class Dataset(object): 

80 

    def __init__(self, event_name=None):
        '''Create an empty dataset, optionally bound to a main event name.'''
        # Known events (pyrocko model.Event objects).
        self.events = []
        # Waveform storage/lookup backend.
        self.squirrel = squirrel.Squirrel()
        # Station metadata keyed by (net, sta, loc).
        self.stations = {}
        # SAC PZ responses, list per NSLC codes.
        self.responses = defaultdict(list)
        # Loaded StationXML documents used for response lookup.
        self.responses_stationxml = []
        # Clipping onset times (numpy arrays) keyed by NSL or NSLC.
        self.clippings = {}
        # Codes tuples to reject (station/channel blacklist).
        self.exclude = set()
        # Include lists; None means "no include filtering configured".
        self.include_nslc = None
        self.include_nsl_xx = None
        self.include = None
        # StationCorrection objects keyed by NSLC codes.
        self.station_corrections = {}
        # NOTE(review): station_factors appears unused in this file — verify.
        self.station_factors = {}
        # Phase/event pick markers (pyrocko marker objects).
        self.pick_markers = []
        # Toggles for applying station corrections to traces.
        self.apply_correction_delays = True
        self.apply_correction_factors = True
        # Workaround: snap traces with displaced sampling to the grid.
        self.apply_displaced_sampling_workaround = False
        # Whether to pad incomplete traces instead of rejecting them.
        self.extend_incomplete = False
        # Granularity of clipping rejection: 'by_nsl' or 'by_nslc'.
        self.clip_handling = 'by_nsl'
        # InSAR scenes and GNSS campaigns.
        self.kite_scenes = []
        self.gnss_campaigns = []
        # Optional synthetic-test scenario replacing real waveforms.
        self.synthetic_test = None
        # Lazily built pick lookup (see get_picks) and waveform cache.
        self._picks = None
        self._cache = {}
        # Name of the dataset's main event (see get_event).
        self._event_name = event_name

106 

107 def close(self): 

108 del self.squirrel 

109 self.squirrel = None 

110 

111 def empty_cache(self): 

112 self._cache = {} 

113 

114 def set_synthetic_test(self, synthetic_test): 

115 self.synthetic_test = synthetic_test 

116 

    def add_stations(
            self,
            stations=None,
            pyrocko_stations_filename=None,
            stationxml_filenames=None):
        '''Register station metadata from objects and/or files.

        :param stations: iterable of pyrocko station objects
        :param pyrocko_stations_filename: stations file in Pyrocko format
        :param stationxml_filenames: list of StationXML files; stations are
            extracted at the main event's origin time
        '''

        if stations is not None:
            for station in stations:
                self.stations[station.nsl()] = station

        if pyrocko_stations_filename is not None:
            logger.debug(
                'Loading stations from file "%s"...' %
                pyrocko_stations_filename)

            for station in model.load_stations(pyrocko_stations_filename):
                self.stations[station.nsl()] = station

        if stationxml_filenames is not None and len(stationxml_filenames) > 0:

            for stationxml_filename in stationxml_filenames:
                # Missing files are silently skipped.
                if not op.exists(stationxml_filename):
                    continue

                logger.debug(
                    'Loading stations from StationXML file "%s"...' %
                    stationxml_filename)

                sx = fs.load_xml(filename=stationxml_filename)
                ev = self.get_event()
                try:
                    stations = sx.get_pyrocko_stations(
                        time=ev.time, sloppy=True)
                except TypeError:
                    # Older Pyrocko versions lack the `sloppy` keyword;
                    # warn and retry without it.
                    logger.warning(
                        'The installed version of Pyrocko does not support '
                        'the keyword argument `sloppy` in method '
                        '`FDSNStationXML.get_pyrocko_stations`. Please '
                        'upgrade Pyrocko.')

                    stations = sx.get_pyrocko_stations(time=ev.time)

                if len(stations) == 0:
                    logger.warning(
                        'No stations found for time %s in file "%s".' % (
                            util.time_to_str(ev.time), stationxml_filename))

                for station in stations:
                    logger.debug('Adding station: %s.%s.%s' % station.nsl())
                    channels = station.get_channels()
                    # Vertical-only stations get mocked horizontal channels
                    # so component projections can be set up later.
                    if len(channels) == 1 and channels[0].name.endswith('Z'):
                        logger.warning(
                            'Station "%s" has vertical component'
                            ' information only, adding mocked channels.'
                            % station.nsl_string())
                        station.add_channel(
                            model.Channel(channels[0].name[:-1] + 'N'))
                        station.add_channel(
                            model.Channel(channels[0].name[:-1] + 'E'))

                    self.stations[station.nsl()] = station

178 

179 def add_events(self, events=None, filename=None): 

180 if events is not None: 

181 self.events.extend(events) 

182 

183 if filename is not None: 

184 logger.debug('Loading events from file "%s"...' % filename) 

185 try: 

186 events = model.load_events(filename) 

187 self.events.extend(events) 

188 logger.info( 

189 'Loading events from %s: %i events found.' % 

190 (filename, len(events))) 

191 except Exception as e: 

192 logger.warning('Could not load events from %s!', filename) 

193 raise e 

194 

195 def add_waveforms(self, paths, regex=None, fileformat='detect', 

196 show_progress=False): 

197 

198 self.squirrel.add(paths) 

199 

200 def add_responses(self, sacpz_dirname=None, stationxml_filenames=None): 

201 if sacpz_dirname: 

202 logger.debug( 

203 'Loading SAC PZ responses from "%s"...' % sacpz_dirname) 

204 for x in enhanced_sacpz.iload_dirname(sacpz_dirname): 

205 self.responses[x.codes].append(x) 

206 

207 if stationxml_filenames: 

208 for stationxml_filename in stationxml_filenames: 

209 if not op.exists(stationxml_filename): 

210 continue 

211 

212 logger.debug( 

213 'Loading StationXML responses from "%s"...' % 

214 stationxml_filename) 

215 

216 self.responses_stationxml.append( 

217 fs.load_xml(filename=stationxml_filename)) 

218 

219 def add_clippings(self, markers_filename): 

220 markers = pmarker.load_markers(markers_filename) 

221 clippings = {} 

222 for marker in markers: 

223 nslc = marker.one_nslc() 

224 nsl = nslc[:3] 

225 if nsl not in clippings: 

226 clippings[nsl] = [] 

227 

228 if nslc not in clippings: 

229 clippings[nslc] = [] 

230 

231 clippings[nsl].append(marker.tmin) 

232 clippings[nslc].append(marker.tmin) 

233 

234 for k, times in clippings.items(): 

235 atimes = num.array(times, dtype=float) 

236 if k not in self.clippings: 

237 self.clippings[k] = atimes 

238 else: 

239 self.clippings[k] = num.concatenate(self.clippings, atimes) 

240 

241 def add_exclude(self, exclude=[], filenames=None): 

242 logger.debug('Loading exclude definitions...') 

243 if filenames: 

244 exclude = list(exclude) 

245 for filename in filenames: 

246 if op.exists(filename): 

247 with open(filename, 'r') as f: 

248 exclude.extend( 

249 s.strip() for s in f.read().splitlines()) 

250 else: 

251 logger.warning('No such exclude file: %s' % filename) 

252 

253 for x in exclude: 

254 if isinstance(x, str): 

255 x = tuple(x.split('.')) 

256 self.exclude.add(x) 

257 

258 def add_include(self, include=[], filenames=None): 

259 logger.debug('Loading included stations...') 

260 if filenames: 

261 include = list(include) 

262 for filename in filenames: 

263 with open(filename, 'r') as f: 

264 include.extend(s.strip() for s in f.read().splitlines()) 

265 

266 if self.include_nslc is None: 

267 self.include_nslc = set() 

268 self.include = set() 

269 self.include_nsl_xx = set() 

270 

271 for x in include: 

272 if isinstance(x, str): 

273 x = tuple(x.split('.')) 

274 if len(x) == 4: 

275 self.include_nslc.add(x) 

276 self.include_nsl_xx.add(x[:3]) 

277 else: 

278 self.include.add(x) 

279 

280 def add_station_corrections(self, filename): 

281 self.station_corrections.update( 

282 (sc.codes, sc) for sc in load_station_corrections(filename)) 

283 

284 def add_picks(self, filename): 

285 self.pick_markers.extend( 

286 pmarker.load_markers(filename)) 

287 

288 self._picks = None 

289 

290 def add_gnss_campaigns(self, paths): 

291 paths = util.select_files( 

292 paths, 

293 include=r'\.yml|\.yaml', 

294 show_progress=False) 

295 

296 for path in paths: 

297 self.add_gnss_campaign(filename=path) 

298 

299 def add_gnss_campaign(self, filename): 

300 from pyrocko.model import gnss # noqa 

301 

302 logger.debug('Loading GNSS campaign from "%s"...' % filename) 

303 

304 campaign = load_all(filename=filename) 

305 self.gnss_campaigns.append(campaign[0]) 

306 

307 def add_kite_scenes(self, paths): 

308 logger.info('Loading kite InSAR scenes...') 

309 paths = util.select_files( 

310 paths, 

311 include=r'\.npz', 

312 show_progress=False) 

313 

314 for path in paths: 

315 self.add_kite_scene(filename=path) 

316 

317 if not self.kite_scenes: 

318 logger.warning('Could not find any kite scenes at %s.' % 

319 quote_paths(self.kite_scene_paths)) 

320 

321 def add_kite_scene(self, filename): 

322 try: 

323 from kite import Scene 

324 except ImportError: 

325 raise ImportError('Module kite could not be imported,' 

326 ' please install from https://pyrocko.org.') 

327 logger.debug('Loading kite scene from "%s"...' % filename) 

328 

329 scene = Scene.load(filename) 

330 scene._log.setLevel(logger.level) 

331 

332 try: 

333 self.get_kite_scene(scene.meta.scene_id) 

334 except NotFound: 

335 self.kite_scenes.append(scene) 

336 else: 

337 raise AttributeError('Kite scene_id not unique for "%s".' 

338 % filename) 

339 

340 def is_excluded(self, obj): 

341 try: 

342 nslc = self.get_nslc(obj) 

343 if nslc in self.exclude: 

344 return True 

345 

346 except InvalidObject: 

347 pass 

348 

349 nsl = self.get_nsl(obj) 

350 return ( 

351 nsl in self.exclude or 

352 nsl[1:2] in self.exclude or 

353 nsl[:2] in self.exclude) 

354 

355 def is_included(self, obj): 

356 if self.include_nslc is None: 

357 return True 

358 

359 nsl = self.get_nsl(obj) 

360 

361 if ( 

362 nsl in self.include or 

363 nsl[1:2] in self.include or 

364 nsl[:2] in self.include): 

365 

366 return True 

367 

368 try: 

369 nslc = self.get_nslc(obj) 

370 if nslc in self.include_nslc: 

371 return True 

372 

373 except InvalidObject: 

374 return nsl in self.include_nsl_xx 

375 

376 def has_clipping(self, nsl_or_nslc, tmin, tmax): 

377 if nsl_or_nslc not in self.clippings: 

378 return False 

379 

380 atimes = self.clippings[nsl_or_nslc] 

381 return num.any(num.logical_and(tmin < atimes, atimes <= tmax)) 

382 

383 def get_nsl(self, obj): 

384 if isinstance(obj, trace.Trace): 

385 net, sta, loc, _ = obj.nslc_id 

386 elif isinstance(obj, model.Station): 

387 net, sta, loc = obj.nsl() 

388 elif isinstance(obj, gf.Target): 

389 net, sta, loc, _ = obj.codes 

390 elif isinstance(obj, tuple) and len(obj) in (3, 4): 

391 net, sta, loc = obj[:3] 

392 else: 

393 raise InvalidObject( 

394 'Cannot get nsl code from given object of type "%s".' 

395 % type(obj)) 

396 

397 return net, sta, loc 

398 

399 def get_nslc(self, obj): 

400 if isinstance(obj, trace.Trace): 

401 return obj.nslc_id 

402 elif isinstance(obj, gf.Target): 

403 return obj.codes 

404 elif isinstance(obj, tuple) and len(obj) == 4: 

405 return obj 

406 else: 

407 raise InvalidObject( 

408 'Cannot get nslc code from given object "%s"' % type(obj)) 

409 

410 def get_tmin_tmax(self, obj): 

411 if isinstance(obj, trace.Trace): 

412 return obj.tmin, obj.tmax 

413 else: 

414 raise InvalidObject( 

415 'Cannot get tmin and tmax from given object of type "%s"' % 

416 type(obj)) 

417 

418 def get_station(self, obj): 

419 if self.is_excluded(obj): 

420 raise NotFound('Station is in exclude list:', self.get_nsl(obj)) 

421 

422 if not self.is_included(obj): 

423 raise NotFound( 

424 'Station is not on include list:', self.get_nsl(obj)) 

425 

426 if isinstance(obj, model.Station): 

427 return obj 

428 

429 net, sta, loc = self.get_nsl(obj) 

430 

431 keys = [(net, sta, loc), (net, sta, ''), ('', sta, '')] 

432 for k in keys: 

433 if k in self.stations: 

434 return self.stations[k] 

435 

436 raise NotFound('No station information:', keys) 

437 

438 def get_stations(self): 

439 return [self.stations[k] for k in sorted(self.stations) 

440 if not self.is_excluded(self.stations[k]) 

441 and self.is_included(self.stations[k])] 

442 

443 def get_kite_scenes(self): 

444 return self.kite_scenes 

445 

446 def get_kite_scene(self, scene_id=None): 

447 if scene_id is None: 

448 if len(self.kite_scenes) == 0: 

449 raise AttributeError('No kite displacements defined.') 

450 return self.kite_scenes[0] 

451 else: 

452 for scene in self.kite_scenes: 

453 if scene.meta.scene_id == scene_id: 

454 return scene 

455 raise NotFound('No kite scene with id "%s" defined.' % scene_id) 

456 

457 def get_gnss_campaigns(self): 

458 return self.gnss_campaigns 

459 

460 def get_gnss_campaign(self, name): 

461 for camp in self.gnss_campaigns: 

462 if camp.name == name: 

463 return camp 

464 raise NotFound('GNSS campaign %s not found!' % name) 

465 

    def get_response(self, obj, quantity='displacement'):
        '''Find the unique instrument response for *obj*.

        Searches SAC PZ responses first, then StationXML documents; the
        returned response converts raw counts to *quantity*.

        :param obj: anything accepted by :py:meth:`get_nslc` /
            :py:meth:`get_tmin_tmax` (typically a trace)
        :param quantity: one of ``'displacement'``, ``'velocity'``,
            ``'acceleration'`` and their ``'rotation_*'`` counterparts
        :raises NotFound: if no response information is loaded, the codes
            are excluded/not included, or if zero or multiple candidate
            responses match
        '''
        if (self.responses is None or len(self.responses) == 0) \
                and (self.responses_stationxml is None
                     or len(self.responses_stationxml) == 0):

            raise NotFound('No response information available.')

        # Units passed to StationXML lookup for unit-aware restitution.
        quantity_to_unit = {
            'displacement': 'M',
            'velocity': 'M/S',
            'acceleration': 'M/S**2',
            'rotation_displacement': 'RAD',
            'rotation_velocity': 'RAD/S',
            'rotation_acceleration': 'RAD/S**2'}

        if self.is_excluded(obj):
            raise NotFound(
                'Response is in exclude list:', self.get_nslc(obj))

        if not self.is_included(obj):
            raise NotFound(
                'Response is not on include list:', self.get_nslc(obj))

        net, sta, loc, cha = self.get_nslc(obj)
        tmin, tmax = self.get_tmin_tmax(obj)

        # Lookup keys, from most to least specific (blank loc, then also
        # blank net); duplicates removed while preserving order.
        keys_x = [
            (net, sta, loc, cha), (net, sta, '', cha), ('', sta, '', cha)]

        keys = []
        for k in keys_x:
            if k not in keys:
                keys.append(k)

        candidates = []
        for k in keys:
            if k in self.responses:
                for x in self.responses[k]:
                    # Response epoch must fully contain the requested span.
                    if x.tmin < tmin and (x.tmax is None or tmax < x.tmax):
                        if quantity in (
                                'displacement', 'rotation_displacement'):
                            candidates.append(x.response)
                        elif quantity in (
                                'velocity', 'rotation_velocity'):
                            # Differentiate once: displacement -> velocity.
                            candidates.append(trace.MultiplyResponse([
                                x.response,
                                trace.DifferentiationResponse()]))
                        elif quantity in (
                                'acceleration', 'rotation_acceleration'):
                            # Differentiate twice for acceleration.
                            candidates.append(trace.MultiplyResponse([
                                x.response,
                                trace.DifferentiationResponse(2)]))
                        else:
                            assert False

        for sx in self.responses_stationxml:
            try:
                candidates.append(
                    sx.get_pyrocko_response(
                        (net, sta, loc, cha),
                        timespan=(tmin, tmax),
                        fake_input_units=quantity_to_unit[quantity]))

            except (fs.NoResponseInformation, fs.MultipleResponseInformation):
                # This document has no usable answer; try the next one.
                pass

        if len(candidates) == 1:
            return candidates[0]

        elif len(candidates) == 0:
            raise NotFound('No response found:', (net, sta, loc, cha))
        else:
            raise NotFound('Multiple responses found:', (net, sta, loc, cha))

539 

    def get_waveform_raw(
            self, obj,
            tmin,
            tmax,
            tpad=0.,
            toffset_noise_extract=0.,
            want_incomplete=False,
            extend_incomplete=False):
        '''Get raw waveform traces for *obj* in ``[tmin-tpad, tmax+tpad]``.

        :param toffset_noise_extract: time shift [s] applied when extracting
            (used to pull real noise from outside the signal window); traces
            are shifted back so their timing matches the requested window
        :param want_incomplete: accept gappy/missing data
        :param extend_incomplete: pad a single incomplete trace to the full
            window by repeating edge samples
        :raises NotFound: if excluded, not included, clipped, or (unless
            *want_incomplete*) missing/gappy
        '''

        net, sta, loc, cha = self.get_nslc(obj)

        if self.is_excluded((net, sta, loc, cha)):
            raise NotFound(
                'Waveform is in exclude list:', (net, sta, loc, cha))

        if not self.is_included((net, sta, loc, cha)):
            raise NotFound(
                'Waveform is not on include list:', (net, sta, loc, cha))

        # Reject windows overlapping clipping markers, at the configured
        # granularity.
        if self.clip_handling == 'by_nsl':
            if self.has_clipping((net, sta, loc), tmin, tmax):
                raise NotFound(
                    'Waveform clipped:', (net, sta, loc))

        elif self.clip_handling == 'by_nslc':
            if self.has_clipping((net, sta, loc, cha), tmin, tmax):
                raise NotFound(
                    'Waveform clipped:', (net, sta, loc, cha))

        trs = self.squirrel.get_waveforms(
            tmin=tmin+toffset_noise_extract-tpad,
            tmax=tmax+toffset_noise_extract+tpad,
            codes=(net, sta, loc, cha),
            want_incomplete=want_incomplete or extend_incomplete)

        if self.apply_displaced_sampling_workaround:
            for tr in trs:
                tr.snap()

        # Undo the noise-extraction offset so trace times match the request.
        if toffset_noise_extract != 0.0:
            for tr in trs:
                tr.shift(-toffset_noise_extract)

        if extend_incomplete and len(trs) == 1:
            trs[0].extend(
                tmin + toffset_noise_extract - tpad,
                tmax + toffset_noise_extract + tpad,
                fillmethod='repeat')

        if not want_incomplete and len(trs) != 1:
            if len(trs) == 0:
                message = 'Waveform missing or incomplete.'
            else:
                message = 'Waveform has gaps.'

            raise NotFound(
                message,
                codes=(net, sta, loc, cha),
                time_range=(
                    tmin + toffset_noise_extract - tpad,
                    tmax + toffset_noise_extract + tpad))

        return trs

603 

604 def get_waveform_restituted( 

605 self, 

606 obj, quantity='displacement', 

607 tmin=None, tmax=None, tpad=0., 

608 tfade=0., freqlimits=None, deltat=None, 

609 toffset_noise_extract=0., 

610 want_incomplete=False, 

611 extend_incomplete=False): 

612 

613 if deltat is not None: 

614 tpad_downsample = deltat * 50. 

615 else: 

616 tpad_downsample = 0. 

617 

618 trs_raw = self.get_waveform_raw( 

619 obj, tmin=tmin, tmax=tmax, tpad=tpad+tfade+tpad_downsample, 

620 toffset_noise_extract=toffset_noise_extract, 

621 want_incomplete=want_incomplete, 

622 extend_incomplete=extend_incomplete) 

623 

624 trs_restituted = [] 

625 for tr in trs_raw: 

626 if deltat is not None: 

627 try: 

628 tr.downsample_to(deltat, snap=True, allow_upsample_max=5) 

629 

630 except util.UnavailableDecimation: 

631 logger.warn('using resample instead of decimation') 

632 tr.resample(deltat) 

633 

634 tr.deltat = deltat 

635 

636 resp = self.get_response(tr, quantity=quantity) 

637 try: 

638 trs_restituted.append( 

639 tr.transfer( 

640 tfade=tfade, freqlimits=freqlimits, 

641 transfer_function=resp, invert=True, demean=True)) 

642 

643 except trace.InfiniteResponse: 

644 raise NotFound( 

645 'Instrument response deconvolution failed ' 

646 '(divide by zero)', tr.nslc_id) 

647 

648 return trs_restituted, trs_raw 

649 

    def _get_projections(
            self, station, backazimuth, source, target, tmin, tmax):
        '''Build component projection matrices for *station*.

        Returns ENU projections and, when a backazimuth is available (given
        directly or computed from *source*/*target*), RTZ projections.

        :raises NotFound: if no projection can be determined
        '''

        # fill in missing channel information (happens when station file
        # does not contain any channel information)
        if not station.get_channels():
            # Work on a copy; the stored station stays unmodified.
            station = copy.deepcopy(station)

            nsl = station.nsl()
            trs = self.squirrel.pile.all(
                tmin=tmin, tmax=tmax,
                trace_selector=lambda tr: tr.nslc_id[:3] == nsl,
                load_data=False)

            # Derive channel names from available waveform headers.
            channels = list(set(tr.channel for tr in trs))
            station.set_channels_by_name(*channels)

        projections = []
        projections.extend(station.guess_projections_to_enu(
            out_channels=('E', 'N', 'Z')))

        # A given source/target pair overrides any explicit backazimuth.
        if source is not None and target is not None:
            backazimuth = source.azibazi_to(target)[1]

        if backazimuth is not None:
            projections.extend(station.guess_projections_to_rtu(
                out_channels=('R', 'T', 'Z'),
                backazimuth=backazimuth))

        if not projections:
            raise NotFound(
                'Cannot determine projection of data components:',
                station.nsl())

        return projections

685 

686 def _get_waveform( 

687 self, 

688 obj, quantity='displacement', 

689 tmin=None, tmax=None, tpad=0., 

690 tfade=0., freqlimits=None, deltat=None, cache=None, 

691 backazimuth=None, 

692 source=None, 

693 target=None, 

694 debug=False): 

695 

696 assert not debug or (debug and cache is None) 

697 

698 if cache is True: 

699 cache = self._cache 

700 

701 _, _, _, channel = self.get_nslc(obj) 

702 station = self.get_station(self.get_nsl(obj)) 

703 

704 nslc = station.nsl() + (channel,) 

705 

706 if self.is_excluded(nslc): 

707 raise NotFound( 

708 'Waveform is excluded:', nslc) 

709 

710 if not self.is_included(nslc): 

711 raise NotFound( 

712 'Waveform is not on include list:', nslc) 

713 

714 assert tmin is not None 

715 assert tmax is not None 

716 

717 tmin = float(tmin) 

718 tmax = float(tmax) 

719 

720 nslc = tuple(nslc) 

721 

722 cache_k = nslc + ( 

723 tmin, tmax, tuple(freqlimits), tfade, deltat, tpad, quantity) 

724 if cache is not None and (nslc + cache_k) in cache: 

725 obj = cache[nslc + cache_k] 

726 if isinstance(obj, Exception): 

727 raise obj 

728 elif obj is None: 

729 raise NotFound('Waveform not found!', nslc) 

730 else: 

731 return obj 

732 

733 syn_test = self.synthetic_test 

734 toffset_noise_extract = 0.0 

735 if syn_test: 

736 if not syn_test.respect_data_availability: 

737 if syn_test.real_noise_scale != 0.0: 

738 raise DatasetError( 

739 'respect_data_availability=False and ' 

740 'addition of real noise cannot be combined.') 

741 

742 tr = syn_test.get_waveform( 

743 nslc, tmin, tmax, 

744 tfade=tfade, 

745 freqlimits=freqlimits) 

746 

747 if cache is not None: 

748 cache[tr.nslc_id + cache_k] = tr 

749 

750 if debug: 

751 return [], [], [], [] 

752 else: 

753 return tr 

754 

755 if syn_test.real_noise_scale != 0.0: 

756 toffset_noise_extract = syn_test.real_noise_toffset 

757 

758 abs_delays = [] 

759 for ocha in 'ENZRT': 

760 sc = self.station_corrections.get(station.nsl() + (channel,), None) 

761 if sc: 

762 abs_delays.append(abs(sc.delay)) 

763 

764 if abs_delays: 

765 abs_delay_max = max(abs_delays) 

766 else: 

767 abs_delay_max = 0.0 

768 

769 projections = self._get_projections( 

770 station, backazimuth, source, target, tmin, tmax) 

771 

772 try: 

773 trs_projected = [] 

774 trs_restituted = [] 

775 trs_raw = [] 

776 exceptions = [] 

777 for matrix, in_channels, out_channels in projections: 

778 deps = trace.project_dependencies( 

779 matrix, in_channels, out_channels) 

780 

781 try: 

782 trs_restituted_group = [] 

783 trs_raw_group = [] 

784 if channel in deps: 

785 for cha in deps[channel]: 

786 trs_restituted_this, trs_raw_this = \ 

787 self.get_waveform_restituted( 

788 station.nsl() + (cha,), 

789 quantity=quantity, 

790 tmin=tmin, tmax=tmax, 

791 tpad=tpad+abs_delay_max, 

792 toffset_noise_extract=toffset_noise_extract, # noqa 

793 tfade=tfade, 

794 freqlimits=freqlimits, 

795 deltat=deltat, 

796 want_incomplete=debug, 

797 extend_incomplete=self.extend_incomplete) 

798 

799 trs_restituted_group.extend(trs_restituted_this) 

800 trs_raw_group.extend(trs_raw_this) 

801 

802 trs_projected.extend( 

803 trace.project( 

804 trs_restituted_group, matrix, 

805 in_channels, out_channels)) 

806 

807 trs_restituted.extend(trs_restituted_group) 

808 trs_raw.extend(trs_raw_group) 

809 

810 except NotFound as e: 

811 exceptions.append((in_channels, out_channels, e)) 

812 

813 if not trs_projected: 

814 err = [] 

815 for (in_channels, out_channels, e) in exceptions: 

816 sin = ', '.join(c.name for c in in_channels) 

817 sout = ', '.join(c.name for c in out_channels) 

818 err.append('(%s) -> (%s): %s' % (sin, sout, e)) 

819 

820 raise NotFound('\n'.join(err)) 

821 

822 for tr in trs_projected: 

823 sc = self.station_corrections.get(tr.nslc_id, None) 

824 if sc: 

825 if self.apply_correction_factors: 

826 tr.ydata /= sc.factor 

827 

828 if self.apply_correction_delays: 

829 tr.shift(-sc.delay) 

830 

831 if tmin is not None and tmax is not None: 

832 tr.chop(tmin, tmax) 

833 

834 if syn_test: 

835 trs_projected_synthetic = [] 

836 for tr in trs_projected: 

837 if tr.channel == channel: 

838 tr_syn = syn_test.get_waveform( 

839 tr.nslc_id, tmin, tmax, 

840 tfade=tfade, freqlimits=freqlimits) 

841 

842 if tr_syn: 

843 if syn_test.real_noise_scale != 0.0: 

844 tr_syn = tr_syn.copy() 

845 tr_noise = tr.copy() 

846 tr_noise.set_ydata( 

847 tr_noise.get_ydata() 

848 * syn_test.real_noise_scale) 

849 

850 tr_syn.add(tr_noise) 

851 

852 trs_projected_synthetic.append(tr_syn) 

853 

854 trs_projected = trs_projected_synthetic 

855 

856 if cache is not None: 

857 for tr in trs_projected: 

858 cache[tr.nslc_id + cache_k] = tr 

859 

860 tr_return = None 

861 for tr in trs_projected: 

862 if tr.channel == channel: 

863 tr_return = tr 

864 

865 if debug: 

866 return trs_projected, trs_restituted, trs_raw, tr_return 

867 

868 elif tr_return: 

869 return tr_return 

870 

871 else: 

872 raise NotFound( 

873 'waveform not available', station.nsl() + (channel,)) 

874 

875 except NotFound: 

876 if cache is not None: 

877 cache[nslc + cache_k] = None 

878 raise 

879 

880 def get_waveform(self, obj, tinc_cache=None, **kwargs): 

881 tmin = kwargs['tmin'] 

882 tmax = kwargs['tmax'] 

883 if tinc_cache is not None: 

884 tmin_r = (math.floor(tmin / tinc_cache) - 1.0) * tinc_cache 

885 tmax_r = (math.ceil(tmax / tinc_cache) + 1.0) * tinc_cache 

886 else: 

887 tmin_r = tmin 

888 tmax_r = tmax 

889 

890 kwargs['tmin'] = tmin_r 

891 kwargs['tmax'] = tmax_r 

892 

893 if kwargs.get('debug', None): 

894 return self._get_waveform(obj, **kwargs) 

895 else: 

896 tr = self._get_waveform(obj, **kwargs) 

897 return tr.chop(tmin, tmax, inplace=False) 

898 

899 def get_events(self, magmin=None, event_names=None): 

900 evs = [] 

901 for ev in self.events: 

902 if ((magmin is None or ev.magnitude >= magmin) and 

903 (event_names is None or ev.name in event_names)): 

904 evs.append(ev) 

905 

906 return evs 

907 

908 def get_event_by_time(self, t, magmin=None): 

909 evs = self.get_events(magmin=magmin) 

910 ev_x = None 

911 for ev in evs: 

912 if ev_x is None or abs(ev.time - t) < abs(ev_x.time - t): 

913 ev_x = ev 

914 

915 if not ev_x: 

916 raise NotFound( 

917 'No event information matching criteria (t=%s, magmin=%s).' % 

918 (t, magmin)) 

919 

920 return ev_x 

921 

922 def get_event(self): 

923 if self._event_name is None: 

924 raise NotFound('No main event selected in dataset!') 

925 

926 for ev in self.events: 

927 if ev.name == self._event_name: 

928 return ev 

929 

930 raise NotFound('No such event: %s' % self._event_name) 

931 

    def get_picks(self):
        '''Return phase picks as a dict keyed by ``(nsl, phase, event)``.

        Built lazily from ``pick_markers``: event markers (and the dataset's
        own events) map event hashes to names; phase markers are then
        associated via their event hash.

        :raises DatasetError: on duplicate event names, unassociated picks
            or duplicate picks
        '''
        if self._picks is None:
            hash_to_name = {}
            names = set()
            for marker in self.pick_markers:
                if isinstance(marker, pmarker.EventMarker):
                    name = marker.get_event().name
                    if name in names:
                        raise DatasetError(
                            'Duplicate event name "%s" in picks.' % name)

                    names.add(name)
                    hash_to_name[marker.get_event_hash()] = name

            # Dataset events may also be referenced by picks.
            for ev in self.events:
                hash_to_name[ev.get_hash()] = ev.name

            picks = {}
            for marker in self.pick_markers:
                if isinstance(marker, pmarker.PhaseMarker):
                    ehash = marker.get_event_hash()

                    nsl = marker.one_nslc()[:3]
                    phasename = marker.get_phasename()

                    if ehash is None or ehash not in hash_to_name:
                        raise DatasetError(
                            'Unassociated pick: %s.%s.%s, %s' %
                            (nsl + (phasename, )))

                    eventname = hash_to_name[ehash]

                    if (nsl, phasename, eventname) in picks:
                        raise DatasetError(
                            'Duplicate pick: %s.%s.%s, %s' %
                            (nsl + (phasename, )))

                    picks[nsl, phasename, eventname] = marker

            self._picks = picks

        return self._picks

974 

975 def get_pick(self, eventname, obj, phasename): 

976 nsl = self.get_nsl(obj) 

977 return self.get_picks().get((nsl, phasename, eventname), None) 

978 

979 

980class DatasetConfig(HasPaths): 

981 ''' Configuration for a Grond `Dataset` object. ''' 

982 

983 stations_path = Path.T( 

984 optional=True, 

985 help='List of files with station coordinates in Pyrocko format.') 

986 stations_stationxml_paths = List.T( 

987 Path.T(), 

988 optional=True, 

989 help='List of files with station coordinates in StationXML format.') 

990 events_path = Path.T( 

991 optional=True, 

992 help='File with hypocenter information and possibly' 

993 ' reference solution') 

994 waveform_paths = List.T( 

995 Path.T(), 

996 optional=True, 

997 help='List of directories with raw waveform data') 

998 clippings_path = Path.T( 

999 optional=True) 

1000 responses_sacpz_path = Path.T( 

1001 optional=True, 

1002 help='List of SACPZ response files for restitution of' 

1003 ' the raw waveform data.') 

1004 responses_stationxml_paths = List.T( 

1005 Path.T(), 

1006 optional=True, 

1007 help='List of StationXML response files for restitution of' 

1008 ' the raw waveform data.') 

1009 station_corrections_path = Path.T( 

1010 optional=True, 

1011 help='File containing station correction informations.') 

1012 apply_correction_factors = Bool.T( 

1013 optional=True, 

1014 default=True, 

1015 help='Apply correction factors from station corrections.') 

1016 apply_correction_delays = Bool.T( 

1017 optional=True, 

1018 default=True, 

1019 help='Apply correction delays from station corrections.') 

1020 apply_displaced_sampling_workaround = Bool.T( 

1021 optional=True, 

1022 default=False, 

1023 help='Work around displaced sampling issues.') 

1024 extend_incomplete = Bool.T( 

1025 default=False, 

1026 help='Extend incomplete seismic traces.') 

1027 picks_paths = List.T( 

1028 Path.T()) 

1029 exclude_paths = List.T( 

1030 Path.T(), 

1031 help='List of text files with excluded stations.') 

1032 exclude = List.T( 

1033 String.T(), 

1034 help='Stations/components to be excluded according to their STA, ' 

1035 'NET.STA, NET.STA.LOC, or NET.STA.LOC.CHA codes.') 

1036 include_paths = List.T( 

1037 Path.T(), 

1038 help='List of text files with included stations.') 

1039 include = List.T( 

1040 String.T(), 

1041 optional=True, 

1042 help='If not None, list of stations/components to include according ' 

1043 'to their STA, NET.STA, NET.STA.LOC, or NET.STA.LOC.CHA codes. ' 

1044 'Note: ''when including on channel level, both, the raw and ' 

1045 'the processed channel codes have to be listed.') 

1046 

1047 synthetic_test = SyntheticTest.T( 

1048 optional=True) 

1049 

1050 kite_scene_paths = List.T( 

1051 Path.T(), 

1052 optional=True, 

1053 help='List of directories for the InSAR scenes.') 

1054 

1055 gnss_campaign_paths = List.T( 

1056 Path.T(), 

1057 optional=True, 

1058 help='List of directories for the GNSS campaign data.') 

1059 

1060 # for backward compatibility 

1061 blacklist = List.T(String.T(), optional=True) 

1062 whitelist = List.T(String.T(), optional=True) 

1063 blacklist_paths = List.T(Path.T(), optional=True) 

1064 whitelist_paths = List.T(Path.T(), optional=True) 

1065 

    def __init__(self, *args, **kwargs):
        '''Initialize config; migrates deprecated black-/whitelist options
        into the exclude/include attributes.'''
        HasPaths.__init__(self, *args, **kwargs)

        # for backward compatibility
        if self.blacklist:
            self.exclude.extend(self.blacklist)
            self.blacklist = None
        if self.whitelist:
            # NOTE(review): `include` is optional and may be None here, in
            # which case this extend would fail — verify intended.
            self.include.extend(self.whitelist)
            self.whitelist = None
        if self.blacklist_paths:
            self.exclude_paths.extend(self.blacklist_paths)
            self.blacklist_paths = None
        if self.whitelist_paths:
            self.include_paths.extend(self.whitelist_paths)
            self.whitelist_paths = None

        # Per-event Dataset instances, built lazily by get_dataset().
        self._ds = {}

1084 

1085 def get_event_names(self): 

1086 logger.info('Loading events ...') 

1087 

1088 def extra(path): 

1089 return expand_template(path, dict( 

1090 event_name='*')) 

1091 

1092 def fp(path): 

1093 return self.expand_path(path, extra=extra) 

1094 

1095 def check_events(events, fn): 

1096 for ev in events: 

1097 if not ev.name: 

1098 logger.warning('Event in %s has no name!', fn) 

1099 return 

1100 if not ev.lat or not ev.lon: 

1101 logger.warning('Event %s has inconsistent coordinates!', 

1102 ev.name) 

1103 if not ev.depth: 

1104 logger.warning('Event %s has no depth!', ev.name) 

1105 if not ev.time: 

1106 logger.warning('Event %s has no time!', ev.name) 

1107 

1108 events = [] 

1109 events_path = fp(self.events_path) 

1110 fns = glob.glob(events_path) 

1111 if not fns: 

1112 raise DatasetError('No event files matching "%s".' % events_path) 

1113 

1114 for fn in fns: 

1115 logger.debug('Loading from file %s' % fn) 

1116 ev = model.load_events(filename=fn) 

1117 check_events(ev, fn) 

1118 

1119 events.extend(ev) 

1120 

1121 event_names = [ev_.name for ev_ in events] 

1122 event_names.sort() 

1123 return event_names 

1124 

1125 def get_dataset(self, event_name): 

1126 if event_name not in self._ds: 

1127 def extra(path): 

1128 return expand_template(path, dict( 

1129 event_name=event_name)) 

1130 

1131 def fp(path): 

1132 p = self.expand_path(path, extra=extra) 

1133 if p is None: 

1134 return None 

1135 

1136 if isinstance(p, list): 

1137 for path in p: 

1138 if not op.exists(path): 

1139 logger.warning('Path %s does not exist.' % path) 

1140 else: 

1141 if not op.exists(p): 

1142 logger.warning('Path %s does not exist.' % p) 

1143 

1144 return p 

1145 

1146 ds = Dataset(event_name) 

1147 try: 

1148 ds.add_events(filename=fp(self.events_path)) 

1149 

1150 ds.add_stations( 

1151 pyrocko_stations_filename=fp(self.stations_path), 

1152 stationxml_filenames=fp(self.stations_stationxml_paths)) 

1153 

1154 if self.waveform_paths: 

1155 ds.add_waveforms(paths=fp(self.waveform_paths)) 

1156 

1157 if self.kite_scene_paths: 

1158 ds.add_kite_scenes(paths=fp(self.kite_scene_paths)) 

1159 

1160 if self.gnss_campaign_paths: 

1161 ds.add_gnss_campaigns(paths=fp(self.gnss_campaign_paths)) 

1162 

1163 if self.clippings_path: 

1164 ds.add_clippings(markers_filename=fp(self.clippings_path)) 

1165 

1166 if self.responses_sacpz_path: 

1167 ds.add_responses( 

1168 sacpz_dirname=fp(self.responses_sacpz_path)) 

1169 

1170 if self.responses_stationxml_paths: 

1171 ds.add_responses( 

1172 stationxml_filenames=fp( 

1173 self.responses_stationxml_paths)) 

1174 

1175 if self.station_corrections_path: 

1176 ds.add_station_corrections( 

1177 filename=fp(self.station_corrections_path)) 

1178 

1179 ds.apply_correction_factors = self.apply_correction_factors 

1180 ds.apply_correction_delays = self.apply_correction_delays 

1181 ds.apply_displaced_sampling_workaround = \ 

1182 self.apply_displaced_sampling_workaround 

1183 ds.extend_incomplete = self.extend_incomplete 

1184 

1185 for picks_path in self.picks_paths: 

1186 ds.add_picks( 

1187 filename=fp(picks_path)) 

1188 

1189 ds.add_exclude(self.exclude) 

1190 ds.add_exclude(filenames=fp(self.exclude_paths)) 

1191 if self.include: 

1192 ds.add_include(self.include) 

1193 if self.include_paths: 

1194 ds.add_include(filenames=fp(self.include_paths)) 

1195 

1196 ds.set_synthetic_test(copy.deepcopy(self.synthetic_test)) 

1197 self._ds[event_name] = ds 

1198 except (FileLoadError, OSError) as e: 

1199 raise DatasetError(str(e)) 

1200 

1201 return self._ds[event_name] 

1202 

1203 

# Public API of this module.
__all__ = [
    'Dataset',
    'DatasetConfig',
    'DatasetError',
    'InvalidObject',
    'NotFound',
    'StationCorrection',
    'load_station_corrections',
    'dump_station_corrections',
]