1import glob 

2import copy 

3import os.path as op 

4import logging 

5import math 

6import numpy as num 

7 

8from collections import defaultdict 

9from pyrocko import util, model, trace, marker as pmarker 

10from pyrocko.io.io_common import FileLoadError 

11from pyrocko.fdsn import enhanced_sacpz, station as fs 

12from pyrocko.guts import (Object, Tuple, String, Float, List, Bool, dump_all, 

13 load_all) 

14from pyrocko import squirrel 

15 

16from pyrocko import gf 

17 

18from .meta import Path, HasPaths, expand_template, GrondError 

19 

20from .synthetic_tests import SyntheticTest 

21 

# Namespace prefix used by guts when (de)serializing objects of this module.
guts_prefix = 'grond'
# Module-level logger; configured by the application.
logger = logging.getLogger('grond.dataset')

24 

25 

def quote_paths(paths):
    '''Return *paths* as a single comma-separated string of quoted paths.'''
    quoted = ['"%s"' % p for p in paths]
    return ', '.join(quoted)

28 

29 

class InvalidObject(Exception):
    '''Raised when an object cannot be interpreted as station/channel codes
    or as a trace-like object (see :py:meth:`Dataset.get_nsl` and
    friends).'''
    pass

32 

33 

class NotFound(Exception):
    '''Raised when a requested dataset item (station, response, waveform,
    event, ...) is not available.

    :param reason: human readable explanation.
    :param codes: optional tuple of codes identifying the item.
    :param time_range: optional ``(tmin, tmax)`` tuple [s].
    '''

    def __init__(self, reason, codes=None, time_range=None):
        self.reason = reason
        self.time_range = time_range
        self.codes = codes

    def __str__(self):
        parts = [self.reason]
        if self.codes:
            parts.append(' (%s)' % '.'.join(self.codes))

        if self.time_range:
            parts.append(' (%s - %s)' % (
                util.time_to_str(self.time_range[0]),
                util.time_to_str(self.time_range[1])))

        return ''.join(parts)

51 

52 

class DatasetError(GrondError):
    '''Raised for user-facing problems with dataset configuration or
    content (e.g. unreadable files, duplicate picks).'''
    pass

55 

56 

class StationCorrection(Object):
    '''Station-specific time shift and amplitude factor.

    Applied in :py:meth:`Dataset._get_waveform`: the trace is shifted by
    ``-delay`` and its samples are divided by ``factor``.
    '''

    # (network, station, location, channel) codes identifying the target.
    codes = Tuple.T(4, String.T())
    # Time shift [s].
    delay = Float.T()
    # Amplitude correction factor.
    factor = Float.T()

61 

62 

def load_station_corrections(filename):
    '''Load a list of :py:class:`StationCorrection` objects from a YAML file.

    :param filename: path to a guts YAML file.
    :returns: list of :py:class:`StationCorrection` objects.
    :raises DatasetError: if the file contains an object of any other type.
    '''
    scs = load_all(filename=filename)
    for sc in scs:
        # Raise instead of `assert`: asserts are stripped under `python -O`
        # and give the user no useful message.
        if not isinstance(sc, StationCorrection):
            raise DatasetError(
                'Invalid object in station corrections file "%s": '
                'expected StationCorrection, got %s.' % (
                    filename, type(sc).__name__))

    return scs

69 

70 

def dump_station_corrections(station_corrections, filename):
    '''Write a list of :py:class:`StationCorrection` objects to a YAML
    file.'''
    return dump_all(station_corrections, filename=filename)

73 

74 

75class Dataset(object): 

76 

    def __init__(self, event_name=None):
        # Known events; the "main" event is selected by name, see
        # get_event().
        self.events = []
        # Waveform access backend.
        self.squirrel = squirrel.Squirrel()
        # (net, sta, loc) -> station object.
        self.stations = {}
        # codes -> list of SAC PZ response entries.
        self.responses = defaultdict(list)
        # Loaded StationXML documents used for response lookup.
        self.responses_stationxml = []
        # nsl/nslc key -> numpy array of clipping marker times [s].
        self.clippings = {}
        # Code tuples to exclude, see is_blacklisted().
        self.blacklist = set()
        # Whitelist sets; None means "everything is allowed", see
        # is_whitelisted().
        self.whitelist_nslc = None
        self.whitelist_nsl_xx = None
        self.whitelist = None
        # codes -> StationCorrection.
        self.station_corrections = {}
        self.station_factors = {}
        # Raw pick markers loaded via add_picks().
        self.pick_markers = []
        self.apply_correction_delays = True
        self.apply_correction_factors = True
        self.apply_displaced_sampling_workaround = False
        self.extend_incomplete = False
        # 'by_nsl' or 'by_nslc': granularity of the clipping check in
        # get_waveform_raw().
        self.clip_handling = 'by_nsl'
        self.kite_scenes = []
        self.gnss_campaigns = []
        self.synthetic_test = None
        # Lazily built pick table, see get_picks().
        self._picks = None
        # Waveform cache used by _get_waveform().
        self._cache = {}
        self._event_name = event_name

102 

    def empty_cache(self):
        '''Drop all cached waveforms (rebinds the internal cache dict).'''
        self._cache = {}

105 

    def set_synthetic_test(self, synthetic_test):
        '''Attach a SyntheticTest (or None); when set, _get_waveform()
        serves synthetic data instead of / mixed with real data.'''
        self.synthetic_test = synthetic_test

108 

    def add_stations(
            self,
            stations=None,
            pyrocko_stations_filename=None,
            stationxml_filenames=None):
        '''Register station metadata, keyed by (net, sta, loc).

        :param stations: iterable of ready-made station objects.
        :param pyrocko_stations_filename: Pyrocko-format station file.
        :param stationxml_filenames: list of StationXML files; channel
            epochs are selected at the main event's time (see get_event()).
        '''

        if stations is not None:
            for station in stations:
                self.stations[station.nsl()] = station

        if pyrocko_stations_filename is not None:
            logger.debug(
                'Loading stations from file "%s"...' %
                pyrocko_stations_filename)

            for station in model.load_stations(pyrocko_stations_filename):
                self.stations[station.nsl()] = station

        if stationxml_filenames is not None and len(stationxml_filenames) > 0:

            for stationxml_filename in stationxml_filenames:
                # Missing files are silently skipped here; get_dataset()
                # warns about non-existing paths beforehand.
                if not op.exists(stationxml_filename):
                    continue

                logger.debug(
                    'Loading stations from StationXML file "%s"...' %
                    stationxml_filename)

                sx = fs.load_xml(filename=stationxml_filename)
                ev = self.get_event()
                stations = sx.get_pyrocko_stations(time=ev.time)
                if len(stations) == 0:
                    logger.warning(
                        'No stations found for time %s in file "%s".' % (
                            util.time_to_str(ev.time), stationxml_filename))

                for station in stations:
                    logger.debug('Adding station: %s.%s.%s' % station.nsl())
                    channels = station.get_channels()
                    if len(channels) == 1 and channels[0].name.endswith('Z'):
                        # Vertical-only stations get mocked horizontal
                        # channels so component projections can be set up
                        # later (see _get_projections()).
                        logger.warning(
                            'Station "%s" has vertical component'
                            ' information only, adding mocked channels.'
                            % station.nsl_string())
                        station.add_channel(
                            model.Channel(channels[0].name[:-1] + 'N'))
                        station.add_channel(
                            model.Channel(channels[0].name[:-1] + 'E'))

                    self.stations[station.nsl()] = station

159 

160 def add_events(self, events=None, filename=None): 

161 if events is not None: 

162 self.events.extend(events) 

163 

164 if filename is not None: 

165 logger.debug('Loading events from file "%s"...' % filename) 

166 try: 

167 events = model.load_events(filename) 

168 self.events.extend(events) 

169 logger.info( 

170 'Loading events from %s: %i events found.' % 

171 (filename, len(events))) 

172 except Exception as e: 

173 logger.warning('Could not load events from %s!', filename) 

174 raise e 

175 

    def add_waveforms(self, paths, regex=None, fileformat='detect',
                      show_progress=False):
        '''Make waveform data under *paths* available via the squirrel
        backend.

        NOTE(review): `regex`, `fileformat` and `show_progress` are accepted
        for interface compatibility but are currently ignored — only the
        paths are passed on to the squirrel. Confirm whether filtering is
        intended to happen here.
        '''
        self.squirrel.add(paths)

180 

181 def add_responses(self, sacpz_dirname=None, stationxml_filenames=None): 

182 if sacpz_dirname: 

183 logger.debug( 

184 'Loading SAC PZ responses from "%s"...' % sacpz_dirname) 

185 for x in enhanced_sacpz.iload_dirname(sacpz_dirname): 

186 self.responses[x.codes].append(x) 

187 

188 if stationxml_filenames: 

189 for stationxml_filename in stationxml_filenames: 

190 if not op.exists(stationxml_filename): 

191 continue 

192 

193 logger.debug( 

194 'Loading StationXML responses from "%s"...' % 

195 stationxml_filename) 

196 

197 self.responses_stationxml.append( 

198 fs.load_xml(filename=stationxml_filename)) 

199 

200 def add_clippings(self, markers_filename): 

201 markers = pmarker.load_markers(markers_filename) 

202 clippings = {} 

203 for marker in markers: 

204 nslc = marker.one_nslc() 

205 nsl = nslc[:3] 

206 if nsl not in clippings: 

207 clippings[nsl] = [] 

208 

209 if nslc not in clippings: 

210 clippings[nslc] = [] 

211 

212 clippings[nsl].append(marker.tmin) 

213 clippings[nslc].append(marker.tmin) 

214 

215 for k, times in clippings.items(): 

216 atimes = num.array(times, dtype=float) 

217 if k not in self.clippings: 

218 self.clippings[k] = atimes 

219 else: 

220 self.clippings[k] = num.concatenate(self.clippings, atimes) 

221 

222 def add_blacklist(self, blacklist=[], filenames=None): 

223 logger.debug('Loading blacklisted stations...') 

224 if filenames: 

225 blacklist = list(blacklist) 

226 for filename in filenames: 

227 if op.exists(filename): 

228 with open(filename, 'r') as f: 

229 blacklist.extend( 

230 s.strip() for s in f.read().splitlines()) 

231 else: 

232 logger.warning('No such blacklist file: %s' % filename) 

233 

234 for x in blacklist: 

235 if isinstance(x, str): 

236 x = tuple(x.split('.')) 

237 self.blacklist.add(x) 

238 

239 def add_whitelist(self, whitelist=[], filenames=None): 

240 logger.debug('Loading whitelisted stations...') 

241 if filenames: 

242 whitelist = list(whitelist) 

243 for filename in filenames: 

244 with open(filename, 'r') as f: 

245 whitelist.extend(s.strip() for s in f.read().splitlines()) 

246 

247 if self.whitelist_nslc is None: 

248 self.whitelist_nslc = set() 

249 self.whitelist = set() 

250 self.whitelist_nsl_xx = set() 

251 

252 for x in whitelist: 

253 if isinstance(x, str): 

254 x = tuple(x.split('.')) 

255 if len(x) == 4: 

256 self.whitelist_nslc.add(x) 

257 self.whitelist_nsl_xx.add(x[:3]) 

258 else: 

259 self.whitelist.add(x) 

260 

    def add_station_corrections(self, filename):
        '''Load station corrections from *filename* and merge them into the
        correction table, keyed by their codes.'''
        self.station_corrections.update(
            (sc.codes, sc) for sc in load_station_corrections(filename))

264 

    def add_picks(self, filename):
        '''Load pick markers from a marker file.'''
        self.pick_markers.extend(
            pmarker.load_markers(filename))

        # Invalidate the lazily built pick lookup table (see get_picks()).
        self._picks = None

270 

271 def add_gnss_campaigns(self, paths): 

272 paths = util.select_files( 

273 paths, 

274 include=r'\.yml|\.yaml', 

275 show_progress=False) 

276 

277 for path in paths: 

278 self.add_gnss_campaign(filename=path) 

279 

    def add_gnss_campaign(self, filename):
        '''Load a single GNSS campaign from a guts YAML file.

        :raises ImportError: if the installed pyrocko lacks the gnss model.
        '''
        try:
            from pyrocko.model import gnss  # noqa
        except ImportError:
            raise ImportError('Module pyrocko.model.gnss not found,'
                              ' please upgrade pyrocko!')
        logger.debug('Loading GNSS campaign from "%s"...' % filename)

        campaign = load_all(filename=filename)
        # Only the first object in the file is used as the campaign.
        self.gnss_campaigns.append(campaign[0])

290 

291 def add_kite_scenes(self, paths): 

292 logger.info('Loading kite InSAR scenes...') 

293 paths = util.select_files( 

294 paths, 

295 include=r'\.npz', 

296 show_progress=False) 

297 

298 for path in paths: 

299 self.add_kite_scene(filename=path) 

300 

301 if not self.kite_scenes: 

302 logger.warning('Could not find any kite scenes at %s.' % 

303 quote_paths(self.kite_scene_paths)) 

304 

    def add_kite_scene(self, filename):
        '''Load one kite scene; scene_ids must be unique in the dataset.

        :raises ImportError: if the `kite` package is not installed.
        :raises AttributeError: if a scene with the same scene_id was
            already added.
        '''
        try:
            from kite import Scene
        except ImportError:
            raise ImportError('Module kite could not be imported,'
                              ' please install from https://pyrocko.org.')
        logger.debug('Loading kite scene from "%s"...' % filename)

        scene = Scene.load(filename)
        scene._log.setLevel(logger.level)

        # get_kite_scene() raising NotFound means the id is still unused,
        # which is the expected (success) path here.
        try:
            self.get_kite_scene(scene.meta.scene_id)
        except NotFound:
            self.kite_scenes.append(scene)
        else:
            raise AttributeError('Kite scene_id not unique for "%s".'
                                 % filename)

323 

324 def is_blacklisted(self, obj): 

325 try: 

326 nslc = self.get_nslc(obj) 

327 if nslc in self.blacklist: 

328 return True 

329 

330 except InvalidObject: 

331 pass 

332 

333 nsl = self.get_nsl(obj) 

334 return ( 

335 nsl in self.blacklist or 

336 nsl[1:2] in self.blacklist or 

337 nsl[:2] in self.blacklist) 

338 

    def is_whitelisted(self, obj):
        '''Check *obj* against the configured whitelists.

        Returns True when no whitelist is configured at all.

        NOTE(review): when channel codes are available but the nslc is not
        whitelisted, the function falls through and implicitly returns
        None (falsy) — callers must rely on truthiness only.
        '''
        if self.whitelist_nslc is None:
            return True

        nsl = self.get_nsl(obj)

        if (
                nsl in self.whitelist or
                nsl[1:2] in self.whitelist or
                nsl[:2] in self.whitelist):

            return True

        try:
            nslc = self.get_nslc(obj)
            if nslc in self.whitelist_nslc:
                return True

        except InvalidObject:
            # No channel info: accept if the station matches the nsl part
            # of any whitelisted 4-tuple.
            return nsl in self.whitelist_nsl_xx

359 

360 def has_clipping(self, nsl_or_nslc, tmin, tmax): 

361 if nsl_or_nslc not in self.clippings: 

362 return False 

363 

364 atimes = self.clippings[nsl_or_nslc] 

365 return num.any(num.logical_and(tmin < atimes, atimes <= tmax)) 

366 

367 def get_nsl(self, obj): 

368 if isinstance(obj, trace.Trace): 

369 net, sta, loc, _ = obj.nslc_id 

370 elif isinstance(obj, model.Station): 

371 net, sta, loc = obj.nsl() 

372 elif isinstance(obj, gf.Target): 

373 net, sta, loc, _ = obj.codes 

374 elif isinstance(obj, tuple) and len(obj) in (3, 4): 

375 net, sta, loc = obj[:3] 

376 else: 

377 raise InvalidObject( 

378 'Cannot get nsl code from given object of type "%s".' 

379 % type(obj)) 

380 

381 return net, sta, loc 

382 

383 def get_nslc(self, obj): 

384 if isinstance(obj, trace.Trace): 

385 return obj.nslc_id 

386 elif isinstance(obj, gf.Target): 

387 return obj.codes 

388 elif isinstance(obj, tuple) and len(obj) == 4: 

389 return obj 

390 else: 

391 raise InvalidObject( 

392 'Cannot get nslc code from given object "%s"' % type(obj)) 

393 

    def get_tmin_tmax(self, obj):
        '''Return the (tmin, tmax) time span of a trace-like object.

        Only pyrocko traces carry a time span; anything else raises
        InvalidObject.
        '''
        if isinstance(obj, trace.Trace):
            return obj.tmin, obj.tmax
        else:
            raise InvalidObject(
                'Cannot get tmin and tmax from given object of type "%s"' %
                type(obj))

401 

402 def get_station(self, obj): 

403 if self.is_blacklisted(obj): 

404 raise NotFound('Station is blacklisted:', self.get_nsl(obj)) 

405 

406 if not self.is_whitelisted(obj): 

407 raise NotFound('Station is not on whitelist:', self.get_nsl(obj)) 

408 

409 if isinstance(obj, model.Station): 

410 return obj 

411 

412 net, sta, loc = self.get_nsl(obj) 

413 

414 keys = [(net, sta, loc), (net, sta, ''), ('', sta, '')] 

415 for k in keys: 

416 if k in self.stations: 

417 return self.stations[k] 

418 

419 raise NotFound('No station information:', keys) 

420 

421 def get_stations(self): 

422 return [self.stations[k] for k in sorted(self.stations) 

423 if not self.is_blacklisted(self.stations[k]) 

424 and self.is_whitelisted(self.stations[k])] 

425 

    def get_kite_scenes(self):
        '''Return the list of loaded kite InSAR scenes.'''
        return self.kite_scenes

428 

429 def get_kite_scene(self, scene_id=None): 

430 if scene_id is None: 

431 if len(self.kite_scenes) == 0: 

432 raise AttributeError('No kite displacements defined.') 

433 return self.kite_scenes[0] 

434 else: 

435 for scene in self.kite_scenes: 

436 if scene.meta.scene_id == scene_id: 

437 return scene 

438 raise NotFound('No kite scene with id "%s" defined.' % scene_id) 

439 

    def get_gnss_campaigns(self):
        '''Return the list of loaded GNSS campaigns.'''
        return self.gnss_campaigns

442 

443 def get_gnss_campaign(self, name): 

444 for camp in self.gnss_campaigns: 

445 if camp.name == name: 

446 return camp 

447 raise NotFound('GNSS campaign %s not found!' % name) 

448 

    def get_response(self, obj, quantity='displacement'):
        '''Find the unique instrument response for *obj*.

        :param obj: anything get_nslc()/get_tmin_tmax() accepts (a trace).
        :param quantity: 'displacement', 'velocity' or 'acceleration';
            SAC PZ responses are chained with differentiation stages,
            StationXML responses use fake input units.
        :raises NotFound: if no response information is loaded, the target
            is black-/not whitelisted, or if zero or more than one matching
            response is found.
        '''
        if (self.responses is None or len(self.responses) == 0) \
                and (self.responses_stationxml is None
                     or len(self.responses_stationxml) == 0):

            raise NotFound('No response information available.')

        # Maps quantity to the fake input unit used for StationXML lookup.
        quantity_to_unit = {
            'displacement': 'M',
            'velocity': 'M/S',
            'acceleration': 'M/S**2'}

        if self.is_blacklisted(obj):
            raise NotFound('Response is blacklisted:', self.get_nslc(obj))

        if not self.is_whitelisted(obj):
            raise NotFound('Response is not on whitelist:', self.get_nslc(obj))

        net, sta, loc, cha = self.get_nslc(obj)
        tmin, tmax = self.get_tmin_tmax(obj)

        # Lookup keys with decreasing specificity; duplicates removed while
        # preserving order.
        keys_x = [
            (net, sta, loc, cha), (net, sta, '', cha), ('', sta, '', cha)]

        keys = []
        for k in keys_x:
            if k not in keys:
                keys.append(k)

        candidates = []
        for k in keys:
            if k in self.responses:
                for x in self.responses[k]:
                    # Response epoch must fully cover the requested span.
                    if x.tmin < tmin and (x.tmax is None or tmax < x.tmax):
                        if quantity == 'displacement':
                            candidates.append(x.response)
                        elif quantity == 'velocity':
                            candidates.append(trace.MultiplyResponse([
                                x.response,
                                trace.DifferentiationResponse()]))
                        elif quantity == 'acceleration':
                            candidates.append(trace.MultiplyResponse([
                                x.response,
                                trace.DifferentiationResponse(2)]))
                        else:
                            assert False

        for sx in self.responses_stationxml:
            try:
                candidates.append(
                    sx.get_pyrocko_response(
                        (net, sta, loc, cha),
                        timespan=(tmin, tmax),
                        fake_input_units=quantity_to_unit[quantity]))

            except (fs.NoResponseInformation, fs.MultipleResponseInformation):
                pass

        # Exactly one candidate is required; ambiguity is an error.
        if len(candidates) == 1:
            return candidates[0]

        elif len(candidates) == 0:
            raise NotFound('No response found:', (net, sta, loc, cha))
        else:
            raise NotFound('Multiple responses found:', (net, sta, loc, cha))

514 

    def get_waveform_raw(
            self, obj,
            tmin,
            tmax,
            tpad=0.,
            toffset_noise_extract=0.,
            want_incomplete=False,
            extend_incomplete=False):
        '''Get raw waveform traces for *obj* in ``[tmin-tpad, tmax+tpad]``.

        :param toffset_noise_extract: extra time shift [s] applied when
            extracting (used to pull real noise from a different time
            window); traces are shifted back by this amount after loading.
        :param want_incomplete: when False, exactly one gapless trace is
            required.
        :param extend_incomplete: when True and exactly one trace was
            found, extend it to the full requested span by repetition.
        :raises NotFound: on blacklist/whitelist rejection, clipped data,
            or missing/gappy data.
        '''
        net, sta, loc, cha = self.get_nslc(obj)

        if self.is_blacklisted((net, sta, loc, cha)):
            raise NotFound(
                'Waveform is blacklisted:', (net, sta, loc, cha))

        if not self.is_whitelisted((net, sta, loc, cha)):
            raise NotFound(
                'Waveform is not on whitelist:', (net, sta, loc, cha))

        # Clipping check granularity depends on configuration.
        if self.clip_handling == 'by_nsl':
            if self.has_clipping((net, sta, loc), tmin, tmax):
                raise NotFound(
                    'Waveform clipped:', (net, sta, loc))

        elif self.clip_handling == 'by_nslc':
            if self.has_clipping((net, sta, loc, cha), tmin, tmax):
                raise NotFound(
                    'Waveform clipped:', (net, sta, loc, cha))

        trs = self.squirrel.get_waveforms(
            tmin=tmin+toffset_noise_extract-tpad,
            tmax=tmax+toffset_noise_extract+tpad,
            codes=(net, sta, loc, cha),
            want_incomplete=want_incomplete or extend_incomplete)

        if self.apply_displaced_sampling_workaround:
            for tr in trs:
                tr.snap()

        # Undo the noise-extraction offset so traces align with the
        # requested window.
        if toffset_noise_extract != 0.0:
            for tr in trs:
                tr.shift(-toffset_noise_extract)

        if extend_incomplete and len(trs) == 1:
            trs[0].extend(
                tmin + toffset_noise_extract - tpad,
                tmax + toffset_noise_extract + tpad,
                fillmethod='repeat')

        if not want_incomplete and len(trs) != 1:
            if len(trs) == 0:
                message = 'Waveform missing or incomplete.'
            else:
                message = 'Waveform has gaps.'

            raise NotFound(
                message,
                codes=(net, sta, loc, cha),
                time_range=(
                    tmin + toffset_noise_extract - tpad,
                    tmax + toffset_noise_extract + tpad))

        return trs

578 

579 def get_waveform_restituted( 

580 self, 

581 obj, quantity='displacement', 

582 tmin=None, tmax=None, tpad=0., 

583 tfade=0., freqlimits=None, deltat=None, 

584 toffset_noise_extract=0., 

585 want_incomplete=False, 

586 extend_incomplete=False): 

587 

588 trs_raw = self.get_waveform_raw( 

589 obj, tmin=tmin, tmax=tmax, tpad=tpad+tfade, 

590 toffset_noise_extract=toffset_noise_extract, 

591 want_incomplete=want_incomplete, 

592 extend_incomplete=extend_incomplete) 

593 

594 trs_restituted = [] 

595 for tr in trs_raw: 

596 if deltat is not None: 

597 try: 

598 tr.downsample_to(deltat, snap=True, allow_upsample_max=5) 

599 

600 except util.UnavailableDecimation: 

601 logger.warn('using resample instead of decimation') 

602 tr.resample(deltat) 

603 

604 tr.deltat = deltat 

605 

606 resp = self.get_response(tr, quantity=quantity) 

607 try: 

608 trs_restituted.append( 

609 tr.transfer( 

610 tfade=tfade, freqlimits=freqlimits, 

611 transfer_function=resp, invert=True, demean=True)) 

612 

613 except trace.InfiniteResponse: 

614 raise NotFound( 

615 'Instrument response deconvolution failed ' 

616 '(divide by zero)', tr.nslc_id) 

617 

618 return trs_restituted, trs_raw 

619 

    def _get_projections(
            self, station, backazimuth, source, target, tmin, tmax):
        '''Collect component projection matrices for *station*.

        Always includes ENU projections; RTZ projections are added when a
        backazimuth is given or can be derived from *source*/*target*.

        :raises NotFound: if no projection can be determined.
        '''
        # fill in missing channel information (happens when station file
        # does not contain any channel information)
        if not station.get_channels():
            station = copy.deepcopy(station)

            nsl = station.nsl()
            # NOTE(review): accesses `self.squirrel.pile` — assumes the
            # squirrel object exposes a pile-like interface; confirm
            # against the installed pyrocko version.
            trs = self.squirrel.pile.all(
                tmin=tmin, tmax=tmax,
                trace_selector=lambda tr: tr.nslc_id[:3] == nsl,
                load_data=False)

            channels = list(set(tr.channel for tr in trs))
            station.set_channels_by_name(*channels)

        projections = []
        projections.extend(station.guess_projections_to_enu(
            out_channels=('E', 'N', 'Z')))

        # source/target geometry takes precedence over an explicitly given
        # backazimuth.
        if source is not None and target is not None:
            backazimuth = source.azibazi_to(target)[1]

        if backazimuth is not None:
            projections.extend(station.guess_projections_to_rtu(
                out_channels=('R', 'T', 'Z'),
                backazimuth=backazimuth))

        if not projections:
            raise NotFound(
                'Cannot determine projection of data components:',
                station.nsl())

        return projections

655 

656 def _get_waveform( 

657 self, 

658 obj, quantity='displacement', 

659 tmin=None, tmax=None, tpad=0., 

660 tfade=0., freqlimits=None, deltat=None, cache=None, 

661 backazimuth=None, 

662 source=None, 

663 target=None, 

664 debug=False): 

665 

666 assert not debug or (debug and cache is None) 

667 

668 if cache is True: 

669 cache = self._cache 

670 

671 _, _, _, channel = self.get_nslc(obj) 

672 station = self.get_station(self.get_nsl(obj)) 

673 

674 nslc = station.nsl() + (channel,) 

675 

676 if self.is_blacklisted(nslc): 

677 raise NotFound( 

678 'Waveform is blacklisted:', nslc) 

679 

680 if not self.is_whitelisted(nslc): 

681 raise NotFound( 

682 'Waveform is not on whitelist:', nslc) 

683 

684 assert tmin is not None 

685 assert tmax is not None 

686 

687 tmin = float(tmin) 

688 tmax = float(tmax) 

689 

690 nslc = tuple(nslc) 

691 

692 cache_k = nslc + ( 

693 tmin, tmax, tuple(freqlimits), tfade, deltat, tpad, quantity) 

694 if cache is not None and (nslc + cache_k) in cache: 

695 obj = cache[nslc + cache_k] 

696 if isinstance(obj, Exception): 

697 raise obj 

698 elif obj is None: 

699 raise NotFound('Waveform not found!', nslc) 

700 else: 

701 return obj 

702 

703 syn_test = self.synthetic_test 

704 toffset_noise_extract = 0.0 

705 if syn_test: 

706 if not syn_test.respect_data_availability: 

707 if syn_test.real_noise_scale != 0.0: 

708 raise DatasetError( 

709 'respect_data_availability=False and ' 

710 'addition of real noise cannot be combined.') 

711 

712 tr = syn_test.get_waveform( 

713 nslc, tmin, tmax, 

714 tfade=tfade, 

715 freqlimits=freqlimits) 

716 

717 if cache is not None: 

718 cache[tr.nslc_id + cache_k] = tr 

719 

720 if debug: 

721 return [], [], [] 

722 else: 

723 return tr 

724 

725 if syn_test.real_noise_scale != 0.0: 

726 toffset_noise_extract = syn_test.real_noise_toffset 

727 

728 abs_delays = [] 

729 for ocha in 'ENZRT': 

730 sc = self.station_corrections.get(station.nsl() + (channel,), None) 

731 if sc: 

732 abs_delays.append(abs(sc.delay)) 

733 

734 if abs_delays: 

735 abs_delay_max = max(abs_delays) 

736 else: 

737 abs_delay_max = 0.0 

738 

739 projections = self._get_projections( 

740 station, backazimuth, source, target, tmin, tmax) 

741 

742 try: 

743 trs_projected = [] 

744 trs_restituted = [] 

745 trs_raw = [] 

746 exceptions = [] 

747 for matrix, in_channels, out_channels in projections: 

748 deps = trace.project_dependencies( 

749 matrix, in_channels, out_channels) 

750 

751 try: 

752 trs_restituted_group = [] 

753 trs_raw_group = [] 

754 if channel in deps: 

755 for cha in deps[channel]: 

756 trs_restituted_this, trs_raw_this = \ 

757 self.get_waveform_restituted( 

758 station.nsl() + (cha,), 

759 quantity=quantity, 

760 tmin=tmin, tmax=tmax, 

761 tpad=tpad+abs_delay_max, 

762 toffset_noise_extract=toffset_noise_extract, # noqa 

763 tfade=tfade, 

764 freqlimits=freqlimits, 

765 deltat=deltat, 

766 want_incomplete=debug, 

767 extend_incomplete=self.extend_incomplete) 

768 

769 trs_restituted_group.extend(trs_restituted_this) 

770 trs_raw_group.extend(trs_raw_this) 

771 

772 trs_projected.extend( 

773 trace.project( 

774 trs_restituted_group, matrix, 

775 in_channels, out_channels)) 

776 

777 trs_restituted.extend(trs_restituted_group) 

778 trs_raw.extend(trs_raw_group) 

779 

780 except NotFound as e: 

781 exceptions.append((in_channels, out_channels, e)) 

782 

783 if not trs_projected: 

784 err = [] 

785 for (in_channels, out_channels, e) in exceptions: 

786 sin = ', '.join(c.name for c in in_channels) 

787 sout = ', '.join(c.name for c in out_channels) 

788 err.append('(%s) -> (%s): %s' % (sin, sout, e)) 

789 

790 raise NotFound('\n'.join(err)) 

791 

792 for tr in trs_projected: 

793 sc = self.station_corrections.get(tr.nslc_id, None) 

794 if sc: 

795 if self.apply_correction_factors: 

796 tr.ydata /= sc.factor 

797 

798 if self.apply_correction_delays: 

799 tr.shift(-sc.delay) 

800 

801 if tmin is not None and tmax is not None: 

802 tr.chop(tmin, tmax) 

803 

804 if syn_test: 

805 trs_projected_synthetic = [] 

806 for tr in trs_projected: 

807 if tr.channel == channel: 

808 tr_syn = syn_test.get_waveform( 

809 tr.nslc_id, tmin, tmax, 

810 tfade=tfade, freqlimits=freqlimits) 

811 

812 if tr_syn: 

813 if syn_test.real_noise_scale != 0.0: 

814 tr_syn = tr_syn.copy() 

815 tr_noise = tr.copy() 

816 tr_noise.set_ydata( 

817 tr_noise.get_ydata() 

818 * syn_test.real_noise_scale) 

819 

820 tr_syn.add(tr_noise) 

821 

822 trs_projected_synthetic.append(tr_syn) 

823 

824 trs_projected = trs_projected_synthetic 

825 

826 if cache is not None: 

827 for tr in trs_projected: 

828 cache[tr.nslc_id + cache_k] = tr 

829 

830 tr_return = None 

831 for tr in trs_projected: 

832 if tr.channel == channel: 

833 tr_return = tr 

834 

835 if debug: 

836 return trs_projected, trs_restituted, trs_raw, tr_return 

837 

838 elif tr_return: 

839 return tr_return 

840 

841 else: 

842 raise NotFound( 

843 'waveform not available', station.nsl() + (channel,)) 

844 

845 except NotFound: 

846 if cache is not None: 

847 cache[nslc + cache_k] = None 

848 raise 

849 

850 def get_waveform(self, obj, tinc_cache=None, **kwargs): 

851 tmin = kwargs['tmin'] 

852 tmax = kwargs['tmax'] 

853 if tinc_cache is not None: 

854 tmin_r = (math.floor(tmin / tinc_cache) - 1.0) * tinc_cache 

855 tmax_r = (math.ceil(tmax / tinc_cache) + 1.0) * tinc_cache 

856 else: 

857 tmin_r = tmin 

858 tmax_r = tmax 

859 

860 kwargs['tmin'] = tmin_r 

861 kwargs['tmax'] = tmax_r 

862 

863 if kwargs.get('debug', None): 

864 return self._get_waveform(obj, **kwargs) 

865 else: 

866 tr = self._get_waveform(obj, **kwargs) 

867 return tr.chop(tmin, tmax, inplace=False) 

868 

869 def get_events(self, magmin=None, event_names=None): 

870 evs = [] 

871 for ev in self.events: 

872 if ((magmin is None or ev.magnitude >= magmin) and 

873 (event_names is None or ev.name in event_names)): 

874 evs.append(ev) 

875 

876 return evs 

877 

878 def get_event_by_time(self, t, magmin=None): 

879 evs = self.get_events(magmin=magmin) 

880 ev_x = None 

881 for ev in evs: 

882 if ev_x is None or abs(ev.time - t) < abs(ev_x.time - t): 

883 ev_x = ev 

884 

885 if not ev_x: 

886 raise NotFound( 

887 'No event information matching criteria (t=%s, magmin=%s).' % 

888 (t, magmin)) 

889 

890 return ev_x 

891 

892 def get_event(self): 

893 if self._event_name is None: 

894 raise NotFound('No main event selected in dataset!') 

895 

896 for ev in self.events: 

897 if ev.name == self._event_name: 

898 return ev 

899 

900 raise NotFound('No such event: %s' % self._event_name) 

901 

    def get_picks(self):
        '''Build (lazily) and return the pick lookup table.

        Keys are ``(nsl, phasename, eventname)`` tuples, values are phase
        markers. Event association works via the event hash stored in the
        markers, resolved against event markers in the pick files and the
        dataset's own events.

        :raises DatasetError: on duplicate event names, unassociated picks
            or duplicate picks.
        '''
        if self._picks is None:
            hash_to_name = {}
            names = set()
            for marker in self.pick_markers:
                if isinstance(marker, pmarker.EventMarker):
                    name = marker.get_event().name
                    if name in names:
                        raise DatasetError(
                            'Duplicate event name "%s" in picks.' % name)

                    names.add(name)
                    hash_to_name[marker.get_event_hash()] = name

            # Dataset events take precedence over equally hashed event
            # markers (later update wins).
            for ev in self.events:
                hash_to_name[ev.get_hash()] = ev.name

            picks = {}
            for marker in self.pick_markers:
                if isinstance(marker, pmarker.PhaseMarker):
                    ehash = marker.get_event_hash()

                    nsl = marker.one_nslc()[:3]
                    phasename = marker.get_phasename()

                    if ehash is None or ehash not in hash_to_name:
                        raise DatasetError(
                            'Unassociated pick: %s.%s.%s, %s' %
                            (nsl + (phasename, )))

                    eventname = hash_to_name[ehash]

                    if (nsl, phasename, eventname) in picks:
                        raise DatasetError(
                            'Duplicate pick: %s.%s.%s, %s' %
                            (nsl + (phasename, )))

                    picks[nsl, phasename, eventname] = marker

            self._picks = picks

        return self._picks

944 

    def get_pick(self, eventname, obj, phasename):
        '''Return the pick marker for (station of *obj*, *phasename*,
        *eventname*), or None if no such pick exists.'''
        nsl = self.get_nsl(obj)
        return self.get_picks().get((nsl, phasename, eventname), None)

948 

949 

class DatasetConfig(HasPaths):
    ''' Configuration for a Grond `Dataset` object. '''

    stations_path = Path.T(
        optional=True,
        help='List of files with station coordinates in Pyrocko format.')
    stations_stationxml_paths = List.T(
        Path.T(),
        optional=True,
        help='List of files with station coordinates in StationXML format.')
    events_path = Path.T(
        optional=True,
        help='File with hypocenter information and possibly'
             ' reference solution')
    waveform_paths = List.T(
        Path.T(),
        optional=True,
        help='List of directories with raw waveform data')
    clippings_path = Path.T(
        optional=True)
    responses_sacpz_path = Path.T(
        optional=True,
        help='List of SACPZ response files for restitution of'
             ' the raw waveform data.')
    responses_stationxml_paths = List.T(
        Path.T(),
        optional=True,
        help='List of StationXML response files for restitution of'
             ' the raw waveform data.')
    station_corrections_path = Path.T(
        optional=True,
        help='File containing station correction informations.')
    apply_correction_factors = Bool.T(
        optional=True,
        default=True,
        help='Apply correction factors from station corrections.')
    apply_correction_delays = Bool.T(
        optional=True,
        default=True,
        help='Apply correction delays from station corrections.')
    apply_displaced_sampling_workaround = Bool.T(
        optional=True,
        default=False,
        help='Work around displaced sampling issues.')
    extend_incomplete = Bool.T(
        default=False,
        help='Extend incomplete seismic traces.')
    picks_paths = List.T(
        Path.T())
    blacklist_paths = List.T(
        Path.T(),
        help='List of text files with blacklisted stations.')
    blacklist = List.T(
        String.T(),
        help='Stations/components to be excluded according to their STA, '
             'NET.STA, NET.STA.LOC, or NET.STA.LOC.CHA codes.')
    whitelist_paths = List.T(
        Path.T(),
        help='List of text files with whitelisted stations.')
    whitelist = List.T(
        String.T(),
        optional=True,
        help='If not None, list of stations/components to include according '
             'to their STA, NET.STA, NET.STA.LOC, or NET.STA.LOC.CHA codes. '
             'Note: ''when whitelisting on channel level, both, the raw and '
             'the processed channel codes have to be listed.')
    synthetic_test = SyntheticTest.T(
        optional=True)

    kite_scene_paths = List.T(
        Path.T(),
        optional=True,
        help='List of directories for the InSAR scenes.')

    gnss_campaign_paths = List.T(
        Path.T(),
        optional=True,
        help='List of directories for the GNSS campaign data.')

    def __init__(self, *args, **kwargs):
        HasPaths.__init__(self, *args, **kwargs)
        # event_name -> cached Dataset instance (see get_dataset()).
        self._ds = {}

    def get_event_names(self):
        '''Return the sorted names of all events found via
        ``events_path`` (with the event_name template expanded to "*").

        :raises DatasetError: when no event file matches.
        '''
        logger.info('Loading events ...')

        def extra(path):
            return expand_template(path, dict(
                event_name='*'))

        def fp(path):
            return self.expand_path(path, extra=extra)

        def check_events(events, fn):
            # Sanity warnings only; nothing is rejected here. Checking of
            # a file stops at the first unnamed event.
            for ev in events:
                if not ev.name:
                    logger.warning('Event in %s has no name!', fn)
                    return
                if not ev.lat or not ev.lon:
                    # NOTE(review): `not ev.lat` also triggers for a
                    # legitimate coordinate of exactly 0.0 — confirm
                    # intended.
                    logger.warning('Event %s has inconsistent coordinates!',
                                   ev.name)
                if not ev.depth:
                    logger.warning('Event %s has no depth!', ev.name)
                if not ev.time:
                    logger.warning('Event %s has no time!', ev.name)

        events = []
        events_path = fp(self.events_path)
        fns = glob.glob(events_path)
        if not fns:
            raise DatasetError('No event files matching "%s".' % events_path)

        for fn in fns:
            logger.debug('Loading from file %s' % fn)
            ev = model.load_events(filename=fn)
            check_events(ev, fn)

            events.extend(ev)

        event_names = [ev_.name for ev_ in events]
        event_names.sort()
        return event_names

    def get_dataset(self, event_name):
        '''Build (or return the cached) :py:class:`Dataset` for
        *event_name*, populated according to this configuration.

        :raises DatasetError: when any configured file cannot be loaded.
        '''
        if event_name not in self._ds:
            def extra(path):
                return expand_template(path, dict(
                    event_name=event_name))

            def fp(path):
                # Expand a configured path (or list of paths) and warn
                # about entries that do not exist on disk.
                p = self.expand_path(path, extra=extra)
                if p is None:
                    return None

                if isinstance(p, list):
                    for path in p:
                        if not op.exists(path):
                            logger.warning('Path %s does not exist.' % path)
                else:
                    if not op.exists(p):
                        logger.warning('Path %s does not exist.' % p)

                return p

            ds = Dataset(event_name)
            try:
                ds.add_events(filename=fp(self.events_path))

                ds.add_stations(
                    pyrocko_stations_filename=fp(self.stations_path),
                    stationxml_filenames=fp(self.stations_stationxml_paths))

                if self.waveform_paths:
                    ds.add_waveforms(paths=fp(self.waveform_paths))

                if self.kite_scene_paths:
                    ds.add_kite_scenes(paths=fp(self.kite_scene_paths))

                if self.gnss_campaign_paths:
                    ds.add_gnss_campaigns(paths=fp(self.gnss_campaign_paths))

                if self.clippings_path:
                    ds.add_clippings(markers_filename=fp(self.clippings_path))

                if self.responses_sacpz_path:
                    ds.add_responses(
                        sacpz_dirname=fp(self.responses_sacpz_path))

                if self.responses_stationxml_paths:
                    ds.add_responses(
                        stationxml_filenames=fp(
                            self.responses_stationxml_paths))

                if self.station_corrections_path:
                    ds.add_station_corrections(
                        filename=fp(self.station_corrections_path))

                ds.apply_correction_factors = self.apply_correction_factors
                ds.apply_correction_delays = self.apply_correction_delays
                ds.apply_displaced_sampling_workaround = \
                    self.apply_displaced_sampling_workaround
                ds.extend_incomplete = self.extend_incomplete

                for picks_path in self.picks_paths:
                    ds.add_picks(
                        filename=fp(picks_path))

                ds.add_blacklist(self.blacklist)
                ds.add_blacklist(filenames=fp(self.blacklist_paths))
                if self.whitelist:
                    ds.add_whitelist(self.whitelist)
                if self.whitelist_paths:
                    ds.add_whitelist(filenames=fp(self.whitelist_paths))

                # Deep-copied so each dataset owns its synthetic test state.
                ds.set_synthetic_test(copy.deepcopy(self.synthetic_test))
                self._ds[event_name] = ds
            except (FileLoadError, OSError) as e:
                raise DatasetError(str(e))

        return self._ds[event_name]

1150 

1151 

# Public API of this module.
__all__ = '''
    Dataset
    DatasetConfig
    DatasetError
    InvalidObject
    NotFound
    StationCorrection
    load_station_corrections
    dump_station_corrections
'''.split()