# http://pyrocko.org - GPLv3
#
# The Pyrocko Developers, 21st Century
# ---|P------/S----------~Lg----------
from __future__ import absolute_import, division, print_function

import hashlib
import math
import logging
import numpy as num

from os import path as op
from functools import reduce

from pyrocko.guts import StringChoice, Float, List, Bool
from pyrocko.gui.marker import PhaseMarker, EventMarker
from pyrocko import gf, model, util, trace, io
from pyrocko.io.io_common import FileSaveError
from pyrocko import pile

from ..station import StationGenerator, RandomStationGenerator
from .base import TargetGenerator, NoiseGenerator
from ..error import ScenarioError


DEFAULT_STORE_ID = 'global_2s'

logger = logging.getLogger('pyrocko.scenario.targets.waveform')
guts_prefix = 'pf.scenario'

class WaveformNoiseGenerator(NoiseGenerator):

    def get_time_increment(self, deltat):
        return deltat * 1024

    def get_intersecting_snippets(self, deltat, codes, tmin, tmax):
        raise NotImplementedError()

    def add_noise(self, tr):
        for ntr in self.get_intersecting_snippets(
                tr.deltat, tr.nslc_id, tr.tmin, tr.tmax):
            tr.add(ntr)

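# WhiteNoiseGenerator (below) fills each fixed-length time window with
# Gaussian noise whose random seed is derived from the sampling interval, the
# window index and the channel codes, so repeated or overlapping requests
# reproduce identical noise samples.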

class WhiteNoiseGenerator(WaveformNoiseGenerator):

    scale = Float.T(default=1e-6)

    def get_seed_offset2(self, deltat, iw, codes):
        m = hashlib.sha1(('%e %i %s.%s.%s.%s' % ((deltat, iw) + codes))
                         .encode('utf8'))
        return int(m.hexdigest(), base=16) % 10000000

    def get_intersecting_snippets(self, deltat, codes, tmin, tmax):
        tinc = self.get_time_increment(deltat)
        iwmin = int(math.floor(tmin / tinc))
        iwmax = int(math.floor(tmax / tinc))

        trs = []
        for iw in range(iwmin, iwmax+1):
            seed_offset = self.get_seed_offset2(deltat, iw, codes)
            rstate = self.get_rstate(seed_offset)

            n = int(round(tinc // deltat))

            trs.append(trace.Trace(
                codes[0], codes[1], codes[2], codes[3],
                deltat=deltat,
                tmin=iw*tinc,
                ydata=rstate.normal(loc=0, scale=self.scale, size=n)))

        return trs

class WaveformGenerator(TargetGenerator):

    station_generators = List.T(
        StationGenerator.T(),
        default=[RandomStationGenerator.D()],
        help='List of StationGenerators.')

    noise_generator = WaveformNoiseGenerator.T(
        default=WhiteNoiseGenerator.D(),
        help='Add synthetic noise to the waveforms.')

    store_id = gf.StringID.T(
        default=DEFAULT_STORE_ID,
        help='The GF store to use for forward-calculations.')

    seismogram_quantity = StringChoice.T(
        choices=['displacement', 'velocity', 'acceleration', 'counts'],
        default='displacement')

    vmin_cut = Float.T(
        default=2000.,
        help='Minimum seismic velocity to consider in the model [m/s].')

    vmax_cut = Float.T(
        default=8000.,
        help='Maximum seismic velocity to consider in the model [m/s].')

    fmin = Float.T(
        default=0.01,
        help='Minimum frequency/wavelength to resolve in the'
             ' synthetic waveforms.')

    tabulated_phases = List.T(
        gf.meta.TPDef.T(), optional=True,
        help='Define seismic phases to be calculated.')

    tabulated_phases_from_store = Bool.T(
        default=False,
        help='Calculate seismic phase arrivals for all travel-time tables '
             'defined in GF store.')

    tabulated_phases_noise_scale = Float.T(
        default=0.0,
        help='Standard deviation of normally distributed noise added to '
             'calculated phase arrivals.')

    taper = trace.Taper.T(
        optional=True,
        help='Time domain taper applied to synthetic waveforms.')

    compensate_synthetic_offsets = Bool.T(
        default=False,
        help='Center synthetic trace amplitudes using mean of waveform tips.')

    tinc = Float.T(
        optional=True,
        help='Time increment of waveforms.')

    continuous = Bool.T(
        default=True,
        help='If disabled, only produce traces that intersect with events.')

    def __init__(self, *args, **kwargs):
        super(WaveformGenerator, self).__init__(*args, **kwargs)
        self._targets = []
        self._piles = {}

    def _get_pile(self, path):
        apath = op.abspath(path)
        assert op.isdir(apath)

        if apath not in self._piles:
            fns = util.select_files(
                [apath], show_progress=False)

            p = pile.Pile()
            if fns:
                p.load_files(fns, fileformat='mseed', show_progress=False)

            self._piles[apath] = p

        return self._piles[apath]

    def get_stations(self):
        stations = []
        for station_generator in self.station_generators:
            stations.extend(station_generator.get_stations())
        return stations

    def get_distance_range(self, sources):
        distances = num.array(
            [sg.get_distance_range(sources)
             for sg in self.station_generators])
        return (distances[:, 0].min(), distances[:, 1].max())

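    # get_targets() builds one gf.Target per station channel. Stations
    # without channel information fall back to a default BHZ/BHE/BHN set with
    # orientations guessed from the channel names. Results are cached in
    # self._targets.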

    def get_targets(self):
        if self._targets:
            return self._targets

        for station in self.get_stations():
            channel_data = []
            channels = station.get_channels()
            if channels:
                for channel in channels:
                    channel_data.append([
                        channel.name,
                        channel.azimuth,
                        channel.dip])

            else:
                for c_name in ['BHZ', 'BHE', 'BHN']:
                    channel_data.append([
                        c_name,
                        model.guess_azimuth_from_name(c_name),
                        model.guess_dip_from_name(c_name)])

            for c_name, c_azi, c_dip in channel_data:

                target = gf.Target(
                    codes=(
                        station.network,
                        station.station,
                        station.location,
                        c_name),
                    quantity='displacement',
                    lat=station.lat,
                    lon=station.lon,
                    north_shift=station.north_shift,
                    east_shift=station.east_shift,
                    depth=station.depth,
                    store_id=self.store_id,
                    optimization='enable',
                    interpolation='nearest_neighbor',
                    azimuth=c_azi,
                    dip=c_dip)

                self._targets.append(target)

        return self._targets

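    # The modelling time window runs from the earliest event plus the fastest
    # possible arrival (dmin / vmax_cut) to the latest event plus the slowest
    # arrival (dmax / vmin_cut), padded by ten dominant periods (10 / fmin) on
    # either side.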

    def get_time_range(self, sources):
        dmin, dmax = self.get_distance_range(sources)

        times = num.array([source.time for source in sources],
                          dtype=util.get_time_dtype())

        tmin_events = num.min(times)
        tmax_events = num.max(times)

        tmin = tmin_events + dmin / self.vmax_cut - 10.0 / self.fmin
        tmax = tmax_events + dmax / self.vmin_cut + 10.0 / self.fmin

        return tmin, tmax

    def get_codes_to_deltat(self, engine, sources):
        deltats = {}

        targets = self.get_targets()
        for source in sources:
            for target in targets:
                deltats[target.codes] = engine.get_store(
                    target.store_id).config.deltat

        return deltats

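    # Choose a processing window length long enough for the slowest arrivals
    # plus two dominant periods, rounded to a multiple of a sampling interval
    # common to all targets.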

    def get_useful_time_increment(self, engine, sources):
        _, dmax = self.get_distance_range(sources)
        tinc = dmax / self.vmin_cut + 2.0 / self.fmin

        deltats = set(self.get_codes_to_deltat(engine, sources).values())
        deltat = reduce(util.lcm, deltats)
        tinc = int(round(tinc / deltat)) * deltat
        return tinc

    def get_relevant_sources(self, sources, tmin, tmax):
        dmin, dmax = self.get_distance_range(sources)
        trange = tmax - tmin
        tmax_pad = trange + tmax + dmin / self.vmax_cut
        tmin_pad = tmin - (dmax / self.vmin_cut + trange)

        return [s for s in sources if s.time < tmax_pad and s.time > tmin_pad]

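    # get_waveforms() first prepares noise-only traces covering the requested
    # window for every channel, then superimposes the forward-modelled
    # synthetics of all sources relevant to that window.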

    def get_waveforms(self, engine, sources, tmin, tmax):

        sources_relevant = self.get_relevant_sources(sources, tmin, tmax)
        if not (self.continuous or sources_relevant):
            return []

        trs = {}
        tts = util.time_to_str

        for nslc, deltat in self.get_codes_to_deltat(engine, sources).items():
            tr_tmin = round(tmin / deltat) * deltat
            tr_tmax = (round(tmax / deltat)-1) * deltat
            nsamples = int(round((tr_tmax - tr_tmin) / deltat)) + 1

            tr = trace.Trace(
                *nslc,
                tmin=tr_tmin,
                ydata=num.zeros(nsamples),
                deltat=deltat)

            self.noise_generator.add_noise(tr)

            trs[nslc] = tr

        logger.debug('Forward modelling waveforms between %s - %s...'
                     % (tts(tmin, format='%Y-%m-%d_%H-%M-%S'),
                        tts(tmax, format='%Y-%m-%d_%H-%M-%S')))

        if not sources_relevant:
            return list(trs.values())

        targets = self.get_targets()
        response = engine.process(sources_relevant, targets)
        for source, target, res in response.iter_results(
                get='results'):

            if isinstance(res, gf.SeismosizerError):
                logger.warning(
                    'Out of bounds! \nTarget: %s\nSource: %s\n' % (
                        '.'.join(target.codes), source))
                continue

            tr = res.trace.pyrocko_trace()

            candidate = trs[target.codes]
            if not candidate.overlaps(tr.tmin, tr.tmax):
                continue

            if self.compensate_synthetic_offsets:
                tr.ydata -= (num.mean(tr.ydata[-3:-1]) +
                             num.mean(tr.ydata[1:3])) / 2.

            if self.taper:
                tr.taper(self.taper)

            resp = self.get_transfer_function(target.codes)
            if resp:
                tr = tr.transfer(transfer_function=resp)

            candidate.add(tr)
            trs[target.codes] = candidate

        return list(trs.values())

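    # get_onsets() produces one EventMarker per source and one PhaseMarker
    # per tabulated phase arrival; arrivals can be perturbed with Gaussian
    # noise via tabulated_phases_noise_scale.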

    def get_onsets(self, engine, sources, *args, **kwargs):

        targets = {t.codes[:3]: t for t in self.get_targets()}

        markers = []
        for source in sources:
            ev = source.pyrocko_event()
            markers.append(EventMarker(ev))
            for nsl, target in targets.items():
                store = engine.get_store(target.store_id)
                if self.tabulated_phases:
                    tabulated_phases = self.tabulated_phases

                elif self.tabulated_phases_from_store:
                    tabulated_phases = store.config.tabulated_phases
                else:
                    tabulated_phases = []

                for phase in tabulated_phases:
                    t = store.t(phase.id, source, target)
                    if not t:
                        continue

                    noise_scale = self.tabulated_phases_noise_scale
                    if noise_scale != 0.0:
                        t += num.random.normal(scale=noise_scale)

                    t += source.time
                    markers.append(
                        PhaseMarker(
                            phasename=phase.id,
                            tmin=t,
                            tmax=t,
                            event=source.pyrocko_event(),
                            nslc_ids=(nsl+('*',),)
                        )
                    )
        return markers

    def get_transfer_function(self, codes):
        if self.seismogram_quantity == 'displacement':
            return None
        elif self.seismogram_quantity == 'velocity':
            return trace.DifferentiationResponse(1)
        elif self.seismogram_quantity == 'acceleration':
            return trace.DifferentiationResponse(2)
        elif self.seismogram_quantity == 'counts':
            raise NotImplementedError()

    def ensure_data(self, engine, sources, path, tmin=None, tmax=None):
        self.ensure_waveforms(engine, sources, path, tmin, tmax)
        self.ensure_responses(path)

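    # ensure_waveforms() generates data window by window and writes MiniSEED
    # files into a date-based directory tree below '<path>/waveforms'.
    # Windows for which matching traces already exist in the pile are
    # skipped, so the method can be re-run incrementally.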

    def ensure_waveforms(self, engine, sources, path, tmin=None, tmax=None):

        path_waveforms = op.join(path, 'waveforms')
        util.ensuredir(path_waveforms)

        p = self._get_pile(path_waveforms)

        nslc_ids = set(target.codes for target in self.get_targets())

        def have_waveforms(tmin, tmax):
            trs_have = p.all(
                tmin=tmin, tmax=tmax,
                load_data=False, degap=False,
                trace_selector=lambda tr: tr.nslc_id in nslc_ids)

            return any(tr.data_len() > 0 for tr in trs_have)

        def add_files(paths):
            p.load_files(paths, fileformat='mseed', show_progress=False)

        path_traces = op.join(
            path_waveforms,
            '%(wmin_year)s',
            '%(wmin_month)s',
            '%(wmin_day)s',
            'waveform_%(network)s_%(station)s_' +
            '%(location)s_%(channel)s_%(tmin)s_%(tmax)s.mseed')

        tmin_all, tmax_all = self.get_time_range(sources)
        tmin = tmin if tmin is not None else tmin_all
        tmax = tmax if tmax is not None else tmax_all
        tts = util.time_to_str

        tinc = self.tinc or self.get_useful_time_increment(engine, sources)
        tmin = num.floor(tmin / tinc) * tinc
        tmax = num.ceil(tmax / tinc) * tinc

        nwin = int(round((tmax - tmin) / tinc))

        pbar = None
        for iwin in range(nwin):
            tmin_win = tmin + iwin*tinc
            tmax_win = tmin + (iwin+1)*tinc

            if have_waveforms(tmin_win, tmax_win):
                continue

            if pbar is None:
                pbar = util.progressbar('Generating waveforms', (nwin-iwin))

            pbar.update(iwin)

            trs = self.get_waveforms(engine, sources, tmin_win, tmax_win)

            try:
                wpaths = io.save(
                    trs, path_traces,
                    additional=dict(
                        wmin_year=tts(tmin_win, format='%Y'),
                        wmin_month=tts(tmin_win, format='%m'),
                        wmin_day=tts(tmin_win, format='%d'),
                        wmin=tts(tmin_win, format='%Y-%m-%d_%H-%M-%S'),
                        wmax_year=tts(tmax_win, format='%Y'),
                        wmax_month=tts(tmax_win, format='%m'),
                        wmax_day=tts(tmax_win, format='%d'),
                        wmax=tts(tmax_win, format='%Y-%m-%d_%H-%M-%S')))

                for wpath in wpaths:
                    logger.debug('Generated file: %s' % wpath)

                add_files(wpaths)

            except FileSaveError as e:
                raise ScenarioError(str(e))

        if pbar is not None:
            pbar.finish()

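    # ensure_responses() writes a StationXML file carrying a flat, unity-gain
    # response per channel, mapping the configured seismogram quantity to
    # counts.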

    def ensure_responses(self, path):
        from pyrocko.io import stationxml

        path_responses = op.join(path, 'meta')
        util.ensuredir(path_responses)

        fn_stationxml = op.join(path_responses, 'waveform_response.xml')
        i = 0
        while op.exists(fn_stationxml):
            fn_stationxml = op.join(
                path_responses, 'waveform_response-%s.xml' % i)
            i += 1

        logger.debug('Writing waveform meta information to StationXML...')

        stations = self.get_stations()
        sxml = stationxml.FDSNStationXML.from_pyrocko_stations(stations)

        sunit = {
            'displacement': 'M',
            'velocity': 'M/S',
            'acceleration': 'M/S**2',
            'counts': 'COUNTS'}[self.seismogram_quantity]

        response = stationxml.Response(
            instrument_sensitivity=stationxml.Sensitivity(
                value=1.,
                frequency=1.,
                input_units=stationxml.Units(sunit),
                output_units=stationxml.Units('COUNTS')),
            stage_list=[])

        for net, station, channel in sxml.iter_network_station_channels():
            channel.response = response

        sxml.dump_xml(filename=fn_stationxml)

    def add_map_artists(self, engine, sources, automap):
        automap.add_stations(self.get_stations())
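

# Usage sketch (illustrative only, not part of the original module): a
# WaveformGenerator is normally driven by the surrounding scenario machinery,
# roughly along these lines, where `engine`, `sources` and `scenario_path`
# are placeholders supplied by the caller:
#
#     engine = gf.LocalEngine(use_config=True)
#     gen = WaveformGenerator(store_id=DEFAULT_STORE_ID)
#     gen.ensure_data(engine, sources, scenario_path)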