1# http://pyrocko.org - GPLv3 

2# 

3# The Pyrocko Developers, 21st Century 

4# ---|P------/S----------~Lg---------- 

5 

6from pyrocko.guts import StringChoice, Object, String, List 

7from pyrocko import util 

8 

9from pyrocko.squirrel.model import CodesNSLCE 

10from pyrocko.squirrel.operators.base import CodesPatternFiltering 

11from pyrocko.squirrel.model import codes_patterns_for_kind, to_kind_id 

12 

# NOTE(review): presumably read by pyrocko.guts as the namespace prefix for
# the Object subclasses defined in this module - confirm against pyrocko.guts.
guts_prefix = 'squirrel'

14 

15 

def get_matching(coverages, coverage):
    '''
    Select the coverages sharing their codes with a reference coverage.

    The result is ranked: candidates whose ``deltat`` equals the one of
    ``coverage`` come first, then candidates without a ``deltat``, then
    candidates with a differing ``deltat``.

    :param coverages:
        Candidate coverage objects (must provide ``codes`` and ``deltat``).

    :param coverage:
        Reference coverage object the candidates are compared against.

    :returns:
        List of the candidates with equal ``codes``, best match first.
    '''
    def rank(candidate):
        return (coverage.deltat == candidate.deltat, not candidate.deltat)

    same_codes = [
        candidate for candidate in coverages
        if candidate.codes == coverage.codes]

    # Stable ascending sort followed by a full reversal: highest rank first,
    # equally ranked candidates end up in reverse input order.
    same_codes.sort(key=rank)

    return same_codes[::-1]

28 

29 

class SquirrelCheckProblemType(StringChoice):
    '''
    Potential dataset/metadata problem types.

    .. list-table:: Squirrel check problem types
        :widths: 10 90
        :header-rows: 1

        * - Type
          - Description
%%(table)s

    '''

    # Short problem code -> human readable description. Besides defining the
    # allowed choices, this mapping feeds the table in the class docstring
    # and the per-type lines of SquirrelCheck.get_summary().
    types = {
        'p1': 'Waveform duplicates.',
        'p2': 'Overlaps in channel/response epochs.',
        'p3': 'No waveforms available for a channel/response listed in '
              'metadata.',
        'p4': 'Channel/response information missing for an available '
              'waveform.',
        'p5': 'Multiple channel/response entries matching an available '
              'waveform.',
        'p6': 'Sampling rate of waveform does not match rate listed in '
              'metadata.',
        'p7': 'Waveform incompletely covered by channel/response epochs.'}

    # The allowed values are exactly the problem codes defined above.
    choices = list(types.keys())


# Substitute the ``table`` placeholder of the class docstring with one
# list-table row (code, description) per problem type.
# NOTE(review): the placeholder is written with a doubled percent sign in the
# class body, so a single ``%`` substitution on that literal would only
# unescape it. Presumably the guts class machinery has already processed the
# docstring once by the time this statement runs - confirm.
SquirrelCheckProblemType.__doc__ %= {
    'table': '\n'.join('''
        * - %s
          - %s''' % (k, v) for (k, v) in SquirrelCheckProblemType.types.items())
}

65 

66 

class SquirrelCheckProblem(Object):
    '''
    Diagnostics about a potential problem reported by Squirrel check.
    '''
    # Problem code, one of SquirrelCheckProblemType.choices ('p1' ... 'p7').
    type = SquirrelCheckProblemType.T(
        help='Coding indicating the type of problem detected.')
    # One-line description; printed by SquirrelCheckEntry.get_text().
    symptom = String.T(
        help='Short description of the problem.')
    # Optional extra lines; do_check() fills these with the uncovered time
    # spans for 'p7' problems.
    details = List.T(
        String.T(),
        help='Details about the problem.')

78 

79 

# String choice restricted to the content kinds 'waveform', 'channel' and
# 'response'; used as element type of SquirrelCheckEntry.available.
class KindChoiceWCR(StringChoice):
    choices = ['waveform', 'channel', 'response']

82 

83 

class SquirrelCheckEntry(Object):
    '''
    Squirrel check result for a given channel/response/waveform.
    '''
    codes = CodesNSLCE.T(
        help='Codes denominating a seismic channel.')
    available = List.T(
        KindChoiceWCR.T(),
        help='Available content kinds.')
    problems = List.T(
        SquirrelCheckProblem.T(),
        help='Potential problems detected.')

    def get_text(self):
        '''
        Textual representation of this entry.

        The first line shows the channel code (followed by ``.<extra>`` when
        the extra code is not empty) and the available content kinds. It is
        followed by one line per problem and one line per problem detail.

        :rtype: str
        '''
        label = self.codes.channel
        if self.codes.extra != '':
            label += '.%s' % self.codes.extra

        lines = [' %s: %s' % (label, ', '.join(self.available))]
        for problem in self.problems:
            lines.append(' - %s [%s]' % (problem.symptom, problem.type))
            lines.extend(' - %s' % detail for detail in problem.details)

        return '\n'.join(lines)

110 

111 

class SquirrelCheck(Object):
    '''
    Container for Squirrel check results.
    '''
    entries = List.T(SquirrelCheckEntry.T(), help='')

    def get_nproblems(self):
        '''
        Count the problems attached to all entries.

        :rtype: int
        '''
        total = 0
        for entry in self.entries:
            total += len(entry.problems)

        return total

    def get_summary(self):
        '''
        Summarize the check result as text.

        The first line gives the total number of problems. It is followed by
        one line per problem type that occurred, sorted by type code, giving
        its count and description.

        :rtype: str
        '''
        nproblems = self.get_nproblems()
        headline = '%i potential problem%s discovered.' % (
            nproblems, util.plural_s(nproblems))

        # Number of occurrences per problem type code.
        counts = {}
        for entry in self.entries:
            for problem in entry.problems:
                ptype = problem.type
                counts[ptype] = counts.get(ptype, 0) + 1

        per_type = [
            ' %5i [%s]: %s' % (
                counts[ptype], ptype, SquirrelCheckProblemType.types[ptype])
            for ptype in sorted(counts)]

        return '\n'.join([headline] + per_type)

    def get_text(self, verbosity=0):
        '''
        Render the check result as text, grouped by ``codes_nsl``.

        Groups with problems are always listed together with their entries.
        Groups without problems are reported as ``ok`` from verbosity 1 and
        have their entries listed from verbosity 2. The summary is appended
        when problems exist or verbosity is at least 1.

        :param verbosity:
            Set verbosity level.
        :type verbosity:
            int

        :rtype: str
        '''
        grouped = {}
        for entry in self.entries:
            grouped.setdefault(entry.codes.codes_nsl, []).append(entry)

        lines = []
        for nsl in sorted(grouped):
            group = grouped[nsl]
            nproblems = sum(len(entry.problems) for entry in group)

            if nproblems:
                lines.extend([
                    '',
                    '%s: %i potential problem%s' % (
                        str(nsl), nproblems, util.plural_s(nproblems))])
            elif verbosity >= 1:
                lines.extend(['', '%s: ok' % str(nsl)])

            if nproblems or verbosity >= 2:
                lines.extend(entry.get_text() for entry in group)

        if self.get_nproblems() > 0 or verbosity >= 1:
            lines.extend(['', self.get_summary()])

        return '\n'.join(lines)

196 

197 

def do_check(
        squirrel, codes=None, tmin=None, tmax=None, time=None, ignore=None):
    '''
    Check for common data/metadata problems.

    For every channel code found among the waveforms, channels and responses
    of ``squirrel``, the coverage of the three content kinds is queried and
    compared, and the problems found (see
    :py:class:`SquirrelCheckProblemType`) are collected.

    :param squirrel:
        The Squirrel instance to be checked.
    :type squirrel:
        :py:class:`~pyrocko.squirrel.Squirrel`

    :param tmin:
        Start time of query interval.
    :type tmin:
        timestamp

    :param tmax:
        End time of query interval.
    :type tmax:
        timestamp

    :param time:
        Time instant to query. Equivalent to setting ``tmin`` and ``tmax``
        to the same value.
    :type time:
        timestamp

    :param codes:
        Pattern of channel codes to query.
    :type codes:
        :class:`list` of :py:class:`~pyrocko.squirrel.model.CodesNSLCE`
        objects

    :param ignore:
        Problem types to be ignored. ``None`` (default) or an empty
        collection means that nothing is ignored.
    :type ignore:
        :class:`list` of :class:`str` (:py:class:`SquirrelCheckProblemType`)

    :returns:
        :py:class:`SquirrelCheck` object containing the results of the check.
    :rtype:
        :py:class:`SquirrelCheck`
    '''

    kinds = ['waveform', 'channel', 'response']
    metadata_kinds = ['channel', 'response']

    # ``None`` replaces the former mutable list default so that no object is
    # shared between calls.
    if ignore is None:
        ignore = ()

    # ``time`` is the fallback for whichever interval bound was not given.
    tmin_query = tmin if tmin is not None else time
    tmax_query = tmax if tmax is not None else time

    # Collect the codes of all content kinds, restricted to the requested
    # patterns if any were given.
    codes_set = set()
    for kind in kinds:
        if codes is not None:
            codes_pat = codes_patterns_for_kind(to_kind_id(kind), codes)
        else:
            codes_pat = None

        codes_filter = CodesPatternFiltering(codes=codes_pat)
        codes_set.update(
            codes_filter.filter(squirrel.get_codes(kind=kind)))

    entries = []
    for codes_ in sorted(codes_set):
        problems = []
        coverage = {}
        for kind in kinds:
            coverage[kind] = squirrel.get_coverage(
                kind,
                codes=[codes_],
                tmin=tmin_query,
                tmax=tmax_query)

        available = [
            kind for kind in kinds
            if coverage[kind] and any(
                cov.total is not None for cov in coverage[kind])]

        # p1: a count above one in the coverage changes of the waveforms.
        for cov in coverage['waveform']:
            if any(count > 1 for (_, count) in cov.changes):
                problems.append(SquirrelCheckProblem(
                    type='p1',
                    symptom='%s: %s' % ('waveform', 'duplicates')))

        # p2: the same condition for channel/response epochs.
        for kind in metadata_kinds:
            for cov in coverage[kind]:
                if any(count > 1 for (_, count) in cov.changes):
                    problems.append(SquirrelCheckProblem(
                        type='p2',
                        symptom='%s: %s' % (kind, 'overlapping epochs')))

        # p3: the codes are known from metadata only.
        if 'waveform' not in available:
            problems.append(SquirrelCheckProblem(
                type='p3',
                symptom='no waveforms'))

        # p4 - p7: compare every waveform coverage with the channel/response
        # coverages of the same codes.
        for cw in coverage['waveform']:
            for kind in metadata_kinds:
                ccs = get_matching(coverage[kind], cw)
                if not ccs:
                    problems.append(SquirrelCheckProblem(
                        type='p4',
                        symptom='no %s information' % kind))

                    continue

                if len(ccs) > 1:
                    problems.append(SquirrelCheckProblem(
                        type='p5',
                        symptom='multiple %s matches (waveform: %g Hz, %s: %s)'
                        % (kind, 1.0 / cw.deltat, kind, ', '.join(
                            '%g Hz' % (1.0 / cc.deltat)
                            if cc.deltat else '? Hz' for cc in ccs))))

                # get_matching() returns the best match first; the remaining
                # checks are done against that one only.
                cc = ccs[0]
                if cc.deltat and cc.deltat != cw.deltat:
                    problems.append(SquirrelCheckProblem(
                        type='p6',
                        symptom='sampling rate mismatch '
                        '(waveform: %g Hz, %s: %g Hz)' % (
                            1.0 / cw.deltat, kind, 1.0 / cc.deltat)))

                uncovered_spans = list(cw.iter_uncovered_by_combined(cc))
                if uncovered_spans:
                    problems.append(SquirrelCheckProblem(
                        type='p7',
                        symptom='incompletely covered by %s:' % kind,
                        details=[
                            '%s - %s' % (
                                util.time_to_str(span[0]),
                                util.time_to_str(span[1]))
                            for span in uncovered_spans]))

        entries.append(SquirrelCheckEntry(
            codes=codes_,
            available=available,
            problems=[p for p in problems if p.type not in ignore]))

    return SquirrelCheck(entries=entries)

326 

327 

# Names exported by a star-import of this module. The helpers get_matching
# and KindChoiceWCR are not part of this list.
__all__ = [
    'SquirrelCheckProblemType',
    'SquirrelCheckProblem',
    'SquirrelCheckEntry',
    'SquirrelCheck',
    'do_check']