Coverage for /usr/local/lib/python3.11/dist-packages/pyrocko/squirrel/check.py: 25%

122 statements  

« prev     ^ index     » next       coverage.py v6.5.0, created at 2023-10-04 09:52 +0000

1# http://pyrocko.org - GPLv3 

2# 

3# The Pyrocko Developers, 21st Century 

4# ---|P------/S----------~Lg---------- 

5 

6''' 

7Functionality to check for common data/metadata problems. 

8''' 

9 

10from pyrocko.guts import StringChoice, Object, String, List 

11from pyrocko import util 

12 

13from pyrocko.squirrel.model import CodesNSLCE 

14from pyrocko.squirrel.operators.base import CodesPatternFiltering 

15from pyrocko.squirrel.model import codes_patterns_for_kind, to_kind_id 

16 

# NOTE(review): presumably read by pyrocko.guts to prefix the type names of
# the guts classes defined in this module - confirm against pyrocko.guts.
guts_prefix = 'squirrel'

18 

19 

def get_matching(coverages, coverage):
    '''
    Select the coverages sharing the codes of a reference coverage.

    The result is ordered by how well the sampling interval agrees with the
    reference: candidates with the same ``deltat`` as ``coverage`` come
    first, followed by candidates with a falsy ``deltat`` (e.g. ``None``),
    followed by the remaining ones.

    :param coverages:
        Candidate coverage objects, each providing ``codes`` and ``deltat``.

    :param coverage:
        Reference coverage object providing ``codes`` and ``deltat``.

    :returns:
        New list with the matching candidates, best match first.
    '''
    same_codes = [c for c in coverages if c.codes == coverage.codes]

    def rank(candidate):
        return (coverage.deltat == candidate.deltat, not candidate.deltat)

    # Ascending stable sort followed by a reversal (instead of
    # ``reverse=True``): equally ranked candidates end up in reversed input
    # order.
    ranked = sorted(same_codes, key=rank)
    return ranked[::-1]

32 

33 

class SquirrelCheckProblemType(StringChoice):
    '''
    Potential dataset/metadata problem types.

    .. list-table:: Squirrel check problem types
        :widths: 10 90
        :header-rows: 1

        * - Type
          - Description
%%(table)s

    '''

    # NOTE(review): the indentation inside the docstring above was
    # reconstructed from a whitespace-collapsed listing - confirm against the
    # original file. The placeholder line is a template which is filled in
    # right after the class definition and must stay as it is.

    # Problem type code -> human readable description.
    types = {
        'p1': 'Waveform duplicates.',
        'p2': 'Overlaps in channel/response epochs.',
        'p3': (
            'No waveforms available for a channel/response listed in '
            'metadata.'),
        'p4': (
            'Channel/response information missing for an available '
            'waveform.'),
        'p5': (
            'Multiple channel/response entries matching an available '
            'waveform.'),
        'p6': (
            'Sampling rate of waveform does not match rate listed in '
            'metadata.'),
        'p7': 'Waveform incompletely covered by channel/response epochs.',
    }

    # The accepted values are exactly the problem type codes, in the order
    # in which they are defined above.
    choices = list(types)

62 

63 

# Fill the ``table`` placeholder of the class docstring with one list-table
# row (type code, description) per known problem type.
# NOTE(review): presumably the doubled percent sign in the class docstring
# has already been reduced to a single one by an earlier formatting pass in
# pyrocko.guts when this statement runs - confirm. The indentation inside
# the row template was reconstructed from a whitespace-collapsed listing.
SquirrelCheckProblemType.__doc__ %= dict(
    table='\n'.join(
        '\n        * - %s\n          - %s' % item
        for item in SquirrelCheckProblemType.types.items()))

69 

70 

class SquirrelCheckProblem(Object):
    '''
    One potential problem reported by the Squirrel check.

    Consists of a problem type code, a short symptom text and a list of
    detail strings.
    '''
    type = SquirrelCheckProblemType.T(
        help='Coding indicating the type of problem detected.')
    symptom = String.T(help='Short description of the problem.')
    details = List.T(String.T(), help='Details about the problem.')

82 

83 

class KindChoiceWCR(StringChoice):
    # Content kinds considered by the check: (W)aveform, (C)hannel and
    # (R)esponse.
    choices = ['waveform', 'channel', 'response']

86 

87 

class SquirrelCheckEntry(Object):
    '''
    Result of the Squirrel check for one set of channel codes.

    Holds the codes, the content kinds found to be available for them and
    the potential problems detected.
    '''
    codes = CodesNSLCE.T(help='Codes denominating a seismic channel.')
    available = List.T(KindChoiceWCR.T(), help='Available content kinds.')
    problems = List.T(
        SquirrelCheckProblem.T(), help='Potential problems detected.')

    def get_text(self):
        '''
        Format this entry as text.

        The first line gives the channel code (with the extra code appended
        after a dot, if it is not empty) and the available content kinds. It
        is followed by one line per problem, each in turn followed by one
        line per problem detail.

        :rtype: str
        '''
        # NOTE(review): leading whitespace inside the format strings below
        # may have been collapsed in the listing this was taken from -
        # confirm against the original file.
        label = self.codes.channel
        if self.codes.extra != '':
            label = label + '.%s' % self.codes.extra

        out = [' %s: %s' % (label, ', '.join(self.available))]
        for problem in self.problems:
            out.append(' - %s [%s]' % (problem.symptom, problem.type))
            out.extend(' - %s' % detail for detail in problem.details)

        return '\n'.join(out)

114 

115 

class SquirrelCheck(Object):
    '''
    Collection of all entries produced by a Squirrel check.
    '''
    entries = List.T(SquirrelCheckEntry.T(), help='')

    def get_nproblems(self):
        '''
        Count the problems of all entries.

        :rtype: int
        '''
        return sum(len(entry.problems) for entry in self.entries)

    def get_summary(self):
        '''
        Summarize the check result as text.

        The first line states the total number of problems. It is followed
        by one line per problem type which occurred, sorted by type code,
        giving the number of occurrences and the description of the type.

        :rtype: str
        '''
        # NOTE(review): leading whitespace inside the format strings below
        # may have been collapsed in the listing this was taken from -
        # confirm against the original file.
        total = self.get_nproblems()
        out = ['%i potential problem%s discovered.' % (
            total, util.plural_s(total))]

        counts = {}
        for entry in self.entries:
            for problem in entry.problems:
                counts[problem.type] = counts.get(problem.type, 0) + 1

        out.extend(
            ' %5i [%s]: %s' % (
                counts[code], code, SquirrelCheckProblemType.types[code])
            for code in sorted(counts))

        return '\n'.join(out)

    def get_text(self, verbosity=0):
        '''
        Render the check result as text, grouped by the NSL codes.

        Groups with problems are always listed together with their entries.
        Groups without problems are mentioned from verbosity level 1 and
        their entries are listed from verbosity level 2. The summary is
        appended if any problem was found or if the verbosity level is at
        least 1.

        :param verbosity:
            Set verbosity level.
        :type verbosity:
            int

        :rtype: str
        '''
        groups = {}
        for entry in self.entries:
            groups.setdefault(entry.codes.codes_nsl, []).append(entry)

        out = []
        for nsl in sorted(groups):
            group = groups[nsl]
            count = sum(len(entry.problems) for entry in group)
            if count == 0:
                if verbosity >= 1:
                    out.extend(['', '%s: ok' % str(nsl)])
            else:
                out.extend([
                    '',
                    '%s: %i potential problem%s' % (
                        str(nsl), count, util.plural_s(count))])

            if count != 0 or verbosity >= 2:
                out.extend(entry.get_text() for entry in group)

        if self.get_nproblems() > 0 or verbosity >= 1:
            out.extend(['', self.get_summary()])

        return '\n'.join(out)

200 

201 

def do_check(
        squirrel, codes=None, tmin=None, tmax=None, time=None, ignore=None):
    '''
    Check for common data/metadata problems.

    For every set of codes known to the Squirrel instance (waveform, channel
    or response) and matching ``codes``, the coverage of the three content
    kinds is queried and compared. Each inconsistency found is reported as a
    problem of one of the types listed in
    :py:class:`SquirrelCheckProblemType`.

    :param squirrel:
        The Squirrel instance to be checked.
    :type squirrel:
        :py:class:`~pyrocko.squirrel.base.Squirrel`

    :param codes:
        Pattern of channel codes to query. By default, all available codes
        are checked.
    :type codes:
        :class:`list` of :py:class:`~pyrocko.squirrel.model.CodesNSLCE`
        objects

    :param tmin:
        Start time of query interval.
    :type tmin:
        :py:func:`pyrocko.util.get_time_float`

    :param tmax:
        End time of query interval.
    :type tmax:
        :py:func:`pyrocko.util.get_time_float`

    :param time:
        Time instant to query. Equivalent to setting ``tmin`` and ``tmax``
        to the same value. Only used for those of ``tmin`` and ``tmax``
        which are not given.
    :type time:
        :py:func:`pyrocko.util.get_time_float`

    :param ignore:
        Problem types to be ignored. By default, no problem type is ignored.
    :type ignore:
        :class:`list` of :class:`str` (:py:class:`SquirrelCheckProblemType`)

    :returns:
        Object containing the results of the check.
    :rtype:
        :py:class:`SquirrelCheck`
    '''

    # ``None`` instead of a mutable list as default argument value.
    if ignore is None:
        ignore = []

    kinds = ['waveform', 'channel', 'response']

    # A given time instant stands in for missing interval bounds.
    if tmin is None:
        tmin = time

    if tmax is None:
        tmax = time

    # Collect the codes of all content kinds which match the given pattern.
    codes_set = set()
    for kind in kinds:
        if codes is not None:
            codes_pat = codes_patterns_for_kind(to_kind_id(kind), codes)
        else:
            codes_pat = None

        codes_filter = CodesPatternFiltering(codes=codes_pat)
        codes_set.update(
            codes_filter.filter(squirrel.get_codes(kind=kind)))

    entries = []
    for codes_ in sorted(codes_set):
        problems = []
        coverage = {}
        for kind in kinds:
            coverage[kind] = squirrel.get_coverage(
                kind,
                codes=[codes_],
                tmin=tmin,
                tmax=tmax)

        # A kind counts as available if any of its coverages has a total.
        available = [
            kind for kind in kinds
            if coverage[kind] and any(
                cov.total is not None for cov in coverage[kind])]

        # p1: waveform duplicates.
        # NOTE(review): presumably the second item of each change is the
        # number of items present from that time on - confirm against the
        # coverage model.
        for kind in ['waveform']:
            for cov in coverage[kind]:
                if any(count > 1 for (_, count) in cov.changes):
                    problems.append(SquirrelCheckProblem(
                        type='p1',
                        symptom='%s: %s' % (kind, 'duplicates')))

        # p2: overlapping channel/response epochs.
        for kind in ['channel', 'response']:
            for cov in coverage[kind]:
                if any(count > 1 for (_, count) in cov.changes):
                    problems.append(SquirrelCheckProblem(
                        type='p2',
                        symptom='%s: %s' % (kind, 'overlapping epochs')))

        # p3: metadata without waveforms.
        if 'waveform' not in available:
            problems.append(SquirrelCheckProblem(
                type='p3',
                symptom='no waveforms'))

        # p4 - p7: compare each waveform coverage against the metadata.
        for cw in coverage['waveform']:
            for kind in ['channel', 'response']:
                ccs = get_matching(coverage[kind], cw)
                if not ccs:
                    problems.append(SquirrelCheckProblem(
                        type='p4',
                        symptom='no %s information' % kind))

                elif len(ccs) > 1:
                    problems.append(SquirrelCheckProblem(
                        type='p5',
                        symptom='multiple %s matches (waveform: %g Hz, %s: %s)'
                        % (kind, 1.0 / cw.deltat, kind, ', '.join(
                            '%g Hz' % (1.0 / cc.deltat)
                            if cc.deltat else '? Hz' for cc in ccs))))

                if ccs:
                    # Only the best match is examined further.
                    cc = ccs[0]
                    if cc.deltat and cc.deltat != cw.deltat:
                        problems.append(SquirrelCheckProblem(
                            type='p6',
                            symptom='sampling rate mismatch '
                            '(waveform: %g Hz, %s: %g Hz)' % (
                                1.0 / cw.deltat, kind, 1.0 / cc.deltat)))

                    uncovered_spans = list(cw.iter_uncovered_by_combined(cc))
                    if uncovered_spans:
                        problems.append(SquirrelCheckProblem(
                            type='p7',
                            symptom='incompletely covered by %s:' % kind,
                            details=[
                                '%s - %s' % (
                                    util.time_to_str(span[0]),
                                    util.time_to_str(span[1]))
                                for span in uncovered_spans]))

        # Ignored problem types are dropped only here, the entry itself is
        # always reported.
        entries.append(SquirrelCheckEntry(
            codes=codes_,
            available=available,
            problems=[p for p in problems if p.type not in ignore]))

    return SquirrelCheck(entries=entries)

330 

331 

# Names exported by ``from ... import *``.
__all__ = [
    'SquirrelCheckProblemType',
    'SquirrelCheckProblem',
    'SquirrelCheckEntry',
    'SquirrelCheck',
    'do_check',
]