Coverage for /usr/local/lib/python3.11/dist-packages/pyrocko/squirrel/check.py: 25%
122 statements
« prev ^ index » next coverage.py v6.5.0, created at 2024-03-07 11:54 +0000
« prev ^ index » next coverage.py v6.5.0, created at 2024-03-07 11:54 +0000
1# http://pyrocko.org - GPLv3
2#
3# The Pyrocko Developers, 21st Century
4# ---|P------/S----------~Lg----------
6'''
7Functionality to check for common data/metadata problems.
8'''
10from pyrocko.guts import StringChoice, Object, String, List
11from pyrocko import util
13from pyrocko.squirrel.model import CodesNSLCE
14from pyrocko.squirrel.operators.base import CodesPatternFiltering
15from pyrocko.squirrel.model import codes_patterns_for_kind, to_kind_id
17guts_prefix = 'squirrel'
def get_matching(coverages, coverage):
    '''
    Select the coverages which share the codes of a reference coverage.

    The result is ordered by preference: candidates whose ``deltat`` equals
    that of ``coverage`` come first, then candidates with an unset (falsy)
    ``deltat``, and finally candidates with a differing ``deltat``.

    :param coverages:
        Candidate coverage objects, each providing ``codes`` and ``deltat``.

    :param coverage:
        Reference coverage object to compare the candidates against.

    :returns:
        New list with the matching candidates, best match first.
    '''

    def preference(candidate):
        return (coverage.deltat == candidate.deltat, not candidate.deltat)

    same_codes = [
        candidate for candidate in coverages
        if candidate.codes == coverage.codes]

    # Stable ascending sort followed by a reversal. This is deliberately not
    # ``reverse=True``, which would order equally ranked candidates
    # differently.
    return sorted(same_codes, key=preference)[::-1]
class SquirrelCheckProblemType(StringChoice):
    '''
    Potential dataset/metadata problem types.

    .. list-table:: Squirrel check problem types
        :widths: 10 90
        :header-rows: 1

        * - Type
          - Description
%%(table)s

    '''

    # NOTE(review): the table placeholder above is spelled with a doubled
    # percent sign and is filled in by a ``%`` substitution at module level,
    # right after this class. Presumably an earlier formatting pass (class
    # machinery of pyrocko.guts) reduces ``%%`` to ``%`` first - confirm.

    # Problem type code -> human readable description. The insertion order
    # of this dict defines the order of ``choices``.
    types = {
        'p1': 'Waveform duplicates.',
        'p2': 'Overlaps in channel/response epochs.',
        'p3': (
            'No waveforms available for a channel/response listed in '
            'metadata.'),
        'p4': (
            'Channel/response information missing for an available '
            'waveform.'),
        'p5': (
            'Multiple channel/response entries matching an available '
            'waveform.'),
        'p6': (
            'Sampling rate of waveform does not match rate listed in '
            'metadata.'),
        'p7': 'Waveform incompletely covered by channel/response epochs.',
    }

    # Allowed values are exactly the problem type codes.
    choices = list(types)
# Fill the table placeholder of the class docstring with one list-table row
# (type code, description) per known problem type. Every row starts with a
# newline, so joining with newlines leaves a blank line between rows.
SquirrelCheckProblemType.__doc__ = SquirrelCheckProblemType.__doc__ % dict(
    table='\n'.join(
        '\n        * - %s\n          - %s' % item
        for item in SquirrelCheckProblemType.types.items()))
class SquirrelCheckProblem(Object):
    '''
    Description of a single potential problem found by Squirrel check.
    '''

    # Problem type code, restricted to the SquirrelCheckProblemType choices.
    type = SquirrelCheckProblemType.T(
        help='Coding indicating the type of problem detected.')

    # One-line statement of what was found.
    symptom = String.T(help='Short description of the problem.')

    # Optional additional lines elaborating on the symptom.
    details = List.T(String.T(), help='Details about the problem.')
class KindChoiceWCR(StringChoice):
    # Content kinds an entry can be reported as available for.
    choices = [
        'waveform',
        'channel',
        'response',
    ]
class SquirrelCheckEntry(Object):
    '''
    Outcome of Squirrel check for one channel/response/waveform codes entry.
    '''

    codes = CodesNSLCE.T(help='Codes denominating a seismic channel.')
    available = List.T(KindChoiceWCR.T(), help='Available content kinds.')
    problems = List.T(
        SquirrelCheckProblem.T(), help='Potential problems detected.')

    def get_text(self):
        '''
        Textual representation of this entry.

        The first line holds the channel code, with the extra code appended
        after a dot if it is not empty, and the comma-separated available
        content kinds. It is followed by one line per problem, each directly
        followed by one line per detail of that problem.

        :rtype: str
        '''
        label = self.codes.channel
        if self.codes.extra != '':
            label = label + '.%s' % self.codes.extra

        lines = [' %s: %s' % (label, ', '.join(self.available))]
        for problem in self.problems:
            lines.append(' - %s [%s]' % (problem.symptom, problem.type))
            lines.extend(' - %s' % detail for detail in problem.details)

        return '\n'.join(lines)
class SquirrelCheck(Object):
    '''
    Collection of the entries produced by Squirrel check.
    '''

    entries = List.T(SquirrelCheckEntry.T(), help='')

    def get_nproblems(self):
        '''
        Count the problems over all entries.

        :rtype: int
        '''
        total = 0
        for entry in self.entries:
            total += len(entry.problems)

        return total

    def get_summary(self):
        '''
        Summarize the check result as text.

        The first line states the total number of problems. It is followed
        by one line per problem type that occurred, sorted by type code,
        giving the count, the type code and the description of the type.

        :rtype: str
        '''
        nproblems = self.get_nproblems()
        header = '%i potential problem%s discovered.' % (
            nproblems, util.plural_s(nproblems))

        # Number of occurrences per problem type code.
        counts = {}
        for entry in self.entries:
            for problem in entry.problems:
                counts[problem.type] = counts.get(problem.type, 0) + 1

        rows = [
            ' %5i [%s]: %s' % (
                counts[code], code, SquirrelCheckProblemType.types[code])
            for code in sorted(counts)]

        return '\n'.join([header] + rows)

    def get_text(self, verbosity=0):
        '''
        Render the check result as text.

        Entries are grouped by the ``codes_nsl`` attribute of their codes.
        Groups with problems are always listed together with their entries.
        Groups without problems get an ``ok`` line from verbosity 1 on and
        have their entries listed from verbosity 2 on. The summary is
        appended if there are any problems or if verbosity is at least 1.

        :param verbosity:
            Set verbosity level.
        :type verbosity:
            int

        :rtype: str
        '''
        groups = {}
        for entry in self.entries:
            groups.setdefault(entry.codes.codes_nsl, []).append(entry)

        lines = []
        for nsl in sorted(groups):
            members = groups[nsl]
            nproblems = sum(len(entry.problems) for entry in members)
            if nproblems != 0:
                lines.extend([
                    '',
                    '%s: %i potential problem%s' % (
                        str(nsl), nproblems, util.plural_s(nproblems))])

            elif verbosity >= 1:
                lines.extend(['', '%s: ok' % str(nsl)])

            if nproblems != 0 or verbosity >= 2:
                lines.extend(entry.get_text() for entry in members)

        if self.get_nproblems() > 0 or verbosity >= 1:
            lines.extend(['', self.get_summary()])

        return '\n'.join(lines)
def do_check(
        squirrel, codes=None, tmin=None, tmax=None, time=None, ignore=None):
    '''
    Check for common data/metadata problems.

    For every codes entry known to ``squirrel`` for the kinds waveform,
    channel and response (optionally restricted by ``codes``), the coverage
    of each kind is queried and compared. See
    :py:class:`SquirrelCheckProblemType` for the problems reported.

    :param squirrel:
        The Squirrel instance to be checked.
    :type squirrel:
        :py:class:`~pyrocko.squirrel.base.Squirrel`

    :param codes:
        Pattern of channel codes to query. With ``None``, no codes pattern
        is passed to the codes filter.
    :type codes:
        :class:`list` of :py:class:`~pyrocko.squirrel.model.CodesNSLCE`
        objects

    :param tmin:
        Start time of query interval.
    :type tmin:
        :py:func:`pyrocko.util.get_time_float`

    :param tmax:
        End time of query interval.
    :type tmax:
        :py:func:`pyrocko.util.get_time_float`

    :param time:
        Time instant to query. Equivalent to setting ``tmin`` and ``tmax``
        to the same value.
    :type time:
        :py:func:`pyrocko.util.get_time_float`

    :param ignore:
        Problem types to be ignored. ``None`` (default) ignores nothing.
    :type ignore:
        :class:`list` of :class:`str` (:py:class:`SquirrelCheckProblemType`)

    :returns:
        :py:class:`SquirrelCheck` object containing the results of the check.
    '''

    kinds = ('waveform', 'channel', 'response')
    metadata_kinds = ('channel', 'response')

    # ``None`` stands in for the former mutable ``[]`` default.
    if ignore is None:
        ignore = ()

    # ``time`` is the fallback for whichever interval end is not given.
    tmin_query = tmin if tmin is not None else time
    tmax_query = tmax if tmax is not None else time

    def has_multiple(cov):
        # ``cov.changes`` is iterated as pairs whose second item is a count;
        # presumably the number of simultaneously present entries - confirm.
        return any(count > 1 for (_, count) in cov.changes)

    # Collect all codes for which any of the kinds has content.
    codes_set = set()
    for kind in kinds:
        if codes is not None:
            codes_pat = codes_patterns_for_kind(to_kind_id(kind), codes)
        else:
            codes_pat = None

        codes_filter = CodesPatternFiltering(codes=codes_pat)
        codes_set.update(codes_filter.filter(squirrel.get_codes(kind=kind)))

    entries = []
    for codes_ in sorted(codes_set):
        coverage = {}
        for kind in kinds:
            coverage[kind] = squirrel.get_coverage(
                kind,
                codes=[codes_],
                tmin=tmin_query,
                tmax=tmax_query)

        # A kind counts as available if at least one of its coverage objects
        # has a ``total`` which is not ``None``.
        available = [
            kind for kind in kinds
            if coverage[kind] and any(
                cov.total is not None for cov in coverage[kind])]

        problems = []

        for cov in coverage['waveform']:
            if has_multiple(cov):
                problems.append(SquirrelCheckProblem(
                    type='p1',
                    symptom='waveform: duplicates'))

        for kind in metadata_kinds:
            for cov in coverage[kind]:
                if has_multiple(cov):
                    problems.append(SquirrelCheckProblem(
                        type='p2',
                        symptom='%s: overlapping epochs' % kind))

        if 'waveform' not in available:
            problems.append(SquirrelCheckProblem(
                type='p3',
                symptom='no waveforms'))

        # Compare each waveform coverage with the metadata coverages.
        for cw in coverage['waveform']:
            for kind in metadata_kinds:
                ccs = get_matching(coverage[kind], cw)
                if not ccs:
                    problems.append(SquirrelCheckProblem(
                        type='p4',
                        symptom='no %s information' % kind))

                    # Nothing to compare the waveform against.
                    continue

                if len(ccs) > 1:
                    rates = ', '.join(
                        '%g Hz' % (1.0 / cc.deltat)
                        if cc.deltat else '? Hz' for cc in ccs)

                    problems.append(SquirrelCheckProblem(
                        type='p5',
                        symptom='multiple %s matches (waveform: %g Hz, %s: %s)'
                        % (kind, 1.0 / cw.deltat, kind, rates)))

                # The remaining checks use the best match only.
                cc = ccs[0]
                if cc.deltat and cc.deltat != cw.deltat:
                    problems.append(SquirrelCheckProblem(
                        type='p6',
                        symptom='sampling rate mismatch '
                        '(waveform: %g Hz, %s: %g Hz)' % (
                            1.0 / cw.deltat, kind, 1.0 / cc.deltat)))

                uncovered_spans = list(cw.iter_uncovered_by_combined(cc))
                if uncovered_spans:
                    problems.append(SquirrelCheckProblem(
                        type='p7',
                        symptom='incompletely covered by %s:' % kind,
                        details=[
                            '%s - %s' % (
                                util.time_to_str(span[0]),
                                util.time_to_str(span[1]))
                            for span in uncovered_spans]))

        entries.append(SquirrelCheckEntry(
            codes=codes_,
            available=available,
            problems=[p for p in problems if p.type not in ignore]))

    return SquirrelCheck(entries=entries)
# Names exported by ``from ... import *``.
__all__ = [
    'SquirrelCheckProblemType',
    'SquirrelCheckProblem',
    'SquirrelCheckEntry',
    'SquirrelCheck',
    'do_check',
]