1# http://pyrocko.org - GPLv3
2#
3# The Pyrocko Developers, 21st Century
4# ---|P------/S----------~Lg----------
6from pyrocko.guts import StringChoice, Object, String, List
7from pyrocko import util
9from pyrocko.squirrel.model import CodesNSLCE
10from pyrocko.squirrel.operators.base import CodesPatternFiltering
11from pyrocko.squirrel.model import codes_patterns_for_kind, to_kind_id
# NOTE(review): presumably picked up by pyrocko.guts as the namespace prefix
# for the Object/StringChoice subclasses declared in this module -- confirm.
guts_prefix = 'squirrel'
def get_matching(coverages, coverage):
    '''
    Select the coverage entries sharing the codes of a reference coverage.

    :param coverages:
        Candidate coverage objects. Each must provide ``codes`` and
        ``deltat`` attributes.

    :param coverage:
        Reference coverage object the candidates are compared against.

    :returns:
        List of the candidates whose ``codes`` equal ``coverage.codes``.
        Candidates with the same ``deltat`` as the reference come first,
        then candidates with a falsy ``deltat`` (e.g. ``None``), then all
        others.
    '''

    def rank(candidate):
        return (coverage.deltat == candidate.deltat, not candidate.deltat)

    same_codes = [
        candidate for candidate in coverages
        if candidate.codes == coverage.codes]

    # Stable ascending sort followed by a full reversal: the best matches end
    # up in front and candidates of equal rank appear in reversed input order.
    return sorted(same_codes, key=rank)[::-1]
class SquirrelCheckProblemType(StringChoice):
    '''
    Potential dataset/metadata problem types.

    .. list-table:: Squirrel check problem types
        :widths: 10 90
        :header-rows: 1

        * - Type
          - Description
%%(table)s

    '''

    # The ``table`` placeholder in the docstring above is targeted by the
    # module-level ``SquirrelCheckProblemType.__doc__ %= {...}`` statement.
    # NOTE(review): a doubled percent sign survives one round of %-formatting
    # as a single percent sign, so the docstring presumably gets %-formatted
    # once before that statement runs (e.g. by guts) -- confirm, otherwise
    # the table rows are never inserted.

    # Maps each problem type code to its one-line description. The codes are
    # the values used for SquirrelCheckProblem.type.
    types = {
        'p1': 'Waveform duplicates.',
        'p2': 'Overlaps in channel/response epochs.',
        'p3': 'No waveforms available for a channel/response listed in '
              'metadata.',
        'p4': 'Channel/response information missing for an available '
              'waveform.',
        'p5': 'Multiple channel/response entries matching an available '
              'waveform.',
        'p6': 'Sampling rate of waveform does not match rate listed in '
              'metadata.',
        'p7': 'Waveform incompletely covered by channel/response epochs.'}

    # Admissible values are exactly the codes defined in ``types``.
    choices = list(types.keys())
# Render one list-table row (code, description) per problem type and
# substitute the joined rows for the ``table`` key of the class docstring.
SquirrelCheckProblemType.__doc__ %= {
    'table': '\n'.join('''
        * - %s
          - %s''' % (k, v) for (k, v) in SquirrelCheckProblemType.types.items())
}
class SquirrelCheckProblem(Object):
    '''
    Diagnostics about a potential problem reported by Squirrel check.

    Instances are created by :py:func:`do_check` and collected in the
    ``problems`` list of a :py:class:`SquirrelCheckEntry`.
    '''
    # Problem code; one of the keys of SquirrelCheckProblemType.types.
    type = SquirrelCheckProblemType.T(
        help='Coding indicating the type of problem detected.')
    # One-line, human readable description of what was found.
    symptom = String.T(
        help='Short description of the problem.')
    # Optional extra lines; do_check only fills these for 'p7' problems,
    # where they hold the uncovered time spans.
    details = List.T(
        String.T(),
        help='Details about the problem.')
class KindChoiceWCR(StringChoice):
    '''
    Content kind restricted to waveform, channel or response.
    '''
    choices = ['waveform', 'channel', 'response']
class SquirrelCheckEntry(Object):
    '''
    Check outcome for one channel/response/waveform code combination.
    '''
    codes = CodesNSLCE.T(
        help='Codes denominating a seismic channel.')
    available = List.T(
        KindChoiceWCR.T(),
        help='Available content kinds.')
    problems = List.T(
        SquirrelCheckProblem.T(),
        help='Potential problems detected.')

    def get_text(self):
        '''
        Render this entry as human readable text.

        The first line shows the channel code (followed by ``.<extra>`` when
        the extra code is not empty) and the available content kinds. It is
        followed by one line per problem and one line per problem detail.

        :rtype: str
        '''
        extra = self.codes.extra
        label = self.codes.channel
        if extra != '':
            label += '.%s' % extra

        text = [' %s: %s' % (label, ', '.join(self.available))]
        for problem in self.problems:
            text.append(' - %s [%s]' % (problem.symptom, problem.type))
            text.extend(' - %s' % detail for detail in problem.details)

        return '\n'.join(text)
class SquirrelCheck(Object):
    '''
    Collection of the per-channel results of a Squirrel check run.
    '''
    entries = List.T(SquirrelCheckEntry.T(), help='')

    def get_nproblems(self):
        '''
        Count the problems over all entries.

        :rtype: int
        '''
        total = 0
        for entry in self.entries:
            total += len(entry.problems)

        return total

    def get_summary(self):
        '''
        Headline with the total count plus one line per problem type found.

        The per-type lines are sorted by problem type code and show the
        number of occurrences together with the type's description.

        :rtype: str
        '''
        total = self.get_nproblems()

        counts = {}
        for entry in self.entries:
            for problem in entry.problems:
                counts[problem.type] = counts.get(problem.type, 0) + 1

        summary = ['%i potential problem%s discovered.' % (
            total, util.plural_s(total))]

        summary.extend(
            ' %5i [%s]: %s' % (
                counts[code], code, SquirrelCheckProblemType.types[code])
            for code in sorted(counts))

        return '\n'.join(summary)

    def get_text(self, verbosity=0):
        '''
        Full report, grouped by network/station/location codes.

        :param verbosity:
            ``0``: list only groups with problems. ``>= 1``: additionally
            list problem-free groups as ``ok`` and always append the summary.
            ``>= 2``: additionally print the entries of problem-free groups.
        :type verbosity:
            int

        :rtype: str
        '''
        groups = {}
        for entry in self.entries:
            groups.setdefault(entry.codes.codes_nsl, []).append(entry)

        out = []
        for nsl in sorted(groups):
            group = groups[nsl]
            count = sum(len(entry.problems) for entry in group)
            has_problems = count != 0

            if has_problems:
                out.append('')
                out.append('%s: %i potential problem%s' % (
                    str(nsl), count, util.plural_s(count)))
            elif verbosity >= 1:
                out.append('')
                out.append('%s: ok' % str(nsl))

            if has_problems or verbosity >= 2:
                out.extend(entry.get_text() for entry in group)

        # Without problems, the summary is only shown on request.
        if self.get_nproblems() > 0 or verbosity >= 1:
            out.append('')
            out.append(self.get_summary())

        return '\n'.join(out)
def _format_rate(deltat):
    '''
    Format a sampling interval as a sampling rate, ``'? Hz'`` if unknown.
    '''
    return '%g Hz' % (1.0 / deltat) if deltat else '? Hz'


def do_check(
        squirrel, codes=None, tmin=None, tmax=None, time=None, ignore=None):
    '''
    Check for common data/metadata problems.

    :param squirrel:
        The Squirrel instance to be checked.
    :type squirrel:
        :py:class:`~pyrocko.squirrel.Squirrel`

    :param codes:
        Pattern of channel codes to query. ``None`` checks all codes known
        to ``squirrel``.
    :type codes:
        :class:`list` of :py:class:`~pyrocko.squirrel.model.CodesNSLCE`
        objects

    :param tmin:
        Start time of query interval.
    :type tmin:
        timestamp

    :param tmax:
        End time of query interval.
    :type tmax:
        timestamp

    :param time:
        Time instant to query. Equivalent to setting ``tmin`` and ``tmax``
        to the same value.
    :type time:
        timestamp

    :param ignore:
        Problem types to be ignored. ``None`` (default) ignores nothing.
    :type ignore:
        :class:`list` of :class:`str` (:py:class:`SquirrelCheckProblemType`)

    :returns:
        :py:class:`SquirrelCheck` object containing the results of the check.
    '''

    # Avoid a shared mutable default; ``None`` stands for "ignore nothing".
    if ignore is None:
        ignore = ()

    kinds = ('waveform', 'channel', 'response')

    # ``time`` only serves as fallback for bounds not given explicitly.
    tmin_query = tmin if tmin is not None else time
    tmax_query = tmax if tmax is not None else time

    # Union of the codes available for any of the content kinds, restricted
    # to the requested patterns.
    codes_set = set()
    for kind in kinds:
        codes_pat = None
        if codes is not None:
            codes_pat = codes_patterns_for_kind(to_kind_id(kind), codes)

        codes_filter = CodesPatternFiltering(codes=codes_pat)
        codes_set.update(
            codes_filter.filter(squirrel.get_codes(kind=kind)))

    entries = []
    for codes_ in sorted(codes_set):
        problems = []
        coverage = {}
        for kind in kinds:
            coverage[kind] = squirrel.get_coverage(
                kind,
                codes=[codes_],
                tmin=tmin_query,
                tmax=tmax_query)

        available = [
            kind for kind in kinds
            if coverage[kind] and any(
                cov.total is not None for cov in coverage[kind])]

        # A count above one in the coverage changes means that several
        # items overlap in time.
        for cov in coverage['waveform']:
            if any(count > 1 for (_, count) in cov.changes):
                problems.append(SquirrelCheckProblem(
                    type='p1',
                    symptom='waveform: duplicates'))

        for kind in ['channel', 'response']:
            for cov in coverage[kind]:
                if any(count > 1 for (_, count) in cov.changes):
                    problems.append(SquirrelCheckProblem(
                        type='p2',
                        symptom='%s: %s' % (kind, 'overlapping epochs')))

        if 'waveform' not in available:
            problems.append(SquirrelCheckProblem(
                type='p3',
                symptom='no waveforms'))

        for cw in coverage['waveform']:
            for kind in ['channel', 'response']:
                ccs = get_matching(coverage[kind], cw)
                if not ccs:
                    problems.append(SquirrelCheckProblem(
                        type='p4',
                        symptom='no %s information' % kind))

                elif len(ccs) > 1:
                    problems.append(SquirrelCheckProblem(
                        type='p5',
                        symptom='multiple %s matches (waveform: %s, %s: %s)'
                        % (kind, _format_rate(cw.deltat), kind, ', '.join(
                            _format_rate(cc.deltat) for cc in ccs))))

                if ccs:
                    # get_matching() puts the best candidate first.
                    cc = ccs[0]
                    if cc.deltat and cc.deltat != cw.deltat:
                        problems.append(SquirrelCheckProblem(
                            type='p6',
                            symptom='sampling rate mismatch '
                                    '(waveform: %s, %s: %s)' % (
                                        _format_rate(cw.deltat),
                                        kind,
                                        _format_rate(cc.deltat))))

                    uncovered_spans = list(cw.iter_uncovered_by_combined(cc))
                    if uncovered_spans:
                        problems.append(SquirrelCheckProblem(
                            type='p7',
                            symptom='incompletely covered by %s:' % kind,
                            details=[
                                '%s - %s' % (
                                    util.time_to_str(span[0]),
                                    util.time_to_str(span[1]))
                                for span in uncovered_spans]))

        entries.append(SquirrelCheckEntry(
            codes=codes_,
            available=available,
            problems=[p for p in problems if p.type not in ignore]))

    return SquirrelCheck(entries=entries)
# Names exported by a star-import of this module. get_matching and
# KindChoiceWCR are not listed here.
__all__ = [
    'SquirrelCheckProblemType',
    'SquirrelCheckProblem',
    'SquirrelCheckEntry',
    'SquirrelCheck',
    'do_check']