1# http://pyrocko.org - GPLv3
2#
3# The Pyrocko Developers, 21st Century
4# ---|P------/S----------~Lg----------
6from __future__ import absolute_import, print_function
7from pyrocko import util
# One-line summary of the ``check`` subcommand; reused below as its help text
# and as the first line of its description.
headline = 'Check dataset consistency.'
def get_matching(coverages, coverage):
    '''
    Select the entries of ``coverages`` sharing the codes of ``coverage``.

    The result is ordered best match first: entries whose ``deltat`` equals
    ``coverage.deltat`` lead, and within the same rank entries with a falsy
    ``deltat`` precede those with a set one.

    :param coverages:
        Iterable of objects exposing ``codes`` and ``deltat``.
    :param coverage:
        Reference object exposing ``codes`` and ``deltat``.
    :returns:
        New list with the matching entries, best match first.
    '''
    same_codes = [c for c in coverages if c.codes == coverage.codes]

    def rank(c):
        return (coverage.deltat == c.deltat, not c.deltat)

    # Stable ascending sort followed by a reversal (instead of reverse=True),
    # so that equally ranked entries come out in reversed input order.
    return sorted(same_codes, key=rank)[::-1]
def make_subparser(subparsers):
    '''
    Register the ``check`` subcommand and return its parser.

    :param subparsers:
        Object providing ``add_parser(name, help=..., description=...)``
        (presumably an argparse subparsers action -- confirm with caller).
    :returns:
        The value returned by ``subparsers.add_parser``.
    '''
    # The [E*] tags listed here are the same tags appended to the report
    # lines produced by ``run``.
    return subparsers.add_parser(
        'check',
        help=headline,
        description=headline + '''

A report listing potential data/metadata problems for a given data collection
is printed to standard output. The following problems are detected:

 [E1] Overlaps in channel/response epochs, waveform duplicates.
 [E2] No waveforms available for a channel/response listed in metadata.
 [E3] Channel/response information missing for an available waveform.
 [E4] Multiple channel/response entries matching an available waveform.
 [E5] Sampling rate of waveform does not match rate listed in metadata.
 [E6] Waveform is incompletely covered by channel/response epochs.

''')
45def setup(parser):
46 parser.add_squirrel_selection_arguments()
def _print_group(lines, problems):
    '''
    Print the report lines collected for one ``codes_nsl`` group.

    With problems, all collected lines are printed. Without problems, only
    the group header (first line) followed by ``ok`` is printed. Nothing is
    printed when ``lines`` is empty.
    '''
    if not lines:
        return

    if problems:
        print('\n'.join(lines) + '\n')
    else:
        print(lines[0] + ' ok' + '\n')


def run(parser, args):
    '''
    Print a consistency report for the selected dataset to standard output.

    All codes known for waveforms, channels and responses are visited in
    sorted order and grouped by their ``codes_nsl`` attribute. For every
    group either the full list of findings (tagged ``[E1]`` .. ``[E6]``) or
    the group header followed by ``ok`` is printed.

    :param parser:
        Unused here.
    :param args:
        Object providing ``make_squirrel()``; the returned object is queried
        through ``get_codes(kind=...)`` and ``get_coverage(kind, codes=...)``.
    '''
    kinds = ['waveform', 'channel', 'response']
    squirrel = args.make_squirrel()

    codes_set = set()
    for kind in kinds:
        codes_set.update(squirrel.get_codes(kind=kind))

    nsl = None
    problems = False
    lines = []
    for codes in sorted(codes_set):
        nsl_this = codes.codes_nsl
        if nsl is None or nsl != nsl_this:
            # A new group starts: emit what was collected for the previous
            # one and start over with a fresh header line.
            _print_group(lines, problems)
            lines = ['%s:' % str(nsl_this)]
            problems = False

        nsl = nsl_this

        coverage = {}
        for kind in kinds:
            coverage[kind] = squirrel.get_coverage(kind, codes=[codes])

        available = [kind for kind in kinds if coverage[kind]]

        lines.append(
            ' %s: %s' % (
                codes.channel
                + ('.%s' % codes.extra if codes.extra != '' else ''),
                ', '.join(available)))

        # [E1] any change entry with a count above one.
        for kind in kinds:
            for cov in coverage[kind]:
                if any(count > 1 for (_, count) in cov.changes):
                    problems = True
                    lines.append(' - %s: %s [E1]' % (
                        kind,
                        'duplicates'
                        if kind == 'waveform' else
                        'overlapping epochs'))

        # [E2] metadata present, but no waveform coverage at all.
        if 'waveform' not in available:
            problems = True
            lines.append(' - no waveforms [E2]')

        for cw in coverage['waveform']:
            for kind in ['channel', 'response']:
                ccs = get_matching(coverage[kind], cw)
                if not ccs:
                    problems = True
                    lines.append(' - no %s information [E3]' % kind)

                elif len(ccs) > 1:
                    problems = True
                    lines.append(
                        ' - multiple %s matches (waveform: %g Hz, %s: %s) '
                        '[E4]' % (kind, 1.0 / cw.deltat, kind, ', '.join(
                            '%g Hz' % (1.0 / cc.deltat)
                            if cc.deltat else '? Hz' for cc in ccs)))

                if ccs:
                    # get_matching returns the best match first.
                    cc = ccs[0]
                    if cc.deltat and cc.deltat != cw.deltat:
                        # Flag the group, otherwise this line would be
                        # discarded when it is the only finding.
                        problems = True
                        lines.append(
                            ' - sampling rate mismatch '
                            '(waveform %g Hz, %s: %g Hz) [E5]' % (
                                1.0 / cw.deltat, kind, 1.0 / cc.deltat))

                    # NOTE(review): the method name suggests it may expect a
                    # collection of coverages, while only the best match is
                    # passed here -- confirm against the Coverage API.
                    uncovered_spans = list(cw.iter_uncovered_by_combined(cc))
                    if uncovered_spans:
                        problems = True
                        lines.append(
                            ' - incompletely covered by %s [E6]:' % kind)

                        for span in uncovered_spans:
                            lines.append(
                                ' - %s - %s' % (
                                    util.time_to_str(span[0]),
                                    util.time_to_str(span[1])))

    # The last group is reported like every other one, including the
    # ``ok`` line when nothing was found.
    _print_group(lines, problems)