# http://pyrocko.org - GPLv3
#
# The Pyrocko Developers, 21st Century
# ---|P------/S----------~Lg----------

5 

6from __future__ import absolute_import, print_function 

7from pyrocko import util 

8 

# One-line summary of this command. make_subparser() uses it both as the
# subparser's help text and as the first line of its description.
headline = 'Check dataset consistency.'

10 

11 

def get_matching(coverages, coverage):
    '''
    Select the entries of *coverages* which belong to *coverage*.

    An entry is selected if its ``codes`` attribute equals ``coverage.codes``.
    The selection is returned as a new list, ordered by descending rank, where
    the rank of an entry is the tuple ``(deltat equals coverage.deltat,
    deltat is falsy)``. Entries of equal rank come out in the reverse of
    their input order.

    :param coverages: iterable of objects with ``codes`` and ``deltat``
        attributes.
    :param coverage: reference object with ``codes`` and ``deltat``
        attributes.
    :returns: list of selected entries, highest rank first.
    '''

    def rank(candidate):
        return (coverage.deltat == candidate.deltat, not candidate.deltat)

    same_codes = [
        candidate for candidate in coverages
        if candidate.codes == coverage.codes]

    # Ascending stable sort followed by a reversal, deliberately not
    # ``reverse=True``: the latter would keep ties in input order.
    return sorted(same_codes, key=rank)[::-1]

24 

25 

def make_subparser(subparsers):
    '''
    Register the ``check`` subcommand and return its parser.

    The module-level ``headline`` is used as the short help text and as the
    first line of the description, which additionally lists the problem
    classes [E1] to [E6] reported by the command.

    :param subparsers: object providing ``add_parser(name, help=...,
        description=...)``.
    :returns: whatever ``subparsers.add_parser`` returns.
    '''
    # The [E6] entry used to read "is not incompletely covered", a double
    # negative contradicting the "incompletely covered by ..." report line.
    return subparsers.add_parser(
        'check',
        help=headline,
        description=headline + '''

A report listing potential data/metadata problems for a given data collection
is printed to standard output. The following problems are detected:

 [E1] Overlaps in channel/response epochs, waveform duplicates.
 [E2] No waveforms available for a channel/response listed in metadata.
 [E3] Channel/response information missing for an available waveform.
 [E4] Multiple channel/response entries matching an available waveform.
 [E5] Sampling rate of waveform does not match rate listed in metadata.
 [E6] Waveform is incompletely covered by channel/response epochs.

''')

43 

44 

45def setup(parser): 

46 parser.add_squirrel_selection_arguments() 

47 

48 

def _print_group_report(lines, problems):
    '''
    Print the report collected for one ``codes_nsl`` group.

    :param lines: report lines of the group, the first entry being the group
        header. Nothing is printed if the list is empty.
    :param problems: if true, all lines are printed; otherwise only the
        header followed by ``ok``.
    '''
    if not lines:
        return

    if problems:
        print('\n'.join(lines) + '\n')
    else:
        print(lines[0] + ' ok' + '\n')


def run(parser, args):
    '''
    Check the selected data collection and print a report to standard output.

    All codes known for waveforms, channels and responses are visited in
    sorted order and grouped by their ``codes_nsl`` attribute. For every
    group either the detailed list of detected problems ([E1] to [E6]) or a
    single ``<group>: ok`` line is printed.

    :param parser: unused.
    :param args: object providing ``make_squirrel()``.
    '''
    squirrel = args.make_squirrel()
    kinds = ['waveform', 'channel', 'response']

    codes_set = set()
    for kind in kinds:
        codes_set.update(squirrel.get_codes(kind=kind))

    nsl = None
    problems = False
    lines = []
    for codes in sorted(codes_set):
        nsl_this = codes.codes_nsl
        if nsl is None or nsl != nsl_this:
            # A new group starts: report the previous one and start over.
            _print_group_report(lines, problems)
            lines = ['%s:' % str(nsl_this)]
            problems = False

        nsl = nsl_this

        coverage = {}
        for kind in kinds:
            coverage[kind] = squirrel.get_coverage(kind, codes=[codes])

        available = [kind for kind in kinds if coverage[kind]]

        lines.append(
            ' %s: %s' % (
                codes.channel
                + ('.%s' % codes.extra if codes.extra != '' else ''),
                ', '.join(available)))

        # [E1] A count above one in the coverage changes means that entries
        # overlap.
        for kind in kinds:
            for cov in coverage[kind]:
                if any(count > 1 for (_, count) in cov.changes):
                    problems = True
                    lines.append(' - %s: %s [E1]' % (
                        kind,
                        'duplicates'
                        if kind == 'waveform' else
                        'overlapping epochs'))

        # [E2] Metadata without any waveforms.
        if 'waveform' not in available:
            problems = True
            lines.append(' - no waveforms [E2]')

        for cw in coverage['waveform']:
            for kind in ['channel', 'response']:
                ccs = get_matching(coverage[kind], cw)
                if not ccs:
                    problems = True
                    lines.append(' - no %s information [E3]' % kind)

                elif len(ccs) > 1:
                    problems = True
                    lines.append(
                        ' - multiple %s matches (waveform: %g Hz, %s: %s) '
                        '[E4]' % (kind, 1.0 / cw.deltat, kind, ', '.join(
                            '%g Hz' % (1.0 / cc.deltat)
                            if cc.deltat else '? Hz' for cc in ccs)))

                if ccs:
                    # get_matching() returns the best candidate first.
                    cc = ccs[0]
                    if cc.deltat and cc.deltat != cw.deltat:
                        # A rate mismatch is a problem like any other; without
                        # flagging it, the group would be reported as "ok".
                        problems = True
                        lines.append(
                            ' - sampling rate mismatch '
                            '(waveform %g Hz, %s: %g Hz) [E5]' % (
                                1.0 / cw.deltat, kind, 1.0 / cc.deltat))

                    uncovered_spans = list(cw.iter_uncovered_by_combined(cc))
                    if uncovered_spans:
                        problems = True
                        lines.append(
                            ' - incompletely covered by %s [E6]:' % kind)

                        for span in uncovered_spans:
                            lines.append(
                                ' - %s - %s' % (
                                    util.time_to_str(span[0]),
                                    util.time_to_str(span[1])))

    # Report the last group the same way as all previous ones, so that a clean
    # final group also gets its "ok" line.
    _print_group_report(lines, problems)