Coverage for /usr/local/lib/python3.13/dist-packages/pyrocko/squirrel/tool/commands/benchmark.py: 31%

116 statements  

« prev     ^ index     » next       coverage.py v7.6.0, created at 2025-12-04 10:41 +0000

1# http://pyrocko.org - GPLv3 

2# 

3# The Pyrocko Developers, 21st Century 

4# ---|P------/S----------~Lg---------- 

5 

6''' 

7Implementation of :app:`squirrel benchmark`. 

8''' 

9 

10import time 

11from pyrocko.guts import Object, Float, Int, Timestamp, List 

12from pyrocko import util, guts 

13from pyrocko.squirrel.error import ToolError 

14from pyrocko.squirrel.base import Batch 

15from pyrocko.squirrel.io.backends import mseed as sq_mseed 

16from pyrocko.io import mseed 

17from ..common import ldq 

18 

19 

20headline = 'Perform benchmark tests.' 

21 

22 

def make_subparser(subparsers):
    '''
    Register the ``benchmark`` subcommand.

    The long description is the module's ``headline`` followed by a short
    explanation of each available benchmark.

    :param subparsers:
        Object providing ``add_parser()``, to which the subcommand is added.

    :returns:
        Whatever ``subparsers.add_parser()`` returns.
    '''
    description = headline + '''

The following benchmarks are available:

chopper-waveforms

Test performance of waveform data reading in typical continuous waveform
processing schemes. Use ``--dataset`` or ``--add`` to select input data and
``--tinc`` to set a processing time-window duration. Query options ``--codes``,
``--tmin`` and ``--tmax`` can be used to restrict the reading to a specific
subset of the available data.

mseed-files-parse

Test speed of the mseed waveform decoding which is performed in the libmseed
functions without the overhead of Squirrel's database querying. This test
sequentially reads all files in the set-up data selection. Any query options
are ignored in this test.

mseed-files-disk-io

Test speed of raw disk io without the overhead of decoding the mseed data. This
test sequentially reads all files in the set-up data selection. Any query
options are ignored in this test. Note: the io speed is reported under traces,
even though no traces are decoded in this test.

'''

    return subparsers.add_parser(
        'benchmark', help=headline, description=description)

54 

55 

def setup(parser):
    '''
    Declare the command line arguments of the ``benchmark`` subcommand.

    Adds the positional ``BENCHMARK`` argument, the Squirrel selection
    arguments, the Squirrel query arguments (without ``kinds`` and ``time``)
    and the ``--tinc`` option.

    :param parser:
        Parser of the subcommand; must provide ``add_argument()``,
        ``add_squirrel_selection_arguments()`` and
        ``add_squirrel_query_arguments()``.
    '''
    available = [
        'chopper-waveforms',
        'mseed-files-parse',
        'mseed-files-disk-io']

    benchmark_help = 'Benchmark to perform. Choices: %s.' % ldq(available)
    parser.add_argument(
        'benchmark', metavar='BENCHMARK', help=benchmark_help)

    parser.add_squirrel_selection_arguments()
    parser.add_squirrel_query_arguments(without=['kinds', 'time'])

    tinc_help = \
        'Set processing time interval for ```chopper``` benchmark [s].'

    parser.add_argument(
        '--tinc',
        dest='tinc',
        type=guts.parse_duration,
        metavar='DURATION',
        default=3600.,
        help=tinc_help)

78 

79 

class BatchInfo(Object):
    '''
    Time span and running number of one processed batch.
    '''
    tmin = Float.T()
    tmax = Float.T()
    i = Int.T()
    n = Int.T()

    def __str__(self):
        # The running number is padded to the width of the total count so
        # that successive progress lines have equal length.
        index_width = len(str(self.n))
        template = '[%' + str(index_width) + 'i/%i %3.0f%% %s - %s]'

        number = self.i+1
        percent = number / self.n * 100.0
        tmin_str, tmax_str = [
            util.time_to_str(t, format='%Y-%m-%d %H:%M:%S')
            for t in (self.tmin, self.tmax)]

        return template % (number, self.n, percent, tmin_str, tmax_str)

    @classmethod
    def make(cls, batch):
        '''
        Create an instance from the ``tmin``, ``tmax``, ``i`` and ``n``
        attributes of ``batch``.
        '''
        copied = ('tmin', 'tmax', 'i', 'n')
        return cls(**{name: getattr(batch, name) for name in copied})

101 

102 

class ThroughputHistory:
    '''
    Running record of how much data has been read so far.

    Every call to :py:meth:`update` adds to the accumulated byte and sample
    counters and appends one entry ``(time, batch_info, nbytes_mseed,
    nbytes_traces, nsamples)`` to the history, from which
    :py:meth:`get_stats` derives totals and rates.
    '''

    def __init__(self):
        self._history = []
        self._nbytes = 0
        self._nsamples = 0

    def update(self, batch=None, nbytes=None):
        '''
        Account for newly read data and append a history entry.

        :param batch:
            Batch which has just been processed. May be ``None`` if
            ``nbytes`` is given; the history entry then carries no batch
            information.
        :param nbytes:
            Number of bytes to add to the byte counter. If ``None``, the
            byte and sample counts are taken from the ``ydata`` arrays of
            ``batch.traces``.

        :raises ValueError:
            If neither ``batch`` nor ``nbytes`` is given.
        '''
        if nbytes is not None:
            self._nbytes += nbytes
        elif batch is not None:
            self._nbytes += sum(tr.ydata.nbytes for tr in batch.traces)
            self._nsamples += sum(tr.ydata.size for tr in batch.traces)
        else:
            raise ValueError('Either batch or nbytes must be given.')

        # The batch is optional in the resulting stats, so a missing batch
        # is recorded as None instead of failing in BatchInfo.make().
        batch_info = BatchInfo.make(batch) if batch is not None else None

        self._history.append((
            time.time(),
            batch_info,
            mseed.g_bytes_read,
            self._nbytes,
            self._nsamples))

    def get_stats(self):
        '''
        Get totals and rates computed from the history recorded so far.
        '''
        return ThroughputStats.make(self._history)

126 

127 

def total_and_rates(label, total, rates, format=util.human_bytesize):
    '''
    Render a total and its rates as ``'<label>: <total> (<rate>/s, ...)'``.

    :param label:
        Text placed in front of the colon.
    :param total:
        Accumulated quantity, rendered with ``format``.
    :param rates:
        Iterable of rates; each is rendered with ``format`` and suffixed
        with ``/s``.
    :param format:
        Callable turning a number into its display form.

    :returns:
        The formatted line as a string.
    '''
    total_str = format(total)
    rate_strs = ['%s/s' % format(rate) for rate in rates]
    return '%s: %s (%s)' % (label, total_str, ', '.join(rate_strs))

133 

134 

class ThroughputStats(Object):
    '''
    Snapshot of accumulated totals and the rates derived from a history.

    The ``*_rates`` lists and ``time_averages`` are filled in parallel: entry
    ``k`` of each rate list is the rate over the time span given in entry
    ``k`` of ``time_averages``.
    '''
    time = Timestamp.T()
    batch = BatchInfo.T(optional=True)
    nbytes_mseed = Int.T()
    nbytes_traces = Int.T()
    nsamples = Int.T()
    time_averages = List.T(Float.T())
    nbytes_mseed_rates = List.T(Float.T())
    nbytes_traces_rates = List.T(Float.T())
    nsamples_rates = List.T(Float.T())

    def __str__(self):
        # NOTE(review): the three continuation lines of the format string
        # are indented, but the exact width could not be recovered from the
        # whitespace-collapsed listing. Two spaces are assumed - confirm.
        return '''%s
  %s
  %s
  %s''' % (
            str(self.batch) if self.batch else '',
            total_and_rates(
                'mseed',
                self.nbytes_mseed,
                self.nbytes_mseed_rates),
            total_and_rates(
                'traces',
                self.nbytes_traces,
                self.nbytes_traces_rates),
            total_and_rates(
                'samples',
                self.nsamples,
                self.nsamples_rates,
                format=util.human_intsize)
        )

    @classmethod
    def make(cls, history, time_averages=(1., 3., 10., None)):
        '''
        Compute totals and rates from a history.

        :param history:
            Sequence of entries ``(time, batch_info, nbytes_mseed,
            nbytes_traces, nsamples)`` in chronological order.
        :param time_averages:
            Averaging windows in seconds, counted back from the last entry.
            ``None`` selects the span from the first to the last entry.

        :returns:
            Stats holding the totals of the last entry and one rate per
            averaging window for which a start entry is found. For an empty
            history, all totals are zero and no rates are given.
        '''
        if not history:
            # Nothing has been recorded yet: report empty totals rather
            # than failing on history[-1].
            return cls(
                time=time.time(),
                nbytes_mseed=0,
                nbytes_traces=0,
                nsamples=0)

        end = history[-1]
        t, batch, nbytes_mseed, nbytes_traces, nsamples = end
        stats = cls(
            time=t,
            batch=batch,
            nbytes_mseed=nbytes_mseed,
            nbytes_traces=nbytes_traces,
            nsamples=nsamples)

        begins = []
        for time_average in time_averages:
            if time_average is None:
                # Average over everything; needs at least two entries.
                if len(history) > 1:
                    begins.append(history[0])

                continue

            # Search backwards for the most recent entry lying before the
            # start of the averaging window. The last and the first entry
            # are not considered here.
            for i in range(len(history)-2, 0, -1):
                if history[i][0] < t - time_average:
                    begins.append(history[i])
                    break

        for begin in begins:
            time_delta = end[0] - begin[0]
            if time_delta > 0:
                # Entry fields 2, 3 and 4 are the mseed byte, trace byte and
                # sample counters.
                nbytes_mseed_rate, nbytes_traces_rate, nsamples_rate = [
                    (end[i] - begin[i]) / time_delta for i in range(2, 5)]

                stats.nbytes_mseed_rates.append(nbytes_mseed_rate)
                stats.nbytes_traces_rates.append(nbytes_traces_rate)
                stats.nsamples_rates.append(nsamples_rate)
                stats.time_averages.append(time_delta)

        return stats

200 

201 

def _require_default_options(args):
    '''
    Raise :py:exc:`ToolError` if ``--tinc`` or any query option is set.
    '''
    # A changed --tinc can only be detected by comparing with its default.
    if args.tinc != 3600. or any(
            x is not None for x in args.squirrel_query.values()):

        raise ToolError(
            'Invalid options given for benchmark "%s".'
            % args.benchmark)


def _iter_chopper_waveforms(sq, args):
    '''
    Yield ``(batch, None)`` for each batch from ``sq.chopper_waveforms()``.
    '''
    for batch in sq.chopper_waveforms(
            tinc=args.tinc,
            **args.squirrel_query):

        yield batch, None


def _iter_mseed_files_parse(sq, args):
    '''
    Yield ``(batch, None)`` for each mseed file containing waveforms.
    '''
    paths = sq.get_paths(format='mseed')
    for ipath, path in enumerate(paths):
        nuts = list(sq_mseed.iload('mseed', path, None, ('waveform',)))

        if not nuts:
            continue

        batch = Batch(
            i=ipath,
            n=len(paths),
            igroup=0,
            ngroups=0,
            tmin=min(nut.content.tmin for nut in nuts),
            tmax=max(nut.content.tmax for nut in nuts),
            traces=[nut.content for nut in nuts])

        yield batch, None


def _iter_mseed_files_disk_io(sq, args):
    '''
    Yield ``(batch, nbytes)`` for each mseed file, read without decoding.
    '''
    paths = sq.get_paths(format='mseed')
    for ipath, path in enumerate(paths):

        with open(path, 'rb') as f:
            data = f.read()

        # Nothing is decoded, so the batch carries no traces and no time
        # span; only the number of bytes read is reported.
        batch = Batch(
            i=ipath,
            n=len(paths),
            igroup=0,
            ngroups=0,
            tmin=0.0,
            tmax=0.0,
            traces=[])

        yield batch, len(data)


# Benchmark name -> (batch generator, whether query options and --tinc must
# be left at their defaults).
_BENCHMARKS = {
    'chopper-waveforms': (_iter_chopper_waveforms, False),
    'mseed-files-parse': (_iter_mseed_files_parse, True),
    'mseed-files-disk-io': (_iter_mseed_files_disk_io, True)}


def run(parser, args):
    '''
    Run the selected benchmark and print throughput statistics.

    Statistics are printed about once per second while the benchmark is
    running and once more at the end. The loop stops early when a quit is
    requested through :py:class:`util.SignalQuitable`.

    :param parser:
        Parser of the subcommand (not used).
    :param args:
        Parsed command line arguments.

    :raises ToolError:
        If the benchmark name is unknown, if options are given which the
        selected benchmark does not support, or if no data was processed.
    '''
    if args.benchmark not in _BENCHMARKS:
        # Previously an unknown name was silently skipped and the final
        # report then failed on an empty history.
        raise ToolError(
            'Invalid benchmark "%s". Choices: %s.'
            % (args.benchmark, ldq(list(_BENCHMARKS))))

    iter_batches, defaults_only = _BENCHMARKS[args.benchmark]

    sq = args.make_squirrel()

    history = ThroughputHistory()
    nbatches = 0

    with util.SignalQuitable() as quitable:
        tlast = time.time()

        if defaults_only:
            _require_default_options(args)

        for batch, nbytes in iter_batches(sq, args):
            history.update(batch, nbytes=nbytes)
            nbatches += 1

            tnow = time.time()
            if tnow > tlast + 1.0:
                print(history.get_stats())
                tlast = tnow

            if quitable.quit_requested:
                break

    if nbatches == 0:
        # There is nothing to report; fail with a clear message.
        raise ToolError(
            'No data available for benchmark "%s".' % args.benchmark)

    print(history.get_stats())