Coverage for /usr/local/lib/python3.13/dist-packages/pyrocko/squirrel/tool/commands/benchmark.py: 31%

1# http://pyrocko.org - GPLv3

3# The Pyrocko Developers, 21st Century

4# ---|P------/S----------~Lg----------

6'''

7Implementation of :app:`squirrel benchmark`.

8'''

10import time

11from pyrocko.guts import Object, Float, Int, Timestamp, List

12from pyrocko import util, guts

13from pyrocko.squirrel.error import ToolError

14from pyrocko.squirrel.base import Batch

15from pyrocko.squirrel.io.backends import mseed as sq_mseed

16from pyrocko.io import mseed

17from ..common import ldq

# One-line summary of the subcommand. make_subparser() uses it both as the
# short help text and as the first line of the long description.
headline = 'Perform benchmark tests.'

def make_subparser(subparsers):
    '''
    Register the ``benchmark`` subcommand.

    :param subparsers:
        Object providing ``add_parser()``; the subcommand is added to it.

    :returns:
        The value returned by ``subparsers.add_parser()``.
    '''
    # The long description starts with the headline, followed by one
    # paragraph per available benchmark.
    description = headline + '''

The following benchmarks are available:

chopper-waveforms

Test performance of waveform data reading in typical continuous waveform
processing schemes. Use ``--dataset`` or ``--add`` to select input data and
``--tinc`` to set a processing time-window duration. Query options ``--codes``,
``--tmin`` and ``--tmax`` can be used to restrict the reading to a specific
subset of the available data.

mseed-files-parse

Test speed of the mseed waveform decoding which is performed in the libmseed
functions without the overhead of Squirrel's database querying. This test
sequentially reads all files in the set-up data selection. Any query options
are ignored in this test.

mseed-files-disk-io

Test speed of raw disk io without the overhead of decoding the mseed data. This
test sequentially reads all files in the set-up data selection. Any query
options are ignored in this test. Note: the io speed is reported under traces,
even though no traces are decoded in this test.

'''

    return subparsers.add_parser(
        'benchmark',
        help=headline,
        description=description)

def setup(parser):
    '''
    Declare the command line arguments of the ``benchmark`` subcommand.

    Adds the positional ``benchmark`` argument, the Squirrel selection and
    query arguments (query arguments without ``kinds`` and ``time``) and the
    ``--tinc`` option.

    :param parser:
        Parser of the subcommand. Besides ``add_argument()`` it must provide
        ``add_squirrel_selection_arguments()`` and
        ``add_squirrel_query_arguments()``.
    '''
    available = [
        'chopper-waveforms',
        'mseed-files-parse',
        'mseed-files-disk-io']

    benchmark_help = 'Benchmark to perform. Choices: %s.' % ldq(available)

    parser.add_argument(
        'benchmark',
        metavar='BENCHMARK',
        help=benchmark_help)

    parser.add_squirrel_selection_arguments()
    parser.add_squirrel_query_arguments(without=['kinds', 'time'])

    # Default of 3600 s; run() compares against this value to detect whether
    # the option has been changed by the user.
    tinc_options = dict(
        dest='tinc',
        type=guts.parse_duration,
        metavar='DURATION',
        default=3600.,
        help='Set processing time interval for ```chopper``` benchmark [s].')

    parser.add_argument('--tinc', **tinc_options)

class BatchInfo(Object):
    '''
    Time span and position of one batch within a sequence of batches.
    '''

    tmin = Float.T()
    tmax = Float.T()
    i = Int.T()
    n = Int.T()

    def __str__(self):
        '''
        Format as ``[<i+1>/<n> <percent>% <tmin> - <tmax>]``.

        The one-based batch number is right-aligned to the number of digits
        of ``n``.
        '''
        time_format = '%Y-%m-%d %H:%M:%S'
        number = self.i + 1
        width = len(str(self.n))

        # ``%*i`` takes its field width from the argument tuple.
        return '[%*i/%i %3.0f%% %s - %s]' % (
            width,
            number,
            self.n,
            number / self.n * 100.0,
            util.time_to_str(self.tmin, format=time_format),
            util.time_to_str(self.tmax, format=time_format))

    @classmethod
    def make(cls, batch):
        '''
        Create an instance from the ``tmin``, ``tmax``, ``i`` and ``n``
        attributes of ``batch``.
        '''
        copied = ('tmin', 'tmax', 'i', 'n')
        return cls(**{name: getattr(batch, name) for name in copied})

101

102

class ThroughputHistory:
    '''
    Running record of cumulative throughput counters.

    Every call to :py:meth:`update` appends one entry
    ``(time, batch_info, nbytes_mseed, nbytes_traces, nsamples)`` to the
    history, where ``nbytes_mseed`` is the value of ``mseed.g_bytes_read`` at
    the time of the call and the last two elements are running totals.
    '''

    def __init__(self):
        self._history = []
        self._nbytes = 0
        self._nsamples = 0

    def update(self, batch=None, nbytes=None):
        '''
        Add a measurement to the history.

        :param batch:
            Batch which has just been processed, or ``None``. It must provide
            ``tmin``, ``tmax``, ``i``, ``n`` and ``traces``.

        :param nbytes:
            Number of bytes to add to the byte total. If given, the traces of
            ``batch`` are not inspected and the sample total is left
            unchanged. If ``None``, byte and sample counts are derived from
            ``batch.traces``.
        '''
        if nbytes is not None:
            self._nbytes += nbytes
        elif batch is not None:
            self._nbytes += sum(tr.ydata.nbytes for tr in batch.traces)
            self._nsamples += sum(tr.ydata.size for tr in batch.traces)

        # ``batch`` is declared optional, so do not try to summarize a
        # missing batch; ThroughputStats accepts ``batch=None``.
        batch_info = BatchInfo.make(batch) if batch is not None else None

        self._history.append((
            time.time(),
            batch_info,
            mseed.g_bytes_read,
            self._nbytes,
            self._nsamples))

    def get_stats(self):
        '''
        Get a :py:class:`ThroughputStats` summary of the current history.
        '''
        return ThroughputStats.make(self._history)

126

127

def total_and_rates(label, total, rates, format=util.human_bytesize):
    '''
    Format a total and its rates as ``<label>: <total> (<rate>/s, ...)``.

    :param label:
        Text put in front of the numbers.

    :param total:
        Total value, formatted with ``format``.

    :param rates:
        Iterable of rate values, each formatted with ``format`` and suffixed
        with ``/s``.

    :param format:
        Callable turning a number into a string.

    :returns:
        The formatted line.
    '''
    total_text = format(total)
    rate_texts = ['%s/s' % format(rate) for rate in rates]
    return '%s: %s (%s)' % (label, total_text, ', '.join(rate_texts))

133

134

class ThroughputStats(Object):
    '''
    Totals and averaged rates derived from a throughput history.
    '''

    time = Timestamp.T()
    batch = BatchInfo.T(optional=True)
    nbytes_mseed = Int.T()
    nbytes_traces = Int.T()
    nsamples = Int.T()

    # Actual durations [s] over which the corresponding rates have been
    # computed. The three rate lists are parallel to this list.
    time_averages = List.T(Float.T())
    nbytes_mseed_rates = List.T(Float.T())
    nbytes_traces_rates = List.T(Float.T())
    nsamples_rates = List.T(Float.T())

    def __str__(self):
        '''
        Format as four lines: batch info (empty if not available), followed
        by totals and rates for mseed bytes, trace bytes and samples.
        '''
        # NOTE(review): the indentation of the continuation lines inside this
        # template was reconstructed from a listing with collapsed
        # whitespace; two spaces are assumed - confirm against the original.
        return '''%s
  %s
  %s
  %s''' % (
            str(self.batch) if self.batch else '',
            total_and_rates(
                'mseed',
                self.nbytes_mseed,
                self.nbytes_mseed_rates),
            total_and_rates(
                'traces',
                self.nbytes_traces,
                self.nbytes_traces_rates),
            total_and_rates(
                'samples',
                self.nsamples,
                self.nsamples_rates,
                format=util.human_intsize)
        )

    @classmethod
    def make(cls, history, time_averages=(1., 3., 10., None)):
        '''
        Create stats from a history of cumulative counters.

        :param history:
            Sequence of tuples
            ``(time, batch_info, nbytes_mseed, nbytes_traces, nsamples)`` in
            chronological order. Totals are taken from the last entry.

        :param time_averages:
            Averaging windows [s], counted back from the last entry. ``None``
            selects the span from the first to the last entry.

        :returns:
            New :py:class:`ThroughputStats` instance. For an empty history,
            all totals are zero and no rates are given.
        '''
        if not history:
            # Nothing has been measured yet. ``time`` refers to the module
            # here; class attributes are not visible in method scope.
            return cls(
                time=time.time(),
                nbytes_mseed=0,
                nbytes_traces=0,
                nsamples=0)

        end = history[-1]
        t, batch, nbytes_mseed, nbytes_traces, nsamples = end
        stats = cls(
            time=t,
            batch=batch,
            nbytes_mseed=nbytes_mseed,
            nbytes_traces=nbytes_traces,
            nsamples=nsamples)

        # For every averaging window, pick the history entry from which the
        # rate is measured up to the last entry.
        begins = []
        for time_average in time_averages:
            if time_average is None:
                # Whole history; needs at least two entries to be meaningful.
                if len(history) > 1:
                    begins.append(history[0])

                continue

            # Search backwards, starting before the last entry, for the most
            # recent entry which is older than the window.
            # NOTE(review): the first entry (index 0) is never considered by
            # this search - confirm that this is intended.
            for i in range(len(history)-2, 0, -1):
                if history[i][0] < t - time_average:
                    begins.append(history[i])
                    break

        for begin in begins:
            time_delta = end[0] - begin[0]
            if time_delta > 0:
                # Elements 2, 3, 4 of a history entry are the cumulative
                # mseed byte, trace byte and sample counters.
                nbytes_mseed_rate, nbytes_traces_rate, nsamples_rate = [
                    (end[i] - begin[i]) / time_delta for i in range(2, 5)]

                stats.nbytes_mseed_rates.append(nbytes_mseed_rate)
                stats.nbytes_traces_rates.append(nbytes_traces_rate)
                stats.nsamples_rates.append(nsamples_rate)
                stats.time_averages.append(time_delta)

        return stats

200

201

def _require_default_options(args):
    '''
    Raise :py:exc:`ToolError` if ``--tinc`` differs from its default of
    3600 s or if any query option has been set.
    '''
    if args.tinc != 3600. or any(
            x is not None for x in args.squirrel_query.values()):

        raise ToolError(
            'Invalid options given for benchmark "%s".'
            % args.benchmark)


def _chopper_waveforms_batches(sq, args):
    '''
    Yield ``(batch, None)`` for every batch of ``sq.chopper_waveforms()``.
    '''
    for batch in sq.chopper_waveforms(
            tinc=args.tinc,
            **args.squirrel_query):

        yield batch, None


def _mseed_files_parse_batches(sq, args):
    '''
    Yield ``(batch, None)`` for every mseed file containing waveforms.

    Each file is loaded completely through the Squirrel mseed backend; files
    yielding no content are skipped.
    '''
    _require_default_options(args)

    paths = sq.get_paths(format='mseed')
    for ipath, path in enumerate(paths):
        nuts = list(sq_mseed.iload('mseed', path, None, ('waveform',)))

        if not nuts:
            continue

        yield Batch(
            i=ipath,
            n=len(paths),
            igroup=0,
            ngroups=0,
            tmin=min(nut.content.tmin for nut in nuts),
            tmax=max(nut.content.tmax for nut in nuts),
            traces=[nut.content for nut in nuts]), None


def _mseed_files_disk_io_batches(sq, args):
    '''
    Yield ``(batch, nbytes)`` for every mseed file, reading its raw bytes.

    The batches carry no traces; the file size is passed on as ``nbytes``.
    '''
    _require_default_options(args)

    paths = sq.get_paths(format='mseed')
    for ipath, path in enumerate(paths):

        with open(path, 'rb') as f:
            data = f.read()

        yield Batch(
            i=ipath,
            n=len(paths),
            igroup=0,
            ngroups=0,
            tmin=0.0,
            tmax=0.0,
            traces=[]), len(data)


# Maps benchmark names to generator functions yielding ``(batch, nbytes)``.
_BATCH_SOURCES = {
    'chopper-waveforms': _chopper_waveforms_batches,
    'mseed-files-parse': _mseed_files_parse_batches,
    'mseed-files-disk-io': _mseed_files_disk_io_batches,
}


def run(parser, args):
    '''
    Run the benchmark selected with ``args.benchmark``.

    Throughput statistics are printed whenever more than one second has
    passed since the last report, and once more at the end. The loop stops
    early when quitting is requested through ``util.SignalQuitable``.

    :param parser:
        Parser of the subcommand (unused).

    :param args:
        Parsed arguments. Must provide ``benchmark``, ``tinc``,
        ``squirrel_query`` and ``make_squirrel()``.

    :raises ToolError:
        If the benchmark name is unknown, if options unsupported by the
        selected benchmark are given, or if there is no data to process.
    '''
    batch_source = _BATCH_SOURCES.get(args.benchmark)
    if batch_source is None:
        # The positional argument is not restricted by the parser, so an
        # unknown name must be rejected here instead of silently doing
        # nothing.
        raise ToolError(
            'Unknown benchmark "%s". Choices: %s.' % (
                args.benchmark, ldq(list(_BATCH_SOURCES))))

    sq = args.make_squirrel()

    history = ThroughputHistory()
    nupdates = 0

    with util.SignalQuitable() as quitable:
        tlast = time.time()

        for batch, nbytes in batch_source(sq, args):
            history.update(batch, nbytes=nbytes)
            nupdates += 1

            tnow = time.time()
            if tnow > tlast + 1.0:
                print(history.get_stats())
                tlast = tnow

            if quitable.quit_requested:
                break

        if nupdates == 0:
            # Without any measurement there are no stats to report.
            raise ToolError(
                'No data available to run benchmark "%s".'
                % args.benchmark)

        print(history.get_stats())