Coverage for /usr/local/lib/python3.13/dist-packages/pyrocko/squirrel/tool/commands/benchmark.py: 31%
116 statements
« prev ^ index » next coverage.py v7.6.0, created at 2025-12-04 10:41 +0000
« prev ^ index » next coverage.py v7.6.0, created at 2025-12-04 10:41 +0000
1# http://pyrocko.org - GPLv3
2#
3# The Pyrocko Developers, 21st Century
4# ---|P------/S----------~Lg----------
6'''
7Implementation of :app:`squirrel benchmark`.
8'''
10import time
11from pyrocko.guts import Object, Float, Int, Timestamp, List
12from pyrocko import util, guts
13from pyrocko.squirrel.error import ToolError
14from pyrocko.squirrel.base import Batch
15from pyrocko.squirrel.io.backends import mseed as sq_mseed
16from pyrocko.io import mseed
17from ..common import ldq
20headline = 'Perform benchmark tests.'
def make_subparser(subparsers):
    '''
    Register the ``benchmark`` subcommand.

    :param subparsers:
        Object providing ``add_parser()``; the new subcommand parser is
        created through it.

    :returns:
        Whatever ``subparsers.add_parser()`` returns for the new subcommand.
    '''
    # The description of the two mseed-files benchmarks states that query
    # options and ``--tinc`` are rejected, which is what ``run()`` does (it
    # raises ``ToolError`` for them).
    return subparsers.add_parser(
        'benchmark',
        help=headline,
        description=headline + '''

The following benchmarks are available:

chopper-waveforms

Test performance of waveform data reading in typical continuous waveform
processing schemes. Use ``--dataset`` or ``--add`` to select input data and
``--tinc`` to set a processing time-window duration. Query options ``--codes``,
``--tmin`` and ``--tmax`` can be used to restrict the reading to a specific
subset of the available data.

mseed-files-parse

Test speed of the mseed waveform decoding which is performed in the libmseed
functions without the overhead of Squirrel's database querying. This test
sequentially reads all files in the set-up data selection. Query options and
``--tinc`` are not accepted by this test.

mseed-files-disk-io

Test speed of raw disk io without the overhead of decoding the mseed data. This
test sequentially reads all files in the set-up data selection. Query options
and ``--tinc`` are not accepted by this test. Note: the io speed is reported
under traces, even though no traces are decoded in this test.

''')
def setup(parser):
    '''
    Add the command line arguments of the ``benchmark`` subcommand.

    :param parser:
        Parser to configure. It must provide ``add_argument()``,
        ``add_squirrel_selection_arguments()`` and
        ``add_squirrel_query_arguments()``.
    '''
    benchmark_choices = [
        'chopper-waveforms',
        'mseed-files-parse',
        'mseed-files-disk-io']

    # ``choices`` makes the parser reject unknown benchmark names up front.
    # Without it an unknown name matches no branch in ``run()`` and the final
    # statistics call fails on an empty history.
    parser.add_argument(
        'benchmark',
        metavar='BENCHMARK',
        choices=benchmark_choices,
        help='Benchmark to perform. Choices: %s.'
        % ldq(benchmark_choices))

    parser.add_squirrel_selection_arguments()
    parser.add_squirrel_query_arguments(without=['kinds', 'time'])

    parser.add_argument(
        '--tinc',
        dest='tinc',
        type=guts.parse_duration,
        metavar='DURATION',
        default=3600.,
        help='Set processing time interval for ``chopper-waveforms`` '
             'benchmark [s].')
class BatchInfo(Object):
    '''
    Time span and running index of one batch, kept for progress display.
    '''
    tmin = Float.T()
    tmax = Float.T()
    i = Int.T()
    n = Int.T()

    def __str__(self):
        '''
        Format as ``[<i+1>/<n> <percent>% <tmin> - <tmax>]``.
        '''
        time_format = '%Y-%m-%d %H:%M:%S'
        number = self.i + 1

        # Pad the running number to the width of the total count so that
        # successive lines align.
        width = len(str(self.n))

        return '[%*i/%i %3.0f%% %s - %s]' % (
            width,
            number,
            self.n,
            number / self.n * 100.0,
            util.time_to_str(self.tmin, format=time_format),
            util.time_to_str(self.tmax, format=time_format))

    @classmethod
    def make(cls, batch):
        '''
        Create an instance from the ``tmin``, ``tmax``, ``i`` and ``n``
        attributes of ``batch``.
        '''
        copied = {
            name: getattr(batch, name)
            for name in ('tmin', 'tmax', 'i', 'n')}

        return cls(**copied)
class ThroughputHistory:
    '''
    Running record of cumulative throughput counters.

    Every call to :py:meth:`update` appends one tuple ``(time, batch_info,
    nbytes_mseed, nbytes_traces, nsamples)`` to the history, where the last
    two entries are totals accumulated over all updates so far and
    ``nbytes_mseed`` is the current value of ``mseed.g_bytes_read``.
    '''

    def __init__(self):
        self._history = []
        self._nbytes = 0
        self._nsamples = 0

    def update(self, batch=None, nbytes=None):
        '''
        Account for one processed batch and append a history entry.

        :param batch:
            Batch object providing ``tmin``, ``tmax``, ``i``, ``n`` and
            ``traces``. May be ``None`` if ``nbytes`` is given; the history
            entry then carries no batch information.
        :param nbytes:
            Number of bytes to add to the byte total. If given, the traces of
            ``batch`` are not inspected and the sample total is unchanged.
            If ``None``, byte and sample counts are taken from the ``ydata``
            arrays of ``batch.traces``.

        :raises ValueError:
            If neither ``batch`` nor ``nbytes`` is given.
        '''
        # Checked first so that nothing is modified on invalid calls.
        if batch is None and nbytes is None:
            raise ValueError(
                'ThroughputHistory.update() needs a batch or a byte count.')

        if nbytes is not None:
            self._nbytes += nbytes
        else:
            traces = batch.traces
            self._nbytes += sum(tr.ydata.nbytes for tr in traces)
            self._nsamples += sum(tr.ydata.size for tr in traces)

        # ``batch`` defaults to None, so only derive batch info when there
        # is a batch to read it from.
        batch_info = BatchInfo.make(batch) if batch is not None else None

        self._history.append((
            time.time(),
            batch_info,
            mseed.g_bytes_read,
            self._nbytes,
            self._nsamples))

    def get_stats(self):
        '''
        Get statistics for the history recorded so far.

        :returns:
            Result of ``ThroughputStats.make()`` applied to the history.
        '''
        return ThroughputStats.make(self._history)
def total_and_rates(label, total, rates, format=util.human_bytesize):
    '''
    Render a total and its rates as ``<label>: <total> (<rate>/s, ...)``.

    :param label:
        Text placed in front of the colon.
    :param total:
        Value shown after the label, passed through ``format``.
    :param rates:
        Iterable of rate values; each is passed through ``format`` and
        suffixed with ``/s``.
    :param format:
        Callable converting a value to its display string.

    :returns:
        The assembled string.
    '''
    total_text = format(total)
    rate_texts = ['%s/s' % format(rate) for rate in rates]
    return '%s: %s (%s)' % (label, total_text, ', '.join(rate_texts))
class ThroughputStats(Object):
    '''
    Cumulative totals and average rates derived from a throughput history.

    The ``*_rates`` lists and ``time_averages`` are filled in parallel: entry
    ``k`` of each rate list was computed over the time span
    ``time_averages[k]`` (in seconds) ending at ``time``.
    '''
    time = Timestamp.T()
    batch = BatchInfo.T(optional=True)
    nbytes_mseed = Int.T()
    nbytes_traces = Int.T()
    nsamples = Int.T()
    time_averages = List.T(Float.T())
    nbytes_mseed_rates = List.T(Float.T())
    nbytes_traces_rates = List.T(Float.T())
    nsamples_rates = List.T(Float.T())

    def __str__(self):
        '''
        Format as four lines: batch info, then the mseed, traces and samples
        totals, each followed by its rates.
        '''
        # NOTE(review): whitespace was collapsed in the reviewed copy of this
        # file, so the indentation of the continuation lines inside this
        # literal is assumed to be two spaces - confirm against the original.
        return '''%s
  %s
  %s
  %s''' % (
            str(self.batch) if self.batch else '',
            total_and_rates(
                'mseed',
                self.nbytes_mseed,
                self.nbytes_mseed_rates),
            total_and_rates(
                'traces',
                self.nbytes_traces,
                self.nbytes_traces_rates),
            total_and_rates(
                'samples',
                self.nsamples,
                self.nsamples_rates,
                format=util.human_intsize)
        )

    @classmethod
    def make(cls, history, time_averages=(1., 3., 10., None)):
        '''
        Build statistics from a list of history entries.

        :param history:
            Sequence of tuples ``(time, batch_info, nbytes_mseed,
            nbytes_traces, nsamples)`` in chronological order, where the
            counters are cumulative.
        :param time_averages:
            Averaging windows in seconds, measured back from the last entry.
            ``None`` selects the span from the first to the last entry.

        :returns:
            New :py:class:`ThroughputStats` object. Totals are taken from the
            last history entry. For an empty history, all totals are zero,
            no rates are given and ``time`` is the current time.
        '''
        if not history:
            # Nothing has been recorded, e.g. because the data selection was
            # empty. Report zero totals instead of failing on ``history[-1]``.
            # ``time`` is the module here; the class attribute of the same
            # name is not visible from inside the method body.
            return cls(
                time=time.time(),
                nbytes_mseed=0,
                nbytes_traces=0,
                nsamples=0)

        end = history[-1]
        t, batch, nbytes_mseed, nbytes_traces, nsamples = end
        stats = cls(
            time=t,
            batch=batch,
            nbytes_mseed=nbytes_mseed,
            nbytes_traces=nbytes_traces,
            nsamples=nsamples)

        begins = []
        for time_average in time_averages:
            if time_average is None:
                # Average over the whole history; needs two distinct entries.
                if len(history) > 1:
                    begins.append(history[0])

                continue

            # Walk back from the entry before the last one and take the most
            # recent entry which is older than the averaging window.
            # NOTE(review): the entry at index 0 is never considered here;
            # kept as in the original - confirm that this is intended.
            for i in range(len(history)-2, 0, -1):
                if history[i][0] < t - time_average:
                    begins.append(history[i])
                    break

        for begin in begins:
            time_delta = end[0] - begin[0]
            if time_delta > 0:
                # Tuple fields 2, 3 and 4 are the cumulative counters.
                nbytes_mseed_rate, nbytes_traces_rate, nsamples_rate = [
                    (end[i] - begin[i]) / time_delta for i in range(2, 5)]

                stats.nbytes_mseed_rates.append(nbytes_mseed_rate)
                stats.nbytes_traces_rates.append(nbytes_traces_rate)
                stats.nsamples_rates.append(nsamples_rate)
                stats.time_averages.append(time_delta)

        return stats
def _require_default_options(args):
    '''
    Raise :py:exc:`ToolError` if ``--tinc`` or a query option has been set.
    '''
    # A ``tinc`` differing from the argument default (3600 s) is taken as
    # "option was given".
    if args.tinc != 3600. or any(
            x is not None for x in args.squirrel_query.values()):

        raise ToolError(
            'Invalid options given for benchmark "%s".'
            % args.benchmark)


def _report_if_due(history, tlast, interval=1.0):
    '''
    Print current statistics if more than ``interval`` seconds have passed.

    :returns:
        Time of the last report: the current time if statistics have been
        printed, ``tlast`` otherwise.
    '''
    tnow = time.time()
    if tnow > tlast + interval:
        print(history.get_stats())
        return tnow

    return tlast


def run(parser, args):
    '''
    Run the benchmark selected with ``args.benchmark``.

    Statistics are printed about once per second while the benchmark is
    running and once more at the end. The loops stop early when the
    ``util.SignalQuitable`` context reports ``quit_requested``.

    :param parser:
        Unused.
    :param args:
        Parsed command line arguments, providing ``make_squirrel()``,
        ``benchmark``, ``tinc`` and ``squirrel_query``.

    :raises ToolError:
        If the benchmark name is unknown, if query options or ``--tinc`` are
        given for one of the mseed-files benchmarks, or if the benchmark
        finished without processing any data.
    '''
    sq = args.make_squirrel()

    history = ThroughputHistory()

    with util.SignalQuitable() as quitable:
        tlast = time.time()

        if args.benchmark == 'chopper-waveforms':

            for batch in sq.chopper_waveforms(
                    tinc=args.tinc,
                    **args.squirrel_query):

                history.update(batch)
                tlast = _report_if_due(history, tlast)

                if quitable.quit_requested:
                    break

        elif args.benchmark == 'mseed-files-parse':
            _require_default_options(args)

            paths = sq.get_paths(format='mseed')
            npaths = len(paths)
            for ipath, path in enumerate(paths):
                nuts = list(sq_mseed.iload('mseed', path, None, ('waveform',)))

                # Files without waveforms add nothing to the statistics, but
                # a pending quit request must still be honoured below.
                if nuts:
                    batch = Batch(
                        i=ipath,
                        n=npaths,
                        igroup=0,
                        ngroups=0,
                        tmin=min(nut.content.tmin for nut in nuts),
                        tmax=max(nut.content.tmax for nut in nuts),
                        traces=[nut.content for nut in nuts])

                    history.update(batch)
                    tlast = _report_if_due(history, tlast)

                if quitable.quit_requested:
                    break

        elif args.benchmark == 'mseed-files-disk-io':
            _require_default_options(args)

            paths = sq.get_paths(format='mseed')
            npaths = len(paths)
            for ipath, path in enumerate(paths):

                with open(path, 'rb') as f:
                    data = f.read()

                # No traces are decoded here; the batch only carries the
                # progress counters.
                batch = Batch(
                    i=ipath,
                    n=npaths,
                    igroup=0,
                    ngroups=0,
                    tmin=0.0,
                    tmax=0.0,
                    traces=[])

                history.update(batch, nbytes=len(data))
                tlast = _report_if_due(history, tlast)

                if quitable.quit_requested:
                    break

        else:
            raise ToolError(
                'Unknown benchmark: "%s".' % args.benchmark)

        # The final statistics need at least one history entry.
        if history._history:
            print(history.get_stats())

        elif not quitable.quit_requested:
            raise ToolError(
                'No data available for benchmark "%s".' % args.benchmark)