Coverage for /usr/local/lib/python3.11/dist-packages/pyrocko/table.py: 82%
342 statements
« prev ^ index » next coverage.py v6.5.0, created at 2023-10-06 06:59 +0000
« prev ^ index » next coverage.py v6.5.0, created at 2023-10-06 06:59 +0000
1# http://pyrocko.org - GPLv3
2#
3# The Pyrocko Developers, 21st Century
4# ---|P------/S----------~Lg----------
6'''
7A slim table-like data structure for when pandas are too fat.
8'''
10import math
11import numpy as num
12from pyrocko.guts import Object, String, Unicode, List, Int, SObject, Any
13from pyrocko.guts_array import Array
14from pyrocko import geometry, cake
15from pyrocko import orthodrome as od
16from pyrocko.util import num_full
# Module-level prefix for the guts classes defined below.
# NOTE(review): presumably picked up by pyrocko.guts to build the type tags
# used in serialized output -- confirm against the guts documentation.
guts_prefix = 'pf'
def nextpow2(i):
    '''
    Get the smallest power of two which is greater than or equal to ``i``.

    :param i: positive number
    :returns: power of two ``p`` with ``p >= i``
    :raises ValueError: if ``i`` is not positive
    '''
    if i <= 0:
        raise ValueError('nextpow2() requires a positive argument')

    if i < 1:
        # Fractional arguments yield fractional powers of two (negative
        # exponent); the floating point formula is kept for these.
        return 2**int(math.ceil(math.log(i)/math.log(2.)))

    # Exact integer arithmetic. The quotient of two floating point logs can
    # come out slightly above the true exponent for exact powers of two,
    # which would double the result.
    return 1 << (int(math.ceil(i)) - 1).bit_length()
def ncols(arr):
    '''
    Get the number of columns of an array.

    One-dimensional arrays count as a single column, otherwise the length
    of the second axis is returned.
    '''
    if arr.ndim == 1:
        return 1

    return arr.shape[1]
def nrows(arr):
    '''
    Get the number of rows of an array (length of its first axis).
    '''
    shape = arr.shape
    return shape[0]
def resize_shape(shape, n):
    '''
    Get ``shape`` with its number of rows replaced by ``n``.

    A one-element shape gives ``(n,)``; otherwise the result is
    ``(n, shape[1])``, any axes beyond the second are dropped.
    '''
    if len(shape) == 1:
        return (n, )

    return (n, shape[1])
class DType(SObject):
    '''
    Guts placeholder for :py:class:`numpy.dtype`.
    '''
    # NOTE(review): both attributes are presumably consumed by the guts
    # SObject machinery (type to stand in for, and its human readable
    # name) -- confirm against pyrocko.guts.
    dummy_for = num.dtype
    dummy_for_description = 'numpy.dtype'
class SubHeader(Object):
    '''
    Description of a single column: name, unit, default value and label.
    '''

    name = String.T()
    unit = Unicode.T(optional=True)
    default = Any.T(optional=True)
    label = Unicode.T(optional=True)

    def __init__(self, name, unit=None, default=None, label=None, **kwargs):
        # The four named arguments can never appear in kwargs, so merging
        # them is equivalent to passing them as explicit keywords.
        kwargs.update(name=name, unit=unit, default=default, label=label)
        Object.__init__(self, **kwargs)

    def get_caption(self):
        '''
        Get caption text: the label (or the name if there is no label),
        followed by the unit in square brackets if a unit is set.
        '''
        caption = self.label or self.name
        if not self.unit:
            return caption

        return caption + ' [%s]' % self.unit

    def get_ncols(self):
        '''
        Get the number of array columns described, always 1.
        '''
        return 1
class Header(SubHeader):
    '''
    Column header, optionally grouping several sub-columns.

    A header without sub-headers describes a one-dimensional column. With
    sub-headers it describes a two-dimensional array holding one array
    column per sub-header.
    '''

    sub_headers = List.T(SubHeader.T())
    dtype = DType.T(default=num.dtype('float64'), optional=True)

    def __init__(
            self, name,
            unit=None,
            sub_headers=(),
            dtype=None,
            default=None,
            label=None):

        # Strings and tuples are accepted as shorthands for sub-headers.
        sub_headers = [anything_to_sub_header(sh) for sh in sub_headers]

        kwargs = dict(sub_headers=sub_headers, dtype=dtype)

        SubHeader.__init__(self, name, unit, default, label, **kwargs)

    def get_ncols(self):
        '''
        Get the number of array columns: the number of sub-headers, but at
        least 1.
        '''
        return max(1, len(self.sub_headers))

    def default_array(self, nrows):
        '''
        Create an array with ``nrows`` rows filled with the default value.

        :param nrows: number of rows of the new array
        :returns: array of shape ``(nrows,)`` if there are no sub-headers,
            ``(nrows, ncols)`` otherwise
        '''
        # ``self.dtype`` may be None (the constructor passes its dtype
        # argument through unchanged), a numpy.dtype instance or anything
        # else numpy.dtype() accepts. A dtype instance is not callable, so
        # the fill value is converted through its scalar type.
        dtype = num.dtype('float64' if self.dtype is None else self.dtype)
        val = dtype.type(self.default)

        if not self.sub_headers:
            shape = (nrows,)
        else:
            shape = (nrows, self.get_ncols())

        return num_full(shape, val, dtype=dtype)
def anything_to_header(args):
    '''
    Convert argument to a :py:class:`Header`.

    :param args: a :py:class:`Header` (returned unchanged), a ``str`` (used
        as the header name) or a ``tuple`` (used as positional arguments of
        :py:class:`Header`)
    :raises ValueError: for any other argument type
    '''
    if isinstance(args, Header):
        return args

    if isinstance(args, str):
        return Header(name=args)

    if isinstance(args, tuple):
        return Header(*args)

    raise ValueError('argument of type Header, str or tuple expected')
def anything_to_sub_header(args):
    '''
    Convert argument to a :py:class:`SubHeader`.

    :param args: a :py:class:`SubHeader` (returned unchanged), a ``str``
        (used as the name) or a ``tuple`` (used as positional arguments of
        :py:class:`SubHeader`)
    :raises ValueError: for any other argument type
    '''
    if isinstance(args, SubHeader):
        return args

    if isinstance(args, str):
        return SubHeader(name=args)

    if isinstance(args, tuple):
        return SubHeader(*args)

    raise ValueError('argument of type SubHeader, str or tuple expected')
class Description(Object):
    '''
    Summary of a table: its name, headers and dimensions.
    '''

    name = String.T(optional=True)
    headers = List.T(Header.T())
    nrows = Int.T()
    ncols = Int.T()

    def __init__(self, table=None, **kwargs):
        # When a (truthy) table is given, all properties are taken from it
        # and any keyword arguments are disregarded.
        if table:
            kwargs = dict(
                name=table._name,
                headers=table._headers,
                nrows=table.get_nrows(),
                ncols=table.get_ncols())

        Object.__init__(self, **kwargs)
class NoSuchRecipe(Exception):
    '''
    Exception type for a requested recipe which is not available.
    '''
class NoSuchCol(Exception):
    '''
    Exception type for a requested column which is not available.
    '''
class Recipe(Object):
    '''
    Base class for providers of computed table columns.

    A recipe keeps its computed columns in a table of its own (see
    :py:meth:`get_table`). For every computed column an updater function is
    registered, which is called with the client table when the column is
    requested but not yet available in the recipe's table.
    '''

    def __init__(self):
        self._table = Table()

        self._required_headers = []
        self._headers = []
        self._col_update_map = {}
        self._name_to_header = {}

    def has_col(self, name):
        '''
        Check if ``name`` is a computed column or sub-column of this recipe.
        '''
        return name in self._name_to_header

    def get_col_names(self):
        '''
        Get names of all computed columns, each followed by the names of
        its sub-columns.
        '''
        names = []
        for h in self._headers:
            names.append(h.name)
            names.extend(sh.name for sh in h.sub_headers)

        return names

    def get_table(self):
        '''
        Get the table holding the computed columns of this recipe.
        '''
        return self._table

    def get_header(self, name):
        '''
        Get header of a computed column or sub-column, or of a required
        column.

        :raises KeyError: if ``name`` is unknown to this recipe
        '''
        try:
            return self._name_to_header[name]
        except KeyError:
            pass

        for h in self._required_headers:
            if h.name == name:
                return h

        # Raised outside of the except clause to get a plain, unchained
        # KeyError.
        raise KeyError(name)

    def _add_required_cols(self, table):
        # Add the columns this recipe depends on to the table, unless they
        # are already available. Iterates the *required* headers: computed
        # columns are provided by the recipe and must not be added to the
        # client table.
        for h in self._required_headers:
            if not table.has_col(h.name):
                table.add_col(h)

    def _update_col(self, table, name):
        # Run the updater for column ``name`` unless the column is already
        # available in the recipe's own table.
        if not self._table.has_col(name):
            self._col_update_map[name](table)

    def _add_rows_handler(self, table, nrows_added):
        # Hook called by the table after rows have been added. Nothing to
        # do in the base class.
        pass

    def _register_required_col(self, header):
        self._required_headers.append(header)

    def _register_computed_col(self, header, updater):
        # The same updater serves the column and all of its sub-columns.
        self._headers.append(header)
        self._name_to_header[header.name] = header
        self._col_update_map[header.name] = updater
        for sh in header.sub_headers:
            self._col_update_map[sh.name] = updater
            self._name_to_header[sh.name] = sh
class Table(Object):
    '''
    Table of named columns backed by numpy arrays.

    Every column is stored in a buffer with ``nrows_capacity`` rows. The
    entries of :py:attr:`arrays` are views on the first ``nrows`` rows of
    these buffers. Columns which are not stored in the table itself are
    looked up in the attached recipes.
    '''

    description__ = Description.T()
    arrays__ = List.T(Array.T(serialize_as='base64+meta'))
    recipes__ = List.T(Recipe.T())

    def __init__(
            self,
            name=None,
            nrows_capacity=None,
            nrows_capacity_min=0,
            description=None,
            arrays=None,
            recipes=None):

        Object.__init__(self, init_props=False)

        self._name = name
        self._buffers = []
        self._arrays = []
        self._headers = []
        self._cols = {}
        self._recipes = []
        self.nrows_capacity_min = nrows_capacity_min
        self._nrows_capacity = 0
        if nrows_capacity is not None:
            self.set_nrows_capacity(max(nrows_capacity, nrows_capacity_min))

        if description and arrays:
            self.T.get_property('arrays').validate(
                arrays, regularize=True, depth=0)
            self._name = description.name
            self.add_cols(description.headers, arrays)

        for recipe in recipes or ():
            self.add_recipe(recipe)

    @property
    def description(self):
        return self.get_description()

    @property
    def arrays(self):
        return self._arrays

    @property
    def recipes(self):
        return self._recipes

    def add_recipe(self, recipe):
        '''
        Attach a recipe providing computed columns.
        '''
        self._recipes.append(recipe)

    def get_nrows(self):
        '''
        Get the number of rows, 0 if the table has no columns.
        '''
        if not self._arrays:
            return 0

        return nrows(self._arrays[0])

    def get_nrows_capacity(self):
        '''
        Get the number of rows allocated in the column buffers.
        '''
        return self._nrows_capacity

    def set_nrows_capacity(self, nrows_capacity_new):
        '''
        Reallocate the column buffers to hold ``nrows_capacity_new`` rows.

        :raises ValueError: if the new capacity is smaller than the current
            number of rows
        '''
        if self.get_nrows_capacity() == nrows_capacity_new:
            return

        if self.get_nrows() > nrows_capacity_new:
            raise ValueError('new capacity too small to hold current data')

        new_buffers = []
        for buf in self._buffers:
            shape = resize_shape(buf.shape, nrows_capacity_new)
            new_buffers.append(num.zeros(shape, dtype=buf.dtype))

        ncopy = min(self.get_nrows(), nrows_capacity_new)

        new_arrays = []
        for arr, buf in zip(self._arrays, new_buffers):
            buf[:ncopy, ...] = arr[:ncopy, ...]
            new_arrays.append(buf[:ncopy, ...])

        self._buffers = new_buffers
        self._arrays = new_arrays
        self._nrows_capacity = nrows_capacity_new

    def get_ncols(self):
        '''
        Get the number of stored columns (sub-columns are not counted).
        '''
        return len(self._arrays)

    def _register_col_names(self, iarr, header):
        # Map the names of a column and of its sub-columns to the position
        # of their data: (array index, column index within array or None).
        self._cols[header.name] = iarr, None
        for icol, sub_header in enumerate(header.sub_headers):
            self._cols[sub_header.name] = iarr, icol

    def add_col(self, header, array=None):
        '''
        Add a column.

        :param header: :py:class:`Header`, or ``str`` or ``tuple`` to be
            converted with :py:func:`anything_to_header`
        :param array: column data, one- or two-dimensional. If ``None``,
            the default array of the header is used.
        '''
        header = anything_to_header(header)

        nrows_current = self.get_nrows()
        if array is None:
            array = header.default_array(nrows_current)

        array = num.asarray(array)

        assert header.get_ncols() == ncols(array)
        assert array.ndim in (1, 2)
        if self._arrays:
            assert nrows(array) == nrows_current

        if nrows_current == 0:
            nrows_current = nrows(array)
            self.set_nrows_capacity(
                max(nrows_current, self.nrows_capacity_min))

        iarr = len(self._arrays)

        shape = resize_shape(array.shape, self.get_nrows_capacity())
        if shape != array.shape:
            buf = num.zeros(shape, dtype=array.dtype)
            buf[:nrows_current, ...] = array[:, ...]
        else:
            # Array already has the size of a buffer: use it directly
            # instead of copying.
            buf = array

        self._buffers.append(buf)
        self._arrays.append(buf[:nrows_current, ...])
        self._headers.append(header)

        self._register_col_names(iarr, header)

    def add_cols(self, headers, arrays=None):
        '''
        Add several columns, see :py:meth:`add_col`.
        '''
        if arrays is None:
            arrays = [None] * len(headers)

        for header, array in zip(headers, arrays):
            self.add_col(header, array)

    def remove_col(self, name):
        '''
        Remove a stored column together with its sub-columns.

        If several columns have been added under the same name, all of them
        are removed.

        :raises NoSuchCol: if the table stores no column named ``name``
        :raises ValueError: if ``name`` refers to a sub-column
        '''
        if name not in self._cols:
            raise NoSuchCol(name)

        if self._cols[name][1] is not None:
            raise ValueError(
                'cannot remove sub-column "%s", remove the complete column '
                'instead' % name)

        keep = [
            iarr for iarr, header in enumerate(self._headers)
            if header.name != name]

        self._buffers = [self._buffers[iarr] for iarr in keep]
        self._arrays = [self._arrays[iarr] for iarr in keep]
        self._headers = [self._headers[iarr] for iarr in keep]

        # Array indices have shifted, so the name lookup is rebuilt.
        self._cols = {}
        for iarr, header in enumerate(self._headers):
            self._register_col_names(iarr, header)

    def add_rows(self, arrays):
        '''
        Append rows.

        :param arrays: one array per stored column, all with the same
            number of rows
        '''
        assert self.get_ncols() == len(arrays)
        arrays = [num.asarray(arr) for arr in arrays]

        nrows_add = nrows(arrays[0])
        nrows_current = self.get_nrows()
        nrows_new = nrows_current + nrows_add
        if self.get_nrows_capacity() < nrows_new:
            # Grow to a power of two, so that repeated appends do not
            # reallocate every time.
            self.set_nrows_capacity(max(
                self.nrows_capacity_min, nextpow2(nrows_new)))

        new_arrays = []
        for buf, arr in zip(self._buffers, arrays):
            assert ncols(arr) == ncols(buf)
            assert nrows(arr) == nrows_add
            buf[nrows_current:nrows_new, ...] = arr[:, ...]
            new_arrays.append(buf[:nrows_new, ...])

        self._arrays = new_arrays

        for recipe in self._recipes:
            recipe._add_rows_handler(self, nrows_add)

    def get_col(self, name, mask=slice(None)):
        '''
        Get the data of a column or sub-column.

        :param name: name of a stored column or of a column provided by a
            recipe
        :param mask: index expression applied to the rows, or name of a
            column to be used as such
        :raises NoSuchCol: if the column is neither stored nor provided by
            a recipe
        '''
        # A mask given by name is always resolved against this table.
        # Passing the name on to the table of a recipe would fail, as that
        # table only holds the recipe's own computed columns.
        if isinstance(mask, str):
            mask = self.get_col(mask)

        if name in self._cols:
            iarr, icol = self._cols[name]
            if icol is None:
                return self._arrays[iarr][mask]

            return self._arrays[iarr][mask, icol]

        recipe = self.get_recipe_for_col(name)
        recipe._update_col(self, name)

        return recipe.get_table().get_col(name, mask)

    def get_header(self, name):
        '''
        Get the header of a stored or recipe-provided column or sub-column.
        '''
        if name in self._cols:
            iarr, icol = self._cols[name]
            if icol is None:
                return self._headers[iarr]

            return self._headers[iarr].sub_headers[icol]

        recipe = self.get_recipe_for_col(name)
        return recipe.get_header(name)

    def has_col(self, name):
        '''
        Check if a column is stored in the table or provided by a recipe.
        '''
        return (
            name in self._cols
            or any(rec.has_col(name) for rec in self._recipes))

    def get_col_names(self, sub_headers=True):
        '''
        Get names of the stored columns followed by the names provided by
        the recipes.

        :param sub_headers: whether to include the sub-column names of the
            stored columns. The names obtained from the recipes are not
            affected by this flag.
        '''
        names = []
        for h in self._headers:
            names.append(h.name)
            if sub_headers:
                names.extend(sh.name for sh in h.sub_headers)

        for recipe in self._recipes:
            names.extend(recipe.get_col_names())

        return names

    def get_recipe_for_col(self, name):
        '''
        Get the first recipe providing column ``name``.

        :raises NoSuchCol: if no recipe provides the column
        '''
        for recipe in self._recipes:
            if recipe.has_col(name):
                return recipe

        raise NoSuchCol(name)

    def get_description(self):
        return Description(self)

    def get_as_text(self):
        '''
        Render the table as text with right-aligned columns.

        The first two lines hold the captions of the headers and
        sub-headers, followed by one line per row.
        '''
        scols = []
        formats = {
            num.dtype('float64'): '%e'}

        for name in self.get_col_names(sub_headers=False):
            array = self.get_col(name)
            header = self.get_header(name)
            fmt = formats.get(array.dtype, '%s')
            if array.ndim == 1:
                scol = [header.get_caption(), '']
                scol.extend(fmt % val for val in array)
                scols.append(scol)
            else:
                for icol in range(ncols(array)):
                    sub_header = header.sub_headers[icol]
                    scol = [header.get_caption(), sub_header.get_caption()]
                    scol.extend(fmt % val for val in array[:, icol])
                    scols.append(scol)

        for scol in scols:
            width = max(len(s) for s in scol)
            scol[:] = [s.rjust(width) for s in scol]

        return '\n'.join(' '.join(srow) for srow in zip(*scols))

    def add_computed_col(self, header, func):
        '''
        Add a column computed on demand by ``func(table)``.
        '''
        header = anything_to_header(header)
        self.add_recipe(SimpleRecipe(header, func))

    def get_row(self, irow):
        '''
        Get row ``irow`` as a list of length-one slices of the stored
        arrays.
        '''
        if irow == -1:
            # ``arr[-1:0]`` would be empty, so the last row needs an open
            # ended slice.
            return [arr[-1:] for arr in self.arrays]

        return [arr[irow:irow+1] for arr in self.arrays]
class SimpleRecipe(Recipe):
    '''
    Recipe providing a single column computed by a user supplied function.
    '''

    def __init__(self, header, func):
        Recipe.__init__(self)
        self._col_name = header.name

        def updater(tab):
            # Evaluate the user function on the client table and keep the
            # result in the recipe's own table.
            values = func(tab)
            self._table.add_col(header, values)

        self._register_computed_col(header, updater)

    def _add_rows_handler(self, table, nrows_added):
        Recipe._add_rows_handler(self, table, nrows_added)

        # Drop the stored result so that it gets recomputed on next access.
        # NOTE(review): requires the recipe's table to implement
        # remove_col().
        cache = self._table
        if not cache.has_col(self._col_name):
            return

        cache.remove_col(self._col_name)
class LocationRecipe(Recipe):
    '''
    Recipe providing ``latlon`` and ``xyz`` columns.

    Requires a ``c5`` column with the sub-columns ``ref_lat``, ``ref_lon``,
    ``north_shift``, ``east_shift`` and ``depth``.
    '''

    def __init__(self):
        Recipe.__init__(self)
        self.c5_header = Header(name='c5', sub_headers=[
            SubHeader(name='ref_lat', unit='degrees'),
            SubHeader(name='ref_lon', unit='degrees'),
            SubHeader(name='north_shift', unit='m'),
            SubHeader(name='east_shift', unit='m'),
            SubHeader(name='depth', unit='m')])

        self._register_required_col(self.c5_header)

        self._latlon_header = Header(name='latlon', sub_headers=[
            SubHeader(name='lat', unit='degrees'),
            SubHeader(name='lon', unit='degrees')])

        self._register_computed_col(self._latlon_header, self._update_latlon)

        self._xyz_header = Header(name='xyz', sub_headers=[
            SubHeader(name='x', unit='m'),
            SubHeader(name='y', unit='m'),
            SubHeader(name='z', unit='m')])

        self._register_computed_col(self._xyz_header, self._update_xyz)

    def set_depth_offset(self, depth_offset):
        # NOTE(review): the value is stored but not used anywhere in this
        # class.
        self.depth_offset = depth_offset

    def _add_rows_handler(self, table, nrows_added):
        Recipe._add_rows_handler(self, table, nrows_added)

        # Drop the computed columns, they are recomputed on next access.
        for colname in ['latlon', 'xyz']:
            if self._table.has_col(colname):
                self._table.remove_col(colname)

    def _update_latlon(self, table):
        # Compute the ``latlon`` column from the reference coordinates and
        # the north/east shifts of the client table.
        lats, lons = od.ne_to_latlon(
            table.get_col('ref_lat'),
            table.get_col('ref_lon'),
            table.get_col('north_shift'),
            table.get_col('east_shift'))

        latlons = num.zeros((lats.size, 2))
        latlons[:, 0] = lats
        latlons[:, 1] = lons

        self._table.add_col(self._latlon_header, latlons)

    def _update_xyz(self, table):
        # Compute the ``xyz`` column from ``lat``, ``lon`` and ``depth``.

        # Only compute ``latlon`` if it is not available yet. Doing it
        # unconditionally would append a second ``latlon`` column to the
        # recipe's table whenever ``lat`` or ``lon`` had been requested
        # before.
        if not self._table.has_col('latlon'):
            self._update_latlon(table)

        xyzs = geometry.latlondepth2xyz(
            num.concatenate((
                table.get_col('lat').reshape(-1, 1),
                table.get_col('lon').reshape(-1, 1),
                table.get_col('depth').reshape(-1, 1)),
                axis=1),
            planetradius=cake.earthradius)

        self._table.add_col(self._xyz_header, xyzs)
class EventRecipe(LocationRecipe):
    '''
    Location recipe for event tables.

    In addition to the columns needed by :py:class:`LocationRecipe`, the
    columns ``time`` and ``magnitude`` are required.
    '''

    def __init__(self):
        LocationRecipe.__init__(self)
        self._register_required_col(Header(name='time', unit='s'))
        self._register_required_col(Header(name='magnitude'))

    def iter_events(self, table):
        '''
        Iterate over the rows of ``table``, yielding one
        ``pyrocko.model.Event`` per row.
        '''
        from pyrocko import model

        # NOTE(review): the computed ``lat``/``lon`` columns are passed
        # together with the shifts. If ``model.Event`` applies
        # ``north_shift``/``east_shift`` on top of ``lat``/``lon``, then
        # ``ref_lat``/``ref_lon`` should be used here -- confirm.
        colnames = [
            'time', 'lat', 'lon',
            'north_shift', 'east_shift', 'depth',
            'magnitude']

        cols = [table.get_col(x) for x in colnames]

        # The columns must be unpacked into zip() to iterate row by row.
        # Zipping the bare list of columns yields 1-tuples holding complete
        # columns.
        for time, lat, lon, north_shift, east_shift, depth, magnitude \
                in zip(*cols):

            yield model.Event(
                time=time,
                lat=lat,
                lon=lon,
                north_shift=north_shift,
                east_shift=east_shift,
                depth=depth,
                magnitude=magnitude)

    def get_events(self, table):
        '''
        Get the events of ``table`` as a list, see :py:meth:`iter_events`.
        '''
        return list(self.iter_events(table))
class MomentTensorRecipe(Recipe):
    '''
    Recipe requiring an ``m6`` column with six sub-columns in units of Nm.
    '''

    def __init__(self):
        Recipe.__init__(self)

        component_names = ('Mnn', 'Mee', 'Mdd', 'Mne', 'Mnd', 'Med')
        components = [
            SubHeader(name=component_name, unit='Nm')
            for component_name in component_names]

        self.m6_header = Header(name='m6', sub_headers=components)

        self._register_required_col(self.m6_header)