Coverage for /usr/local/lib/python3.11/dist-packages/pyrocko/table.py: 82%
341 statements
« prev ^ index » next coverage.py v6.5.0, created at 2024-03-07 11:54 +0000
« prev ^ index » next coverage.py v6.5.0, created at 2024-03-07 11:54 +0000
1# http://pyrocko.org - GPLv3
2#
3# The Pyrocko Developers, 21st Century
4# ---|P------/S----------~Lg----------
6'''
7A slim table-like data structure for when pandas are too fat.
8'''
10import math
11import numpy as num
12from pyrocko.guts import Object, String, Unicode, List, Int, SObject, Any
13from pyrocko.guts_array import Array
14from pyrocko import geometry, cake
15from pyrocko import orthodrome as od
18guts_prefix = 'pf'
21def nextpow2(i):
22 return 2**int(math.ceil(math.log(i)/math.log(2.)))
25def ncols(arr):
26 return 1 if arr.ndim == 1 else arr.shape[1]
29def nrows(arr):
30 return arr.shape[0]
33def resize_shape(shape, n):
34 return (n, ) if len(shape) == 1 else (n, shape[1])
37class DType(SObject):
38 '''
39 Guts placeholder for :py:class:`numpy.dtype`.
40 '''
41 dummy_for = num.dtype
42 dummy_for_description = 'numpy.dtype'
45class SubHeader(Object):
46 name = String.T()
47 unit = Unicode.T(optional=True)
48 default = Any.T(optional=True)
49 label = Unicode.T(optional=True)
51 def __init__(self, name, unit=None, default=None, label=None, **kwargs):
52 Object.__init__(
53 self, name=name, unit=unit, default=default, label=label, **kwargs)
55 def get_caption(self):
56 s = self.label or self.name
57 if self.unit:
58 s += ' [%s]' % self.unit
60 return s
62 def get_ncols(self):
63 return 1
66class Header(SubHeader):
67 sub_headers = List.T(SubHeader.T())
68 dtype = DType.T(default=num.dtype('float64'), optional=True)
70 def __init__(
71 self, name,
72 unit=None,
73 sub_headers=[],
74 dtype=None,
75 default=None,
76 label=None):
78 sub_headers = [anything_to_sub_header(sh) for sh in sub_headers]
80 kwargs = dict(sub_headers=sub_headers, dtype=dtype)
82 SubHeader.__init__(self, name, unit, default, label, **kwargs)
84 def get_ncols(self):
85 return max(1, len(self.sub_headers))
87 def default_array(self, nrows):
88 val = self.dtype(self.default)
89 if not self.sub_headers:
90 return num.full((nrows,), val, dtype=self.dtype)
91 else:
92 return num.full((nrows, self.get_ncols()), val, dtype=self.dtype)
95def anything_to_header(args):
96 if isinstance(args, Header):
97 return args
98 elif isinstance(args, str):
99 return Header(name=args)
100 elif isinstance(args, tuple):
101 return Header(*args)
102 else:
103 raise ValueError('argument of type Header, str or tuple expected')
106def anything_to_sub_header(args):
107 if isinstance(args, SubHeader):
108 return args
109 elif isinstance(args, str):
110 return SubHeader(name=args)
111 elif isinstance(args, tuple):
112 return SubHeader(*args)
113 else:
114 raise ValueError('argument of type SubHeader, str or tuple expected')
117class Description(Object):
118 name = String.T(optional=True)
119 headers = List.T(Header.T())
120 nrows = Int.T()
121 ncols = Int.T()
123 def __init__(self, table=None, **kwargs):
124 if table:
125 Object.__init__(
126 self,
127 name=table._name,
128 headers=table._headers,
129 nrows=table.get_nrows(),
130 ncols=table.get_ncols())
131 else:
132 Object.__init__(self, **kwargs)
135class NoSuchRecipe(Exception):
136 pass
139class NoSuchCol(Exception):
140 pass
143class Recipe(Object):
145 def __init__(self):
146 self._table = None
147 self._table = Table()
149 self._required_headers = []
150 self._headers = []
151 self._col_update_map = {}
152 self._name_to_header = {}
154 def has_col(self, name):
155 return name in self._name_to_header
157 def get_col_names(self):
158 names = []
159 for h in self._headers:
160 names.append(h.name)
161 for sh in h.sub_headers:
162 names.append(sh.name)
164 return names
166 def get_table(self):
167 return self._table
169 def get_header(self, name):
170 try:
171 return self._name_to_header[name]
172 except KeyError:
173 for h in self._required_headers:
174 if h.name == name:
175 return h
177 raise KeyError(name)
179 def _add_required_cols(self, table):
180 for h in self._headers:
181 if not table.has_col(h.name):
182 table.add_col(h)
184 def _update_col(self, table, name):
185 if not self._table.has_col(name):
186 self._col_update_map[name](table)
188 def _add_rows_handler(self, table, nrows_added):
189 pass
191 def _register_required_col(self, header):
192 self._required_headers.append(header)
194 def _register_computed_col(self, header, updater):
195 self._headers.append(header)
196 self._name_to_header[header.name] = header
197 self._col_update_map[header.name] = updater
198 for sh in header.sub_headers:
199 self._col_update_map[sh.name] = updater
200 self._name_to_header[sh.name] = sh
203class Table(Object):
205 description__ = Description.T()
206 arrays__ = List.T(Array.T(serialize_as='base64+meta'))
207 recipes__ = List.T(Recipe.T())
209 def __init__(
210 self,
211 name=None,
212 nrows_capacity=None,
213 nrows_capacity_min=0,
214 description=None,
215 arrays=None,
216 recipes=[]):
218 Object.__init__(self, init_props=False)
220 self._name = name
221 self._buffers = []
222 self._arrays = []
223 self._headers = []
224 self._cols = {}
225 self._recipes = []
226 self.nrows_capacity_min = nrows_capacity_min
227 self._nrows_capacity = 0
228 if nrows_capacity is not None:
229 self.set_nrows_capacity(max(nrows_capacity, nrows_capacity_min))
231 if description and arrays:
232 self.T.get_property('arrays').validate(
233 arrays, regularize=True, depth=0)
234 self._name = description.name
235 self.add_cols(description.headers, arrays)
236 for recipe in recipes:
237 self.add_recipe(recipe)
239 @property
240 def description(self):
241 return self.get_description()
243 @property
244 def arrays(self):
245 return self._arrays
247 @property
248 def recipes(self):
249 return self._recipes
251 def add_recipe(self, recipe):
252 self._recipes.append(recipe)
253 # recipe._add_required_cols(self)
255 def get_nrows(self):
256 if not self._arrays:
257 return 0
258 else:
259 return nrows(self._arrays[0])
261 def get_nrows_capacity(self):
262 return self._nrows_capacity
264 def set_nrows_capacity(self, nrows_capacity_new):
265 if self.get_nrows_capacity() != nrows_capacity_new:
266 if self.get_nrows() > nrows_capacity_new:
267 raise ValueError('new capacity too small to hold current data')
269 new_buffers = []
270 for buf in self._buffers:
271 shape = resize_shape(buf.shape, nrows_capacity_new)
272 new_buffers.append(num.zeros(shape, dtype=buf.dtype))
274 ncopy = min(self.get_nrows(), nrows_capacity_new)
276 new_arrays = []
277 for arr, buf in zip(self._arrays, new_buffers):
278 buf[:ncopy, ...] = arr[:ncopy, ...]
279 new_arrays.append(buf[:ncopy, ...])
281 self._buffers = new_buffers
282 self._arrays = new_arrays
283 self._nrows_capacity = nrows_capacity_new
285 def get_ncols(self):
286 return len(self._arrays)
288 def add_col(self, header, array=None):
289 header = anything_to_header(header)
291 nrows_current = self.get_nrows()
292 if array is None:
293 array = header.default_array(nrows_current)
295 array = num.asarray(array)
297 assert header.get_ncols() == ncols(array)
298 assert array.ndim in (1, 2)
299 if self._arrays:
300 assert nrows(array) == nrows_current
302 if nrows_current == 0:
303 nrows_current = nrows(array)
304 self.set_nrows_capacity(
305 max(nrows_current, self.nrows_capacity_min))
307 iarr = len(self._arrays)
309 shape = resize_shape(array.shape, self.get_nrows_capacity())
310 if shape != array.shape:
311 buf = num.zeros(shape, dtype=array.dtype)
312 buf[:nrows_current, ...] = array[:, ...]
313 else:
314 buf = array
316 self._buffers.append(buf)
317 self._arrays.append(buf[:nrows_current, ...])
318 self._headers.append(header)
320 self._cols[header.name] = iarr, None
322 for icol, sub_header in enumerate(header.sub_headers):
323 self._cols[sub_header.name] = iarr, icol
325 def add_cols(self, headers, arrays=None):
326 if arrays is None:
327 arrays = [None] * len(headers)
329 for header, array in zip(headers, arrays):
330 self.add_col(header, array)
332 def add_rows(self, arrays):
333 assert self.get_ncols() == len(arrays)
334 arrays = [num.asarray(arr) for arr in arrays]
336 nrows_add = nrows(arrays[0])
337 nrows_current = self.get_nrows()
338 nrows_new = nrows_current + nrows_add
339 if self.get_nrows_capacity() < nrows_new:
340 self.set_nrows_capacity(max(
341 self.nrows_capacity_min, nextpow2(nrows_new)))
343 new_arrays = []
344 for buf, arr in zip(self._buffers, arrays):
345 assert ncols(arr) == ncols(buf)
346 assert nrows(arr) == nrows_add
347 buf[nrows_current:nrows_new, ...] = arr[:, ...]
348 new_arrays.append(buf[:nrows_new, ...])
350 self._arrays = new_arrays
352 for recipe in self._recipes:
353 recipe._add_rows_handler(self, nrows_add)
355 def get_col(self, name, mask=slice(None)):
356 if name in self._cols:
357 if isinstance(mask, str):
358 mask = self.get_col(mask)
360 iarr, icol = self._cols[name]
361 if icol is None:
362 return self._arrays[iarr][mask]
363 else:
364 return self._arrays[iarr][mask, icol]
365 else:
366 recipe = self.get_recipe_for_col(name)
367 recipe._update_col(self, name)
369 return recipe.get_table().get_col(name, mask)
371 def get_header(self, name):
372 if name in self._cols:
373 iarr, icol = self._cols[name]
374 if icol is None:
375 return self._headers[iarr]
376 else:
377 return self._headers[iarr].sub_headers[icol]
378 else:
379 recipe = self.get_recipe_for_col(name)
380 return recipe.get_header(name)
382 def has_col(self, name):
383 return name in self._cols or \
384 any(rec.has_col(name) for rec in self._recipes)
386 def get_col_names(self, sub_headers=True):
387 names = []
388 for h in self._headers:
389 names.append(h.name)
390 if sub_headers:
391 for sh in h.sub_headers:
392 names.append(sh.name)
394 for recipe in self._recipes:
395 names.extend(recipe.get_col_names())
397 return names
399 def get_recipe_for_col(self, name):
400 for recipe in self._recipes:
401 if recipe.has_col(name):
402 return recipe
404 raise NoSuchCol(name)
406 def get_description(self):
407 return Description(self)
409 def get_as_text(self):
410 scols = []
411 formats = {
412 num.dtype('float64'): '%e'}
414 for name in self.get_col_names(sub_headers=False):
415 array = self.get_col(name)
416 header = self.get_header(name)
417 fmt = formats.get(array.dtype, '%s')
418 if array.ndim == 1:
419 scol = [header.get_caption(), '']
420 for val in array:
421 scol.append(fmt % val)
423 scols.append(scol)
424 else:
425 for icol in range(ncols(array)):
426 sub_header = header.sub_headers[icol]
427 scol = [header.get_caption(), sub_header.get_caption()]
428 for val in array[:, icol]:
429 scol.append(fmt % val)
431 scols.append(scol)
433 for scol in scols:
434 width = max(len(s) for s in scol)
435 for i in range(len(scol)):
436 scol[i] = scol[i].rjust(width)
438 return '\n'.join(' '.join(s for s in srow) for srow in zip(*scols))
440 def add_computed_col(self, header, func):
441 header = anything_to_header(header)
442 self.add_recipe(SimpleRecipe(header, func))
444 def get_row(self, irow):
445 if irow == -1:
446 return [arr[-1:] for arr in self.arrays]
447 else:
448 return [arr[irow:irow+1] for arr in self.arrays]
451class SimpleRecipe(Recipe):
453 def __init__(self, header, func):
454 Recipe.__init__(self)
455 self._col_name = header.name
457 def call_func(tab):
458 self._table.add_col(header, func(tab))
460 self._register_computed_col(header, call_func)
462 def _add_rows_handler(self, table, nrows_added):
463 Recipe._add_rows_handler(self, table, nrows_added)
464 if self._table.has_col(self._col_name):
465 self._table.remove_col(self._col_name)
468class LocationRecipe(Recipe):
470 def __init__(self):
471 Recipe.__init__(self)
472 self.c5_header = Header(name='c5', sub_headers=[
473 SubHeader(name='ref_lat', unit='degrees'),
474 SubHeader(name='ref_lon', unit='degrees'),
475 SubHeader(name='north_shift', unit='m'),
476 SubHeader(name='east_shift', unit='m'),
477 SubHeader(name='depth', unit='m')])
479 self._register_required_col(self.c5_header)
481 self._latlon_header = Header(name='latlon', sub_headers=[
482 SubHeader(name='lat', unit='degrees'),
483 SubHeader(name='lon', unit='degrees')])
485 self._register_computed_col(self._latlon_header, self._update_latlon)
487 self._xyz_header = Header(name='xyz', sub_headers=[
488 SubHeader(name='x', unit='m'),
489 SubHeader(name='y', unit='m'),
490 SubHeader(name='z', unit='m')])
492 self._register_computed_col(self._xyz_header, self._update_xyz)
494 def set_depth_offset(self, depth_offset):
495 self.depth_offset = depth_offset
497 def _add_rows_handler(self, table, nrows_added):
498 Recipe._add_rows_handler(self, table, nrows_added)
500 for colname in ['latlon', 'xyz']:
501 if self._table.has_col(colname):
502 self._table.remove_col(colname)
504 def _update_latlon(self, table):
505 lats, lons = od.ne_to_latlon(
506 table.get_col('ref_lat'),
507 table.get_col('ref_lon'),
508 table.get_col('north_shift'),
509 table.get_col('east_shift'))
511 latlons = num.zeros((lats.size, 2))
512 latlons[:, 0] = lats
513 latlons[:, 1] = lons
515 self._table.add_col(self._latlon_header, latlons)
517 def _update_xyz(self, table):
518 self._update_latlon(table)
520 xyzs = geometry.latlondepth2xyz(
521 num.concatenate((
522 table.get_col('lat').reshape(-1, 1),
523 table.get_col('lon').reshape(-1, 1),
524 table.get_col('depth').reshape(-1, 1)),
525 axis=1),
526 planetradius=cake.earthradius)
528 self._table.add_col(self._xyz_header, xyzs)
531class EventRecipe(LocationRecipe):
533 def __init__(self):
534 LocationRecipe.__init__(self)
535 self._register_required_col(Header(name='time', unit='s'))
536 self._register_required_col(Header(name='magnitude'))
538 def iter_events(self, table):
539 from pyrocko import model
540 for vec in zip(table.get_col(x) for x in [
541 'time', 'lat', 'lon',
542 'north_shift', 'east_shift', 'depth',
543 'magnitude']):
545 yield model.Event(
546 time=vec[0],
547 lat=vec[1],
548 lon=vec[2],
549 north_shift=vec[3],
550 east_shift=vec[4],
551 depth=vec[5],
552 magnitude=vec[6])
554 def get_events(self, table):
555 return list(self.iter_events(table))
558class MomentTensorRecipe(Recipe):
560 def __init__(self):
561 Recipe.__init__(self)
562 self.m6_header = Header(name='m6', sub_headers=[
563 SubHeader(name='Mnn', unit='Nm'),
564 SubHeader(name='Mee', unit='Nm'),
565 SubHeader(name='Mdd', unit='Nm'),
566 SubHeader(name='Mne', unit='Nm'),
567 SubHeader(name='Mnd', unit='Nm'),
568 SubHeader(name='Med', unit='Nm')])
570 self._register_required_col(self.m6_header)