Coverage for /usr/local/lib/python3.11/dist-packages/pyrocko/table.py: 82%

341 statements  

« prev     ^ index     » next       coverage.py v6.5.0, created at 2024-03-07 11:54 +0000

1# http://pyrocko.org - GPLv3 

2# 

3# The Pyrocko Developers, 21st Century 

4# ---|P------/S----------~Lg---------- 

5 

6''' 

7A slim table-like data structure for when pandas are too fat. 

8''' 

9 

10import math 

11import numpy as num 

12from pyrocko.guts import Object, String, Unicode, List, Int, SObject, Any 

13from pyrocko.guts_array import Array 

14from pyrocko import geometry, cake 

15from pyrocko import orthodrome as od 

16 

17 

18guts_prefix = 'pf' 

19 

20 

21def nextpow2(i): 

22 return 2**int(math.ceil(math.log(i)/math.log(2.))) 

23 

24 

25def ncols(arr): 

26 return 1 if arr.ndim == 1 else arr.shape[1] 

27 

28 

29def nrows(arr): 

30 return arr.shape[0] 

31 

32 

33def resize_shape(shape, n): 

34 return (n, ) if len(shape) == 1 else (n, shape[1]) 

35 

36 

37class DType(SObject): 

38 ''' 

39 Guts placeholder for :py:class:`numpy.dtype`. 

40 ''' 

41 dummy_for = num.dtype 

42 dummy_for_description = 'numpy.dtype' 

43 

44 

45class SubHeader(Object): 

46 name = String.T() 

47 unit = Unicode.T(optional=True) 

48 default = Any.T(optional=True) 

49 label = Unicode.T(optional=True) 

50 

51 def __init__(self, name, unit=None, default=None, label=None, **kwargs): 

52 Object.__init__( 

53 self, name=name, unit=unit, default=default, label=label, **kwargs) 

54 

55 def get_caption(self): 

56 s = self.label or self.name 

57 if self.unit: 

58 s += ' [%s]' % self.unit 

59 

60 return s 

61 

62 def get_ncols(self): 

63 return 1 

64 

65 

66class Header(SubHeader): 

67 sub_headers = List.T(SubHeader.T()) 

68 dtype = DType.T(default=num.dtype('float64'), optional=True) 

69 

70 def __init__( 

71 self, name, 

72 unit=None, 

73 sub_headers=[], 

74 dtype=None, 

75 default=None, 

76 label=None): 

77 

78 sub_headers = [anything_to_sub_header(sh) for sh in sub_headers] 

79 

80 kwargs = dict(sub_headers=sub_headers, dtype=dtype) 

81 

82 SubHeader.__init__(self, name, unit, default, label, **kwargs) 

83 

84 def get_ncols(self): 

85 return max(1, len(self.sub_headers)) 

86 

87 def default_array(self, nrows): 

88 val = self.dtype(self.default) 

89 if not self.sub_headers: 

90 return num.full((nrows,), val, dtype=self.dtype) 

91 else: 

92 return num.full((nrows, self.get_ncols()), val, dtype=self.dtype) 

93 

94 

95def anything_to_header(args): 

96 if isinstance(args, Header): 

97 return args 

98 elif isinstance(args, str): 

99 return Header(name=args) 

100 elif isinstance(args, tuple): 

101 return Header(*args) 

102 else: 

103 raise ValueError('argument of type Header, str or tuple expected') 

104 

105 

106def anything_to_sub_header(args): 

107 if isinstance(args, SubHeader): 

108 return args 

109 elif isinstance(args, str): 

110 return SubHeader(name=args) 

111 elif isinstance(args, tuple): 

112 return SubHeader(*args) 

113 else: 

114 raise ValueError('argument of type SubHeader, str or tuple expected') 

115 

116 

117class Description(Object): 

118 name = String.T(optional=True) 

119 headers = List.T(Header.T()) 

120 nrows = Int.T() 

121 ncols = Int.T() 

122 

123 def __init__(self, table=None, **kwargs): 

124 if table: 

125 Object.__init__( 

126 self, 

127 name=table._name, 

128 headers=table._headers, 

129 nrows=table.get_nrows(), 

130 ncols=table.get_ncols()) 

131 else: 

132 Object.__init__(self, **kwargs) 

133 

134 

135class NoSuchRecipe(Exception): 

136 pass 

137 

138 

139class NoSuchCol(Exception): 

140 pass 

141 

142 

143class Recipe(Object): 

144 

145 def __init__(self): 

146 self._table = None 

147 self._table = Table() 

148 

149 self._required_headers = [] 

150 self._headers = [] 

151 self._col_update_map = {} 

152 self._name_to_header = {} 

153 

154 def has_col(self, name): 

155 return name in self._name_to_header 

156 

157 def get_col_names(self): 

158 names = [] 

159 for h in self._headers: 

160 names.append(h.name) 

161 for sh in h.sub_headers: 

162 names.append(sh.name) 

163 

164 return names 

165 

166 def get_table(self): 

167 return self._table 

168 

169 def get_header(self, name): 

170 try: 

171 return self._name_to_header[name] 

172 except KeyError: 

173 for h in self._required_headers: 

174 if h.name == name: 

175 return h 

176 

177 raise KeyError(name) 

178 

179 def _add_required_cols(self, table): 

180 for h in self._headers: 

181 if not table.has_col(h.name): 

182 table.add_col(h) 

183 

184 def _update_col(self, table, name): 

185 if not self._table.has_col(name): 

186 self._col_update_map[name](table) 

187 

188 def _add_rows_handler(self, table, nrows_added): 

189 pass 

190 

191 def _register_required_col(self, header): 

192 self._required_headers.append(header) 

193 

194 def _register_computed_col(self, header, updater): 

195 self._headers.append(header) 

196 self._name_to_header[header.name] = header 

197 self._col_update_map[header.name] = updater 

198 for sh in header.sub_headers: 

199 self._col_update_map[sh.name] = updater 

200 self._name_to_header[sh.name] = sh 

201 

202 

203class Table(Object): 

204 

205 description__ = Description.T() 

206 arrays__ = List.T(Array.T(serialize_as='base64+meta')) 

207 recipes__ = List.T(Recipe.T()) 

208 

209 def __init__( 

210 self, 

211 name=None, 

212 nrows_capacity=None, 

213 nrows_capacity_min=0, 

214 description=None, 

215 arrays=None, 

216 recipes=[]): 

217 

218 Object.__init__(self, init_props=False) 

219 

220 self._name = name 

221 self._buffers = [] 

222 self._arrays = [] 

223 self._headers = [] 

224 self._cols = {} 

225 self._recipes = [] 

226 self.nrows_capacity_min = nrows_capacity_min 

227 self._nrows_capacity = 0 

228 if nrows_capacity is not None: 

229 self.set_nrows_capacity(max(nrows_capacity, nrows_capacity_min)) 

230 

231 if description and arrays: 

232 self.T.get_property('arrays').validate( 

233 arrays, regularize=True, depth=0) 

234 self._name = description.name 

235 self.add_cols(description.headers, arrays) 

236 for recipe in recipes: 

237 self.add_recipe(recipe) 

238 

239 @property 

240 def description(self): 

241 return self.get_description() 

242 

243 @property 

244 def arrays(self): 

245 return self._arrays 

246 

247 @property 

248 def recipes(self): 

249 return self._recipes 

250 

251 def add_recipe(self, recipe): 

252 self._recipes.append(recipe) 

253 # recipe._add_required_cols(self) 

254 

255 def get_nrows(self): 

256 if not self._arrays: 

257 return 0 

258 else: 

259 return nrows(self._arrays[0]) 

260 

261 def get_nrows_capacity(self): 

262 return self._nrows_capacity 

263 

264 def set_nrows_capacity(self, nrows_capacity_new): 

265 if self.get_nrows_capacity() != nrows_capacity_new: 

266 if self.get_nrows() > nrows_capacity_new: 

267 raise ValueError('new capacity too small to hold current data') 

268 

269 new_buffers = [] 

270 for buf in self._buffers: 

271 shape = resize_shape(buf.shape, nrows_capacity_new) 

272 new_buffers.append(num.zeros(shape, dtype=buf.dtype)) 

273 

274 ncopy = min(self.get_nrows(), nrows_capacity_new) 

275 

276 new_arrays = [] 

277 for arr, buf in zip(self._arrays, new_buffers): 

278 buf[:ncopy, ...] = arr[:ncopy, ...] 

279 new_arrays.append(buf[:ncopy, ...]) 

280 

281 self._buffers = new_buffers 

282 self._arrays = new_arrays 

283 self._nrows_capacity = nrows_capacity_new 

284 

285 def get_ncols(self): 

286 return len(self._arrays) 

287 

288 def add_col(self, header, array=None): 

289 header = anything_to_header(header) 

290 

291 nrows_current = self.get_nrows() 

292 if array is None: 

293 array = header.default_array(nrows_current) 

294 

295 array = num.asarray(array) 

296 

297 assert header.get_ncols() == ncols(array) 

298 assert array.ndim in (1, 2) 

299 if self._arrays: 

300 assert nrows(array) == nrows_current 

301 

302 if nrows_current == 0: 

303 nrows_current = nrows(array) 

304 self.set_nrows_capacity( 

305 max(nrows_current, self.nrows_capacity_min)) 

306 

307 iarr = len(self._arrays) 

308 

309 shape = resize_shape(array.shape, self.get_nrows_capacity()) 

310 if shape != array.shape: 

311 buf = num.zeros(shape, dtype=array.dtype) 

312 buf[:nrows_current, ...] = array[:, ...] 

313 else: 

314 buf = array 

315 

316 self._buffers.append(buf) 

317 self._arrays.append(buf[:nrows_current, ...]) 

318 self._headers.append(header) 

319 

320 self._cols[header.name] = iarr, None 

321 

322 for icol, sub_header in enumerate(header.sub_headers): 

323 self._cols[sub_header.name] = iarr, icol 

324 

325 def add_cols(self, headers, arrays=None): 

326 if arrays is None: 

327 arrays = [None] * len(headers) 

328 

329 for header, array in zip(headers, arrays): 

330 self.add_col(header, array) 

331 

332 def add_rows(self, arrays): 

333 assert self.get_ncols() == len(arrays) 

334 arrays = [num.asarray(arr) for arr in arrays] 

335 

336 nrows_add = nrows(arrays[0]) 

337 nrows_current = self.get_nrows() 

338 nrows_new = nrows_current + nrows_add 

339 if self.get_nrows_capacity() < nrows_new: 

340 self.set_nrows_capacity(max( 

341 self.nrows_capacity_min, nextpow2(nrows_new))) 

342 

343 new_arrays = [] 

344 for buf, arr in zip(self._buffers, arrays): 

345 assert ncols(arr) == ncols(buf) 

346 assert nrows(arr) == nrows_add 

347 buf[nrows_current:nrows_new, ...] = arr[:, ...] 

348 new_arrays.append(buf[:nrows_new, ...]) 

349 

350 self._arrays = new_arrays 

351 

352 for recipe in self._recipes: 

353 recipe._add_rows_handler(self, nrows_add) 

354 

355 def get_col(self, name, mask=slice(None)): 

356 if name in self._cols: 

357 if isinstance(mask, str): 

358 mask = self.get_col(mask) 

359 

360 iarr, icol = self._cols[name] 

361 if icol is None: 

362 return self._arrays[iarr][mask] 

363 else: 

364 return self._arrays[iarr][mask, icol] 

365 else: 

366 recipe = self.get_recipe_for_col(name) 

367 recipe._update_col(self, name) 

368 

369 return recipe.get_table().get_col(name, mask) 

370 

371 def get_header(self, name): 

372 if name in self._cols: 

373 iarr, icol = self._cols[name] 

374 if icol is None: 

375 return self._headers[iarr] 

376 else: 

377 return self._headers[iarr].sub_headers[icol] 

378 else: 

379 recipe = self.get_recipe_for_col(name) 

380 return recipe.get_header(name) 

381 

382 def has_col(self, name): 

383 return name in self._cols or \ 

384 any(rec.has_col(name) for rec in self._recipes) 

385 

386 def get_col_names(self, sub_headers=True): 

387 names = [] 

388 for h in self._headers: 

389 names.append(h.name) 

390 if sub_headers: 

391 for sh in h.sub_headers: 

392 names.append(sh.name) 

393 

394 for recipe in self._recipes: 

395 names.extend(recipe.get_col_names()) 

396 

397 return names 

398 

399 def get_recipe_for_col(self, name): 

400 for recipe in self._recipes: 

401 if recipe.has_col(name): 

402 return recipe 

403 

404 raise NoSuchCol(name) 

405 

406 def get_description(self): 

407 return Description(self) 

408 

409 def get_as_text(self): 

410 scols = [] 

411 formats = { 

412 num.dtype('float64'): '%e'} 

413 

414 for name in self.get_col_names(sub_headers=False): 

415 array = self.get_col(name) 

416 header = self.get_header(name) 

417 fmt = formats.get(array.dtype, '%s') 

418 if array.ndim == 1: 

419 scol = [header.get_caption(), ''] 

420 for val in array: 

421 scol.append(fmt % val) 

422 

423 scols.append(scol) 

424 else: 

425 for icol in range(ncols(array)): 

426 sub_header = header.sub_headers[icol] 

427 scol = [header.get_caption(), sub_header.get_caption()] 

428 for val in array[:, icol]: 

429 scol.append(fmt % val) 

430 

431 scols.append(scol) 

432 

433 for scol in scols: 

434 width = max(len(s) for s in scol) 

435 for i in range(len(scol)): 

436 scol[i] = scol[i].rjust(width) 

437 

438 return '\n'.join(' '.join(s for s in srow) for srow in zip(*scols)) 

439 

440 def add_computed_col(self, header, func): 

441 header = anything_to_header(header) 

442 self.add_recipe(SimpleRecipe(header, func)) 

443 

444 def get_row(self, irow): 

445 if irow == -1: 

446 return [arr[-1:] for arr in self.arrays] 

447 else: 

448 return [arr[irow:irow+1] for arr in self.arrays] 

449 

450 

451class SimpleRecipe(Recipe): 

452 

453 def __init__(self, header, func): 

454 Recipe.__init__(self) 

455 self._col_name = header.name 

456 

457 def call_func(tab): 

458 self._table.add_col(header, func(tab)) 

459 

460 self._register_computed_col(header, call_func) 

461 

462 def _add_rows_handler(self, table, nrows_added): 

463 Recipe._add_rows_handler(self, table, nrows_added) 

464 if self._table.has_col(self._col_name): 

465 self._table.remove_col(self._col_name) 

466 

467 

468class LocationRecipe(Recipe): 

469 

470 def __init__(self): 

471 Recipe.__init__(self) 

472 self.c5_header = Header(name='c5', sub_headers=[ 

473 SubHeader(name='ref_lat', unit='degrees'), 

474 SubHeader(name='ref_lon', unit='degrees'), 

475 SubHeader(name='north_shift', unit='m'), 

476 SubHeader(name='east_shift', unit='m'), 

477 SubHeader(name='depth', unit='m')]) 

478 

479 self._register_required_col(self.c5_header) 

480 

481 self._latlon_header = Header(name='latlon', sub_headers=[ 

482 SubHeader(name='lat', unit='degrees'), 

483 SubHeader(name='lon', unit='degrees')]) 

484 

485 self._register_computed_col(self._latlon_header, self._update_latlon) 

486 

487 self._xyz_header = Header(name='xyz', sub_headers=[ 

488 SubHeader(name='x', unit='m'), 

489 SubHeader(name='y', unit='m'), 

490 SubHeader(name='z', unit='m')]) 

491 

492 self._register_computed_col(self._xyz_header, self._update_xyz) 

493 

494 def set_depth_offset(self, depth_offset): 

495 self.depth_offset = depth_offset 

496 

497 def _add_rows_handler(self, table, nrows_added): 

498 Recipe._add_rows_handler(self, table, nrows_added) 

499 

500 for colname in ['latlon', 'xyz']: 

501 if self._table.has_col(colname): 

502 self._table.remove_col(colname) 

503 

504 def _update_latlon(self, table): 

505 lats, lons = od.ne_to_latlon( 

506 table.get_col('ref_lat'), 

507 table.get_col('ref_lon'), 

508 table.get_col('north_shift'), 

509 table.get_col('east_shift')) 

510 

511 latlons = num.zeros((lats.size, 2)) 

512 latlons[:, 0] = lats 

513 latlons[:, 1] = lons 

514 

515 self._table.add_col(self._latlon_header, latlons) 

516 

517 def _update_xyz(self, table): 

518 self._update_latlon(table) 

519 

520 xyzs = geometry.latlondepth2xyz( 

521 num.concatenate(( 

522 table.get_col('lat').reshape(-1, 1), 

523 table.get_col('lon').reshape(-1, 1), 

524 table.get_col('depth').reshape(-1, 1)), 

525 axis=1), 

526 planetradius=cake.earthradius) 

527 

528 self._table.add_col(self._xyz_header, xyzs) 

529 

530 

531class EventRecipe(LocationRecipe): 

532 

533 def __init__(self): 

534 LocationRecipe.__init__(self) 

535 self._register_required_col(Header(name='time', unit='s')) 

536 self._register_required_col(Header(name='magnitude')) 

537 

538 def iter_events(self, table): 

539 from pyrocko import model 

540 for vec in zip(table.get_col(x) for x in [ 

541 'time', 'lat', 'lon', 

542 'north_shift', 'east_shift', 'depth', 

543 'magnitude']): 

544 

545 yield model.Event( 

546 time=vec[0], 

547 lat=vec[1], 

548 lon=vec[2], 

549 north_shift=vec[3], 

550 east_shift=vec[4], 

551 depth=vec[5], 

552 magnitude=vec[6]) 

553 

554 def get_events(self, table): 

555 return list(self.iter_events(table)) 

556 

557 

558class MomentTensorRecipe(Recipe): 

559 

560 def __init__(self): 

561 Recipe.__init__(self) 

562 self.m6_header = Header(name='m6', sub_headers=[ 

563 SubHeader(name='Mnn', unit='Nm'), 

564 SubHeader(name='Mee', unit='Nm'), 

565 SubHeader(name='Mdd', unit='Nm'), 

566 SubHeader(name='Mne', unit='Nm'), 

567 SubHeader(name='Mnd', unit='Nm'), 

568 SubHeader(name='Med', unit='Nm')]) 

569 

570 self._register_required_col(self.m6_header)