Coverage for /usr/local/lib/python3.11/dist-packages/pyrocko/table.py: 82%

342 statements  

« prev     ^ index     » next       coverage.py v6.5.0, created at 2023-10-10 10:12 +0000

1# http://pyrocko.org - GPLv3 

2# 

3# The Pyrocko Developers, 21st Century 

4# ---|P------/S----------~Lg---------- 

5 

6''' 

7A slim table-like data structure for when pandas are too fat. 

8''' 

9 

10import math 

11import numpy as num 

12from pyrocko.guts import Object, String, Unicode, List, Int, SObject, Any 

13from pyrocko.guts_array import Array 

14from pyrocko import geometry, cake 

15from pyrocko import orthodrome as od 

16from pyrocko.util import num_full 

17 

18 

19guts_prefix = 'pf' 

20 

21 

22def nextpow2(i): 

23 return 2**int(math.ceil(math.log(i)/math.log(2.))) 

24 

25 

26def ncols(arr): 

27 return 1 if arr.ndim == 1 else arr.shape[1] 

28 

29 

30def nrows(arr): 

31 return arr.shape[0] 

32 

33 

34def resize_shape(shape, n): 

35 return (n, ) if len(shape) == 1 else (n, shape[1]) 

36 

37 

38class DType(SObject): 

39 ''' 

40 Guts placeholder for :py:class:`numpy.dtype`. 

41 ''' 

42 dummy_for = num.dtype 

43 dummy_for_description = 'numpy.dtype' 

44 

45 

46class SubHeader(Object): 

47 name = String.T() 

48 unit = Unicode.T(optional=True) 

49 default = Any.T(optional=True) 

50 label = Unicode.T(optional=True) 

51 

52 def __init__(self, name, unit=None, default=None, label=None, **kwargs): 

53 Object.__init__( 

54 self, name=name, unit=unit, default=default, label=label, **kwargs) 

55 

56 def get_caption(self): 

57 s = self.label or self.name 

58 if self.unit: 

59 s += ' [%s]' % self.unit 

60 

61 return s 

62 

63 def get_ncols(self): 

64 return 1 

65 

66 

67class Header(SubHeader): 

68 sub_headers = List.T(SubHeader.T()) 

69 dtype = DType.T(default=num.dtype('float64'), optional=True) 

70 

71 def __init__( 

72 self, name, 

73 unit=None, 

74 sub_headers=[], 

75 dtype=None, 

76 default=None, 

77 label=None): 

78 

79 sub_headers = [anything_to_sub_header(sh) for sh in sub_headers] 

80 

81 kwargs = dict(sub_headers=sub_headers, dtype=dtype) 

82 

83 SubHeader.__init__(self, name, unit, default, label, **kwargs) 

84 

85 def get_ncols(self): 

86 return max(1, len(self.sub_headers)) 

87 

88 def default_array(self, nrows): 

89 val = self.dtype(self.default) 

90 if not self.sub_headers: 

91 return num_full((nrows,), val, dtype=self.dtype) 

92 else: 

93 return num_full((nrows, self.get_ncols()), val, dtype=self.dtype) 

94 

95 

96def anything_to_header(args): 

97 if isinstance(args, Header): 

98 return args 

99 elif isinstance(args, str): 

100 return Header(name=args) 

101 elif isinstance(args, tuple): 

102 return Header(*args) 

103 else: 

104 raise ValueError('argument of type Header, str or tuple expected') 

105 

106 

107def anything_to_sub_header(args): 

108 if isinstance(args, SubHeader): 

109 return args 

110 elif isinstance(args, str): 

111 return SubHeader(name=args) 

112 elif isinstance(args, tuple): 

113 return SubHeader(*args) 

114 else: 

115 raise ValueError('argument of type SubHeader, str or tuple expected') 

116 

117 

118class Description(Object): 

119 name = String.T(optional=True) 

120 headers = List.T(Header.T()) 

121 nrows = Int.T() 

122 ncols = Int.T() 

123 

124 def __init__(self, table=None, **kwargs): 

125 if table: 

126 Object.__init__( 

127 self, 

128 name=table._name, 

129 headers=table._headers, 

130 nrows=table.get_nrows(), 

131 ncols=table.get_ncols()) 

132 else: 

133 Object.__init__(self, **kwargs) 

134 

135 

136class NoSuchRecipe(Exception): 

137 pass 

138 

139 

140class NoSuchCol(Exception): 

141 pass 

142 

143 

144class Recipe(Object): 

145 

146 def __init__(self): 

147 self._table = None 

148 self._table = Table() 

149 

150 self._required_headers = [] 

151 self._headers = [] 

152 self._col_update_map = {} 

153 self._name_to_header = {} 

154 

155 def has_col(self, name): 

156 return name in self._name_to_header 

157 

158 def get_col_names(self): 

159 names = [] 

160 for h in self._headers: 

161 names.append(h.name) 

162 for sh in h.sub_headers: 

163 names.append(sh.name) 

164 

165 return names 

166 

167 def get_table(self): 

168 return self._table 

169 

170 def get_header(self, name): 

171 try: 

172 return self._name_to_header[name] 

173 except KeyError: 

174 for h in self._required_headers: 

175 if h.name == name: 

176 return h 

177 

178 raise KeyError(name) 

179 

180 def _add_required_cols(self, table): 

181 for h in self._headers: 

182 if not table.has_col(h.name): 

183 table.add_col(h) 

184 

185 def _update_col(self, table, name): 

186 if not self._table.has_col(name): 

187 self._col_update_map[name](table) 

188 

189 def _add_rows_handler(self, table, nrows_added): 

190 pass 

191 

192 def _register_required_col(self, header): 

193 self._required_headers.append(header) 

194 

195 def _register_computed_col(self, header, updater): 

196 self._headers.append(header) 

197 self._name_to_header[header.name] = header 

198 self._col_update_map[header.name] = updater 

199 for sh in header.sub_headers: 

200 self._col_update_map[sh.name] = updater 

201 self._name_to_header[sh.name] = sh 

202 

203 

204class Table(Object): 

205 

206 description__ = Description.T() 

207 arrays__ = List.T(Array.T(serialize_as='base64+meta')) 

208 recipes__ = List.T(Recipe.T()) 

209 

210 def __init__( 

211 self, 

212 name=None, 

213 nrows_capacity=None, 

214 nrows_capacity_min=0, 

215 description=None, 

216 arrays=None, 

217 recipes=[]): 

218 

219 Object.__init__(self, init_props=False) 

220 

221 self._name = name 

222 self._buffers = [] 

223 self._arrays = [] 

224 self._headers = [] 

225 self._cols = {} 

226 self._recipes = [] 

227 self.nrows_capacity_min = nrows_capacity_min 

228 self._nrows_capacity = 0 

229 if nrows_capacity is not None: 

230 self.set_nrows_capacity(max(nrows_capacity, nrows_capacity_min)) 

231 

232 if description and arrays: 

233 self.T.get_property('arrays').validate( 

234 arrays, regularize=True, depth=0) 

235 self._name = description.name 

236 self.add_cols(description.headers, arrays) 

237 for recipe in recipes: 

238 self.add_recipe(recipe) 

239 

240 @property 

241 def description(self): 

242 return self.get_description() 

243 

244 @property 

245 def arrays(self): 

246 return self._arrays 

247 

248 @property 

249 def recipes(self): 

250 return self._recipes 

251 

252 def add_recipe(self, recipe): 

253 self._recipes.append(recipe) 

254 # recipe._add_required_cols(self) 

255 

256 def get_nrows(self): 

257 if not self._arrays: 

258 return 0 

259 else: 

260 return nrows(self._arrays[0]) 

261 

262 def get_nrows_capacity(self): 

263 return self._nrows_capacity 

264 

265 def set_nrows_capacity(self, nrows_capacity_new): 

266 if self.get_nrows_capacity() != nrows_capacity_new: 

267 if self.get_nrows() > nrows_capacity_new: 

268 raise ValueError('new capacity too small to hold current data') 

269 

270 new_buffers = [] 

271 for buf in self._buffers: 

272 shape = resize_shape(buf.shape, nrows_capacity_new) 

273 new_buffers.append(num.zeros(shape, dtype=buf.dtype)) 

274 

275 ncopy = min(self.get_nrows(), nrows_capacity_new) 

276 

277 new_arrays = [] 

278 for arr, buf in zip(self._arrays, new_buffers): 

279 buf[:ncopy, ...] = arr[:ncopy, ...] 

280 new_arrays.append(buf[:ncopy, ...]) 

281 

282 self._buffers = new_buffers 

283 self._arrays = new_arrays 

284 self._nrows_capacity = nrows_capacity_new 

285 

286 def get_ncols(self): 

287 return len(self._arrays) 

288 

289 def add_col(self, header, array=None): 

290 header = anything_to_header(header) 

291 

292 nrows_current = self.get_nrows() 

293 if array is None: 

294 array = header.default_array(nrows_current) 

295 

296 array = num.asarray(array) 

297 

298 assert header.get_ncols() == ncols(array) 

299 assert array.ndim in (1, 2) 

300 if self._arrays: 

301 assert nrows(array) == nrows_current 

302 

303 if nrows_current == 0: 

304 nrows_current = nrows(array) 

305 self.set_nrows_capacity( 

306 max(nrows_current, self.nrows_capacity_min)) 

307 

308 iarr = len(self._arrays) 

309 

310 shape = resize_shape(array.shape, self.get_nrows_capacity()) 

311 if shape != array.shape: 

312 buf = num.zeros(shape, dtype=array.dtype) 

313 buf[:nrows_current, ...] = array[:, ...] 

314 else: 

315 buf = array 

316 

317 self._buffers.append(buf) 

318 self._arrays.append(buf[:nrows_current, ...]) 

319 self._headers.append(header) 

320 

321 self._cols[header.name] = iarr, None 

322 

323 for icol, sub_header in enumerate(header.sub_headers): 

324 self._cols[sub_header.name] = iarr, icol 

325 

326 def add_cols(self, headers, arrays=None): 

327 if arrays is None: 

328 arrays = [None] * len(headers) 

329 

330 for header, array in zip(headers, arrays): 

331 self.add_col(header, array) 

332 

333 def add_rows(self, arrays): 

334 assert self.get_ncols() == len(arrays) 

335 arrays = [num.asarray(arr) for arr in arrays] 

336 

337 nrows_add = nrows(arrays[0]) 

338 nrows_current = self.get_nrows() 

339 nrows_new = nrows_current + nrows_add 

340 if self.get_nrows_capacity() < nrows_new: 

341 self.set_nrows_capacity(max( 

342 self.nrows_capacity_min, nextpow2(nrows_new))) 

343 

344 new_arrays = [] 

345 for buf, arr in zip(self._buffers, arrays): 

346 assert ncols(arr) == ncols(buf) 

347 assert nrows(arr) == nrows_add 

348 buf[nrows_current:nrows_new, ...] = arr[:, ...] 

349 new_arrays.append(buf[:nrows_new, ...]) 

350 

351 self._arrays = new_arrays 

352 

353 for recipe in self._recipes: 

354 recipe._add_rows_handler(self, nrows_add) 

355 

356 def get_col(self, name, mask=slice(None)): 

357 if name in self._cols: 

358 if isinstance(mask, str): 

359 mask = self.get_col(mask) 

360 

361 iarr, icol = self._cols[name] 

362 if icol is None: 

363 return self._arrays[iarr][mask] 

364 else: 

365 return self._arrays[iarr][mask, icol] 

366 else: 

367 recipe = self.get_recipe_for_col(name) 

368 recipe._update_col(self, name) 

369 

370 return recipe.get_table().get_col(name, mask) 

371 

372 def get_header(self, name): 

373 if name in self._cols: 

374 iarr, icol = self._cols[name] 

375 if icol is None: 

376 return self._headers[iarr] 

377 else: 

378 return self._headers[iarr].sub_headers[icol] 

379 else: 

380 recipe = self.get_recipe_for_col(name) 

381 return recipe.get_header(name) 

382 

383 def has_col(self, name): 

384 return name in self._cols or \ 

385 any(rec.has_col(name) for rec in self._recipes) 

386 

387 def get_col_names(self, sub_headers=True): 

388 names = [] 

389 for h in self._headers: 

390 names.append(h.name) 

391 if sub_headers: 

392 for sh in h.sub_headers: 

393 names.append(sh.name) 

394 

395 for recipe in self._recipes: 

396 names.extend(recipe.get_col_names()) 

397 

398 return names 

399 

400 def get_recipe_for_col(self, name): 

401 for recipe in self._recipes: 

402 if recipe.has_col(name): 

403 return recipe 

404 

405 raise NoSuchCol(name) 

406 

407 def get_description(self): 

408 return Description(self) 

409 

410 def get_as_text(self): 

411 scols = [] 

412 formats = { 

413 num.dtype('float64'): '%e'} 

414 

415 for name in self.get_col_names(sub_headers=False): 

416 array = self.get_col(name) 

417 header = self.get_header(name) 

418 fmt = formats.get(array.dtype, '%s') 

419 if array.ndim == 1: 

420 scol = [header.get_caption(), ''] 

421 for val in array: 

422 scol.append(fmt % val) 

423 

424 scols.append(scol) 

425 else: 

426 for icol in range(ncols(array)): 

427 sub_header = header.sub_headers[icol] 

428 scol = [header.get_caption(), sub_header.get_caption()] 

429 for val in array[:, icol]: 

430 scol.append(fmt % val) 

431 

432 scols.append(scol) 

433 

434 for scol in scols: 

435 width = max(len(s) for s in scol) 

436 for i in range(len(scol)): 

437 scol[i] = scol[i].rjust(width) 

438 

439 return '\n'.join(' '.join(s for s in srow) for srow in zip(*scols)) 

440 

441 def add_computed_col(self, header, func): 

442 header = anything_to_header(header) 

443 self.add_recipe(SimpleRecipe(header, func)) 

444 

445 def get_row(self, irow): 

446 if irow == -1: 

447 return [arr[-1:] for arr in self.arrays] 

448 else: 

449 return [arr[irow:irow+1] for arr in self.arrays] 

450 

451 

452class SimpleRecipe(Recipe): 

453 

454 def __init__(self, header, func): 

455 Recipe.__init__(self) 

456 self._col_name = header.name 

457 

458 def call_func(tab): 

459 self._table.add_col(header, func(tab)) 

460 

461 self._register_computed_col(header, call_func) 

462 

463 def _add_rows_handler(self, table, nrows_added): 

464 Recipe._add_rows_handler(self, table, nrows_added) 

465 if self._table.has_col(self._col_name): 

466 self._table.remove_col(self._col_name) 

467 

468 

469class LocationRecipe(Recipe): 

470 

471 def __init__(self): 

472 Recipe.__init__(self) 

473 self.c5_header = Header(name='c5', sub_headers=[ 

474 SubHeader(name='ref_lat', unit='degrees'), 

475 SubHeader(name='ref_lon', unit='degrees'), 

476 SubHeader(name='north_shift', unit='m'), 

477 SubHeader(name='east_shift', unit='m'), 

478 SubHeader(name='depth', unit='m')]) 

479 

480 self._register_required_col(self.c5_header) 

481 

482 self._latlon_header = Header(name='latlon', sub_headers=[ 

483 SubHeader(name='lat', unit='degrees'), 

484 SubHeader(name='lon', unit='degrees')]) 

485 

486 self._register_computed_col(self._latlon_header, self._update_latlon) 

487 

488 self._xyz_header = Header(name='xyz', sub_headers=[ 

489 SubHeader(name='x', unit='m'), 

490 SubHeader(name='y', unit='m'), 

491 SubHeader(name='z', unit='m')]) 

492 

493 self._register_computed_col(self._xyz_header, self._update_xyz) 

494 

495 def set_depth_offset(self, depth_offset): 

496 self.depth_offset = depth_offset 

497 

498 def _add_rows_handler(self, table, nrows_added): 

499 Recipe._add_rows_handler(self, table, nrows_added) 

500 

501 for colname in ['latlon', 'xyz']: 

502 if self._table.has_col(colname): 

503 self._table.remove_col(colname) 

504 

505 def _update_latlon(self, table): 

506 lats, lons = od.ne_to_latlon( 

507 table.get_col('ref_lat'), 

508 table.get_col('ref_lon'), 

509 table.get_col('north_shift'), 

510 table.get_col('east_shift')) 

511 

512 latlons = num.zeros((lats.size, 2)) 

513 latlons[:, 0] = lats 

514 latlons[:, 1] = lons 

515 

516 self._table.add_col(self._latlon_header, latlons) 

517 

518 def _update_xyz(self, table): 

519 self._update_latlon(table) 

520 

521 xyzs = geometry.latlondepth2xyz( 

522 num.concatenate(( 

523 table.get_col('lat').reshape(-1, 1), 

524 table.get_col('lon').reshape(-1, 1), 

525 table.get_col('depth').reshape(-1, 1)), 

526 axis=1), 

527 planetradius=cake.earthradius) 

528 

529 self._table.add_col(self._xyz_header, xyzs) 

530 

531 

532class EventRecipe(LocationRecipe): 

533 

534 def __init__(self): 

535 LocationRecipe.__init__(self) 

536 self._register_required_col(Header(name='time', unit='s')) 

537 self._register_required_col(Header(name='magnitude')) 

538 

539 def iter_events(self, table): 

540 from pyrocko import model 

541 for vec in zip(table.get_col(x) for x in [ 

542 'time', 'lat', 'lon', 

543 'north_shift', 'east_shift', 'depth', 

544 'magnitude']): 

545 

546 yield model.Event( 

547 time=vec[0], 

548 lat=vec[1], 

549 lon=vec[2], 

550 north_shift=vec[3], 

551 east_shift=vec[4], 

552 depth=vec[5], 

553 magnitude=vec[6]) 

554 

555 def get_events(self, table): 

556 return list(self.iter_events(table)) 

557 

558 

559class MomentTensorRecipe(Recipe): 

560 

561 def __init__(self): 

562 Recipe.__init__(self) 

563 self.m6_header = Header(name='m6', sub_headers=[ 

564 SubHeader(name='Mnn', unit='Nm'), 

565 SubHeader(name='Mee', unit='Nm'), 

566 SubHeader(name='Mdd', unit='Nm'), 

567 SubHeader(name='Mne', unit='Nm'), 

568 SubHeader(name='Mnd', unit='Nm'), 

569 SubHeader(name='Med', unit='Nm')]) 

570 

571 self._register_required_col(self.m6_header)