Coverage for /usr/local/lib/python3.11/dist-packages/pyrocko/squirrel/base.py: 85%

871 statements  

« prev     ^ index     » next       coverage.py v6.5.0, created at 2023-11-03 12:47 +0000

1# http://pyrocko.org - GPLv3 

2# 

3# The Pyrocko Developers, 21st Century 

4# ---|P------/S----------~Lg---------- 

5 

6''' 

7Squirrel main classes. 

8''' 

9 

10import sys 

11import os 

12import time 

13import math 

14import logging 

15import threading 

16import queue 

17from collections import defaultdict 

18 

19from pyrocko.guts import Object, Int, List, Tuple, String, Timestamp, Dict 

20from pyrocko import util, trace 

21from pyrocko.progress import progress 

22from pyrocko.plot import nice_time_tick_inc_approx_secs 

23 

24from . import model, io, cache, dataset 

25 

26from .model import to_kind_id, WaveformOrder, to_kind, to_codes, \ 

27 STATION, CHANNEL, RESPONSE, EVENT, WAVEFORM, codes_patterns_list, \ 

28 codes_patterns_for_kind 

29from .client import fdsn, catalog 

30from .selection import Selection, filldocs 

31from .database import abspath 

32from .operators.base import Operator, CodesPatternFiltering 

33from . import client, environment, error 

34 

# Module-level logger; 'psq' is the squirrel logging namespace.
logger = logging.getLogger('psq.base')

# Prefix under which Guts-serializable classes of this module are registered.
guts_prefix = 'squirrel'

38 

39 

def nonef(f, xs):
    '''
    Apply ``f`` to the non-``None`` items of ``xs``.

    Returns ``None`` when no non-``None`` items remain.
    '''
    remaining = [x for x in xs if x is not None]
    return f(remaining) if remaining else None

46 

47 

def make_task(*args):
    # Create a progress task which reports through this module's logger.
    return progress.task(*args, logger=logger)

50 

51 

def lpick(condition, seq):
    '''
    Partition ``seq`` into ``(non_matching, matching)`` by ``condition``.
    '''
    rejected = []
    accepted = []
    for item in seq:
        if condition(item):
            accepted.append(item)
        else:
            rejected.append(item)

    return rejected, accepted

58 

59 

def len_plural(obj):
    '''
    Return ``(len(obj), suffix)`` with ``suffix`` being ``'s'`` except for
    length one, for building pluralized messages.
    '''
    n = len(obj)
    if n == 1:
        return n, ''
    return n, 's'

62 

63 

def blocks(tmin, tmax, deltat, nsamples_block=100000):
    '''
    Yield consecutive, aligned time blocks covering ``[tmin, tmax]``.

    Block length is ``deltat * nsamples_block``, rounded to a "nice" value.
    '''
    tblock = nice_time_tick_inc_approx_secs(
        util.to_time_float(deltat * nsamples_block))

    iblock = int(math.floor(tmin / tblock))
    iblock_stop = int(math.ceil(tmax / tblock))
    while iblock < iblock_stop:
        yield iblock * tblock, (iblock + 1) * tblock
        iblock += 1

71 

72 

def gaps(avail, tmin, tmax):
    '''
    Compute intervals in ``[tmin, tmax]`` not covered by spans in ``avail``.

    :param avail: list of ``(tmin_a, tmax_a)`` covered spans.
    :returns: list of ``(tmin_g, tmax_g)`` gap spans.
    '''
    assert tmin < tmax

    # Sweep-line events: +1 opens coverage, -1 closes it. The query span
    # itself contributes an initial coverage of one.
    events = [(tmin, -1), (tmax, 1)]
    for tmin_a, tmax_a in avail:
        assert tmin_a < tmax_a
        events.append((tmin_a, 1))
        events.append((tmax_a, -1))

    events.sort()

    uncovered = []
    level = 1
    t_open = None
    for t, step in events:
        if level == 1 and step == -1:
            # Coverage drops to zero: a gap starts here.
            t_open = t
        elif level == 0 and step == 1 and t_open is not None:
            # Coverage resumes: close the gap, ignoring zero-length ones.
            if t_open != t:
                uncovered.append((t_open, t))

        level += step

    return uncovered

97 

98 

def order_key(order):
    '''
    Sort key for waveform orders: codes first, then time span.
    '''
    return order.codes, order.tmin, order.tmax

101 

102 

103def _is_exact(pat): 

104 return not ('*' in pat or '?' in pat or ']' in pat or '[' in pat) 

105 

106 

def prefix_tree(tups):
    '''
    Build a sorted prefix tree from equal-length tuples.

    Each node is a ``(value, children)`` pair, where ``children`` is again a
    list of such pairs for the remaining tuple elements.
    '''
    if not tups:
        return []

    # Leaf level: single-element tuples terminate with empty child lists.
    if len(tups[0]) == 1:
        return sorted((tup[0], []) for tup in tups)

    grouped = defaultdict(list)
    for tup in tups:
        grouped[tup[0]].append(tup[1:])

    return [
        (prefix, prefix_tree(grouped[prefix]))
        for prefix in sorted(grouped)]

123 

124 

def match_time_span(tmin, tmax, obj):
    '''
    Check whether ``obj``'s time span intersects the query span.

    ``None`` on either side means "unbounded" and always matches.
    '''
    if obj.tmin is not None and tmax is not None and obj.tmin > tmax:
        return False

    if tmin is not None and obj.tmax is not None and tmin >= obj.tmax:
        return False

    return True

128 

129 

class Batch(object):
    '''
    Batch of waveforms from window-wise data extraction.

    Encapsulates state and results yielded for each window in window-wise
    waveform extraction with the :py:meth:`Squirrel.chopper_waveforms` method.

    *Attributes:*

    .. py:attribute:: tmin

        Start of this time window.

    .. py:attribute:: tmax

        End of this time window.

    .. py:attribute:: i

        Index of this time window in sequence.

    .. py:attribute:: n

        Total number of time windows in sequence.

    .. py:attribute:: igroup

        Index of this time window's sequence group.

    .. py:attribute:: ngroups

        Total number of sequence groups.

    .. py:attribute:: traces

        Extracted waveforms for this time window.
    '''

    def __init__(self, tmin, tmax, i, n, igroup, ngroups, traces):
        # Plain data container; attributes are documented in the class
        # docstring above.
        self.tmin = tmin
        self.tmax = tmax
        self.i = i
        self.n = n
        self.igroup = igroup
        self.ngroups = ngroups
        self.traces = traces

176 

177 

178class Squirrel(Selection): 

179 ''' 

180 Prompt, lazy, indexing, caching, dynamic seismological dataset access. 

181 

182 :param env: 

183 Squirrel environment instance or directory path to use as starting 

184 point for its detection. By default, the current directory is used as 

185 starting point. When searching for a usable environment the directory 

186 ``'.squirrel'`` or ``'squirrel'`` in the current (or starting point) 

187 directory is used if it exists, otherwise the parent directories are 

188 search upwards for the existence of such a directory. If no such 

189 directory is found, the user's global Squirrel environment 

190 ``'$HOME/.pyrocko/squirrel'`` is used. 

191 :type env: 

192 :py:class:`~pyrocko.squirrel.environment.Environment` or 

193 :py:class:`str` 

194 

195 :param database: 

196 Database instance or path to database. By default the 

197 database found in the detected Squirrel environment is used. 

198 :type database: 

199 :py:class:`~pyrocko.squirrel.database.Database` or :py:class:`str` 

200 

201 :param cache_path: 

202 Directory path to use for data caching. By default, the ``'cache'`` 

203 directory in the detected Squirrel environment is used. 

204 :type cache_path: 

205 :py:class:`str` 

206 

207 :param persistent: 

208 If given a name, create a persistent selection. 

209 :type persistent: 

210 :py:class:`str` 

211 

212 This is the central class of the Squirrel framework. It provides a unified 

213 interface to query and access seismic waveforms, station meta-data and 

214 event information from local file collections and remote data sources. For 

215 prompt responses, a profound database setup is used under the hood. To 

216 speed up assemblage of ad-hoc data selections, files are indexed on first 

217 use and the extracted meta-data is remembered in the database for 

218 subsequent accesses. Bulk data is lazily loaded from disk and remote 

219 sources, just when requested. Once loaded, data is cached in memory to 

220 expedite typical access patterns. Files and data sources can be dynamically 

221 added to and removed from the Squirrel selection at runtime. 

222 

223 Queries are restricted to the contents of the files currently added to the 

224 Squirrel selection (usually a subset of the file meta-information 

225 collection in the database). This list of files is referred to here as the 

226 "selection". By default, temporary tables are created in the attached 

227 database to hold the names of the files in the selection as well as various 

228 indices and counters. These tables are only visible inside the application 

229 which created them and are deleted when the database connection is closed 

230 or the application exits. To create a selection which is not deleted at 

231 exit, supply a name to the ``persistent`` argument of the Squirrel 

232 constructor. Persistent selections are shared among applications using the 

233 same database. 

234 

235 **Method summary** 

236 

237 Some of the methods are implemented in :py:class:`Squirrel`'s base class 

238 :py:class:`~pyrocko.squirrel.selection.Selection`. 

239 

240 .. autosummary:: 

241 

242 ~Squirrel.add 

243 ~Squirrel.add_source 

244 ~Squirrel.add_fdsn 

245 ~Squirrel.add_catalog 

246 ~Squirrel.add_dataset 

247 ~Squirrel.add_virtual 

248 ~Squirrel.update 

249 ~Squirrel.update_waveform_promises 

250 ~Squirrel.advance_accessor 

251 ~Squirrel.clear_accessor 

252 ~Squirrel.reload 

253 ~pyrocko.squirrel.selection.Selection.iter_paths 

254 ~Squirrel.iter_nuts 

255 ~Squirrel.iter_kinds 

256 ~Squirrel.iter_deltats 

257 ~Squirrel.iter_codes 

258 ~pyrocko.squirrel.selection.Selection.get_paths 

259 ~Squirrel.get_nuts 

260 ~Squirrel.get_kinds 

261 ~Squirrel.get_deltats 

262 ~Squirrel.get_codes 

263 ~Squirrel.get_counts 

264 ~Squirrel.get_time_span 

265 ~Squirrel.get_deltat_span 

266 ~Squirrel.get_nfiles 

267 ~Squirrel.get_nnuts 

268 ~Squirrel.get_total_size 

269 ~Squirrel.get_stats 

270 ~Squirrel.get_content 

271 ~Squirrel.get_stations 

272 ~Squirrel.get_channels 

273 ~Squirrel.get_responses 

274 ~Squirrel.get_events 

275 ~Squirrel.get_waveform_nuts 

276 ~Squirrel.get_waveforms 

277 ~Squirrel.chopper_waveforms 

278 ~Squirrel.get_coverage 

279 ~Squirrel.pile 

280 ~Squirrel.snuffle 

281 ~Squirrel.glob_codes 

282 ~pyrocko.squirrel.selection.Selection.get_database 

283 ~Squirrel.print_tables 

284 ''' 

285 

    def __init__(
            self, env=None, database=None, cache_path=None, persistent=None):

        # Accept an Environment instance directly; anything else (path or
        # None) goes through environment detection.
        if not isinstance(env, environment.Environment):
            env = environment.get_environment(env)

        # Fall back to locations configured in the environment when not
        # given explicitly.
        if database is None:
            database = env.expand_path(env.database_path)

        if cache_path is None:
            cache_path = env.expand_path(env.cache_path)

        if persistent is None:
            persistent = env.persistent

        Selection.__init__(
            self, database=database, persistent=persistent)

        # Relative paths in the database are resolved against the parent
        # directory of the environment's base path.
        self.get_database().set_basepath(os.path.dirname(env.get_basepath()))

        # Separate in-memory content caches for waveforms and for all other
        # content kinds.
        self._content_caches = {
            'waveform': cache.ContentCache(),
            'default': cache.ContentCache()}

        self._cache_path = cache_path

        self._sources = []           # remote data access clients
        self._operators = []
        self._operator_registry = {}

        self._pending_orders = []    # waveform orders not yet resolved

        self._pile = None            # initialized lazily
        self._n_choppers_active = 0

        self.downloads_enabled = True

        # Names of this selection's private tables in the attached database.
        self._names.update({
            'nuts': self.name + '_nuts',
            'kind_codes_count': self.name + '_kind_codes_count',
            'coverage': self.name + '_coverage'})

        with self.transaction('create tables') as cursor:
            self._create_tables_squirrel(cursor)

330 

    def _create_tables_squirrel(self, cursor):
        '''
        Create this selection's tables, indices and triggers.

        Three tables are maintained: ``nuts`` indexes the content pieces of
        the files in the selection, ``kind_codes_count`` holds per
        kind/codes entry counts and ``coverage`` stores step changes of the
        number of overlapping content pieces over time. The latter two are
        kept consistent with ``nuts`` by insert/delete triggers.
        '''

        # Main index of content pieces ("nuts") in the selection.
        cursor.execute(self._register_table(self._sql(
            '''
                CREATE TABLE IF NOT EXISTS %(db)s.%(nuts)s (
                    nut_id integer PRIMARY KEY,
                    file_id integer,
                    file_segment integer,
                    file_element integer,
                    kind_id integer,
                    kind_codes_id integer,
                    tmin_seconds integer,
                    tmin_offset integer,
                    tmax_seconds integer,
                    tmax_offset integer,
                    kscale integer)
            ''')))

        # Number of nuts per kind/codes combination.
        cursor.execute(self._register_table(self._sql(
            '''
                CREATE TABLE IF NOT EXISTS %(db)s.%(kind_codes_count)s (
                    kind_codes_id integer PRIMARY KEY,
                    count integer)
            ''')))

        # Each (segment, element) of a file may appear only once.
        cursor.execute(self._sql(
            '''
                CREATE UNIQUE INDEX IF NOT EXISTS %(db)s.%(nuts)s_file_element
                    ON %(nuts)s (file_id, file_segment, file_element)
            '''))

        cursor.execute(self._sql(
            '''
                CREATE INDEX IF NOT EXISTS %(db)s.%(nuts)s_index_file_id
                    ON %(nuts)s (file_id)
            '''))

        # Time-range lookup indices; see _timerange_sql for their use.
        cursor.execute(self._sql(
            '''
                CREATE INDEX IF NOT EXISTS %(db)s.%(nuts)s_index_tmin_seconds
                    ON %(nuts)s (kind_id, tmin_seconds)
            '''))

        cursor.execute(self._sql(
            '''
                CREATE INDEX IF NOT EXISTS %(db)s.%(nuts)s_index_tmax_seconds
                    ON %(nuts)s (kind_id, tmax_seconds)
            '''))

        cursor.execute(self._sql(
            '''
                CREATE INDEX IF NOT EXISTS %(db)s.%(nuts)s_index_kscale
                    ON %(nuts)s (kind_id, kscale, tmin_seconds)
            '''))

        # Remove nuts when their file is removed from the global file list.
        cursor.execute(self._sql(
            '''
                CREATE TRIGGER IF NOT EXISTS %(db)s.%(nuts)s_delete_nuts
                    BEFORE DELETE ON main.files FOR EACH ROW
                    BEGIN
                        DELETE FROM %(nuts)s WHERE file_id == old.file_id;
                    END
            '''))

        # trigger only on size to make silent update of mtime possible
        cursor.execute(self._sql(
            '''
                CREATE TRIGGER IF NOT EXISTS %(db)s.%(nuts)s_delete_nuts2
                    BEFORE UPDATE OF size ON main.files FOR EACH ROW
                    BEGIN
                        DELETE FROM %(nuts)s WHERE file_id == old.file_id;
                    END
            '''))

        # Remove nuts when their file leaves this selection.
        cursor.execute(self._sql(
            '''
                CREATE TRIGGER IF NOT EXISTS
                    %(db)s.%(file_states)s_delete_files
                    BEFORE DELETE ON %(db)s.%(file_states)s FOR EACH ROW
                    BEGIN
                        DELETE FROM %(nuts)s WHERE file_id == old.file_id;
                    END
            '''))

        # Keep kind_codes_count in sync: increment on insert ...
        cursor.execute(self._sql(
            '''
                CREATE TRIGGER IF NOT EXISTS %(db)s.%(nuts)s_inc_kind_codes
                    BEFORE INSERT ON %(nuts)s FOR EACH ROW
                    BEGIN
                        INSERT OR IGNORE INTO %(kind_codes_count)s VALUES
                        (new.kind_codes_id, 0);
                        UPDATE %(kind_codes_count)s
                        SET count = count + 1
                        WHERE new.kind_codes_id
                            == %(kind_codes_count)s.kind_codes_id;
                    END
            '''))

        # ... and decrement on delete.
        cursor.execute(self._sql(
            '''
                CREATE TRIGGER IF NOT EXISTS %(db)s.%(nuts)s_dec_kind_codes
                    BEFORE DELETE ON %(nuts)s FOR EACH ROW
                    BEGIN
                        UPDATE %(kind_codes_count)s
                        SET count = count - 1
                        WHERE old.kind_codes_id
                            == %(kind_codes_count)s.kind_codes_id;
                    END
            '''))

        # Coverage table: each row marks a step change of the count of
        # overlapping nuts for a kind/codes at a point in time.
        cursor.execute(self._register_table(self._sql(
            '''
                CREATE TABLE IF NOT EXISTS %(db)s.%(coverage)s (
                    kind_codes_id integer,
                    time_seconds integer,
                    time_offset integer,
                    step integer)
            ''')))

        cursor.execute(self._sql(
            '''
                CREATE UNIQUE INDEX IF NOT EXISTS %(db)s.%(coverage)s_time
                    ON %(coverage)s (kind_codes_id, time_seconds, time_offset)
            '''))

        # On insert of a nut: +1 step at its tmin, -1 step at its tmax;
        # entries whose step becomes zero are removed.
        cursor.execute(self._sql(
            '''
                CREATE TRIGGER IF NOT EXISTS %(db)s.%(nuts)s_add_coverage
                    AFTER INSERT ON %(nuts)s FOR EACH ROW
                    BEGIN
                        INSERT OR IGNORE INTO %(coverage)s VALUES
                        (new.kind_codes_id, new.tmin_seconds, new.tmin_offset, 0)
                        ;
                        UPDATE %(coverage)s
                        SET step = step + 1
                        WHERE new.kind_codes_id == %(coverage)s.kind_codes_id
                            AND new.tmin_seconds == %(coverage)s.time_seconds
                            AND new.tmin_offset == %(coverage)s.time_offset
                        ;
                        INSERT OR IGNORE INTO %(coverage)s VALUES
                        (new.kind_codes_id, new.tmax_seconds, new.tmax_offset, 0)
                        ;
                        UPDATE %(coverage)s
                        SET step = step - 1
                        WHERE new.kind_codes_id == %(coverage)s.kind_codes_id
                            AND new.tmax_seconds == %(coverage)s.time_seconds
                            AND new.tmax_offset == %(coverage)s.time_offset
                        ;
                        DELETE FROM %(coverage)s
                        WHERE new.kind_codes_id == %(coverage)s.kind_codes_id
                            AND new.tmin_seconds == %(coverage)s.time_seconds
                            AND new.tmin_offset == %(coverage)s.time_offset
                            AND step == 0
                        ;
                        DELETE FROM %(coverage)s
                        WHERE new.kind_codes_id == %(coverage)s.kind_codes_id
                            AND new.tmax_seconds == %(coverage)s.time_seconds
                            AND new.tmax_offset == %(coverage)s.time_offset
                            AND step == 0
                        ;
                    END
            '''))

        # On delete of a nut: the inverse adjustment.
        cursor.execute(self._sql(
            '''
                CREATE TRIGGER IF NOT EXISTS %(db)s.%(nuts)s_remove_coverage
                    BEFORE DELETE ON %(nuts)s FOR EACH ROW
                    BEGIN
                        INSERT OR IGNORE INTO %(coverage)s VALUES
                        (old.kind_codes_id, old.tmin_seconds, old.tmin_offset, 0)
                        ;
                        UPDATE %(coverage)s
                        SET step = step - 1
                        WHERE old.kind_codes_id == %(coverage)s.kind_codes_id
                            AND old.tmin_seconds == %(coverage)s.time_seconds
                            AND old.tmin_offset == %(coverage)s.time_offset
                        ;
                        INSERT OR IGNORE INTO %(coverage)s VALUES
                        (old.kind_codes_id, old.tmax_seconds, old.tmax_offset, 0)
                        ;
                        UPDATE %(coverage)s
                        SET step = step + 1
                        WHERE old.kind_codes_id == %(coverage)s.kind_codes_id
                            AND old.tmax_seconds == %(coverage)s.time_seconds
                            AND old.tmax_offset == %(coverage)s.time_offset
                        ;
                        DELETE FROM %(coverage)s
                        WHERE old.kind_codes_id == %(coverage)s.kind_codes_id
                            AND old.tmin_seconds == %(coverage)s.time_seconds
                            AND old.tmin_offset == %(coverage)s.time_offset
                            AND step == 0
                        ;
                        DELETE FROM %(coverage)s
                        WHERE old.kind_codes_id == %(coverage)s.kind_codes_id
                            AND old.tmax_seconds == %(coverage)s.time_seconds
                            AND old.tmax_offset == %(coverage)s.time_offset
                            AND step == 0
                        ;
                    END
            '''))

531 

    def _delete(self):
        '''
        Delete database tables associated with this Squirrel.
        '''

        with self.transaction('delete tables') as cursor:
            # Drop triggers before their tables. The coverage objects are
            # dropped with IF EXISTS — presumably to cope with selections
            # created before coverage tracking existed; confirm against
            # schema history.
            for s in '''
                    DROP TRIGGER %(db)s.%(nuts)s_delete_nuts;
                    DROP TRIGGER %(db)s.%(nuts)s_delete_nuts2;
                    DROP TRIGGER %(db)s.%(file_states)s_delete_files;
                    DROP TRIGGER %(db)s.%(nuts)s_inc_kind_codes;
                    DROP TRIGGER %(db)s.%(nuts)s_dec_kind_codes;
                    DROP TABLE %(db)s.%(nuts)s;
                    DROP TABLE %(db)s.%(kind_codes_count)s;
                    DROP TRIGGER IF EXISTS %(db)s.%(nuts)s_add_coverage;
                    DROP TRIGGER IF EXISTS %(db)s.%(nuts)s_remove_coverage;
                    DROP TABLE IF EXISTS %(db)s.%(coverage)s;
            '''.strip().splitlines():

                cursor.execute(self._sql(s))

        Selection._delete(self)

552 

    @filldocs
    def add(self,
            paths,
            kinds=None,
            format='detect',
            include=None,
            exclude=None,
            check=True):

        '''
        Add files to the selection.

        :param paths:
            Iterator yielding paths to files or directories to be added to the
            selection. Recurses into directories. If given a ``str``, it
            is treated as a single path to be added.
        :type paths:
            :py:class:`list` of :py:class:`str`

        :param kinds:
            Content types to be made available through the Squirrel selection.
            By default, all known content types are accepted.
        :type kinds:
            :py:class:`list` of :py:class:`str`

        :param format:
            File format identifier or ``'detect'`` to enable auto-detection
            (available: %(file_formats)s).
        :type format:
            str

        :param include:
            If not ``None``, files are only included if their paths match the
            given regular expression pattern.
        :type include:
            str

        :param exclude:
            If not ``None``, files are only included if their paths do not
            match the given regular expression pattern.
        :type exclude:
            str

        :param check:
            If ``True``, all file modification times are checked to see if
            cached information has to be updated (slow). If ``False``, only
            previously unknown files are indexed and cached information is used
            for known files, regardless of file state (fast, corresponds to
            Squirrel's ``--optimistic`` mode). File deletions will go
            undetected in the latter case.
        :type check:
            bool

        :Complexity:
            O(log N)
        '''

        if isinstance(kinds, str):
            kinds = (kinds,)

        if isinstance(paths, str):
            paths = [paths]

        kind_mask = model.to_kind_mask(kinds)

        with progress.view():
            # Virtual paths bypass the filesystem walk.
            Selection.add(
                self, util.iter_select_files(
                    paths,
                    show_progress=False,
                    include=include,
                    exclude=exclude,
                    pass_through=lambda path: path.startswith('virtual:')
                ), kind_mask, format)

        # Index new/changed files and refresh the selection's nuts table.
        self._load(check)
        self._update_nuts()

630 

    def reload(self):
        '''
        Check for modifications and reindex modified files.

        Based on file modification times.
        '''

        # Force an mtime check on every file in the selection, re-run the
        # indexer, then refresh the selection's nuts table.
        self._set_file_states_force_check()
        self._load(check=True)
        self._update_nuts()

641 

642 def add_virtual(self, nuts, virtual_paths=None): 

643 ''' 

644 Add content which is not backed by files. 

645 

646 :param nuts: 

647 Content pieces to be added. 

648 :type nuts: 

649 iterator yielding :py:class:`~pyrocko.squirrel.model.Nut` objects 

650 

651 :param virtual_paths: 

652 List of virtual paths to prevent creating a temporary list of the 

653 nuts while aggregating the file paths for the selection. 

654 :type virtual_paths: 

655 :py:class:`list` of :py:class:`str` 

656 

657 Stores to the main database and the selection. 

658 ''' 

659 

660 if isinstance(virtual_paths, str): 

661 virtual_paths = [virtual_paths] 

662 

663 if virtual_paths is None: 

664 if not isinstance(nuts, list): 

665 nuts = list(nuts) 

666 virtual_paths = set(nut.file_path for nut in nuts) 

667 

668 Selection.add(self, virtual_paths) 

669 self.get_database().dig(nuts) 

670 self._update_nuts() 

671 

672 def add_volatile(self, nuts): 

673 if not isinstance(nuts, list): 

674 nuts = list(nuts) 

675 

676 paths = list(set(nut.file_path for nut in nuts)) 

677 io.backends.virtual.add_nuts(nuts) 

678 self.add_virtual(nuts, paths) 

679 self._volatile_paths.extend(paths) 

680 

681 def add_volatile_waveforms(self, traces): 

682 ''' 

683 Add in-memory waveforms which will be removed when the app closes. 

684 ''' 

685 

686 name = model.random_name() 

687 

688 path = 'virtual:volatile:%s' % name 

689 

690 nuts = [] 

691 for itr, tr in enumerate(traces): 

692 assert tr.tmin <= tr.tmax 

693 tmin_seconds, tmin_offset = model.tsplit(tr.tmin) 

694 tmax_seconds, tmax_offset = model.tsplit( 

695 tr.tmin + tr.data_len()*tr.deltat) 

696 

697 nuts.append(model.Nut( 

698 file_path=path, 

699 file_format='virtual', 

700 file_segment=itr, 

701 file_element=0, 

702 file_mtime=0, 

703 codes=tr.codes, 

704 tmin_seconds=tmin_seconds, 

705 tmin_offset=tmin_offset, 

706 tmax_seconds=tmax_seconds, 

707 tmax_offset=tmax_offset, 

708 deltat=tr.deltat, 

709 kind_id=to_kind_id('waveform'), 

710 content=tr)) 

711 

712 self.add_volatile(nuts) 

713 return path 

714 

    def _load(self, check):
        # Run the indexer over the selection. Only meta-data is extracted
        # (content=[]); the yielded items are not needed here.
        for _ in io.iload(
                self,
                content=[],
                skip_unchanged=True,
                check=check):
            pass

722 

    def _update_nuts(self, transaction=None):
        # Pull entries from the global nuts index into this selection's nuts
        # table, honoring each file's kind mask and skipping files whose
        # file_state is 2.
        transaction = transaction or self.transaction('update nuts')
        with make_task('Aggregating selection') as task, \
                transaction as cursor:

            # Route SQLite progress callbacks into the task display.
            self._conn.set_progress_handler(task.update, 100000)
            nrows = cursor.execute(self._sql(
                '''
                    INSERT INTO %(db)s.%(nuts)s
                    SELECT NULL,
                        nuts.file_id, nuts.file_segment, nuts.file_element,
                        nuts.kind_id, nuts.kind_codes_id,
                        nuts.tmin_seconds, nuts.tmin_offset,
                        nuts.tmax_seconds, nuts.tmax_offset,
                        nuts.kscale
                    FROM %(db)s.%(file_states)s
                    INNER JOIN nuts
                        ON %(db)s.%(file_states)s.file_id == nuts.file_id
                    INNER JOIN kind_codes
                        ON nuts.kind_codes_id ==
                           kind_codes.kind_codes_id
                    WHERE %(db)s.%(file_states)s.file_state != 2
                        AND (((1 << kind_codes.kind_id)
                            & %(db)s.%(file_states)s.kind_mask) != 0)
                ''')).rowcount

            task.update(nrows)
            self._set_file_states_known(transaction)
            self._conn.set_progress_handler(None, 0)

752 

    def add_source(self, source, check=True):
        '''
        Add remote resource.

        :param source:
            Remote data access client instance.
        :type source:
            subclass of :py:class:`~pyrocko.squirrel.client.base.Source`

        :param check:
            Passed on to the source's ``setup`` method.
        :type check:
            bool
        '''

        self._sources.append(source)
        source.setup(self, check=check)

765 

    def add_fdsn(self, *args, **kwargs):
        '''
        Add FDSN site for transparent remote data access.

        Arguments are passed to
        :py:class:`~pyrocko.squirrel.client.fdsn.FDSNSource`.
        '''

        # Convenience wrapper around add_source.
        self.add_source(fdsn.FDSNSource(*args, **kwargs))

775 

    def add_catalog(self, *args, **kwargs):
        '''
        Add online catalog for transparent event data access.

        Arguments are passed to
        :py:class:`~pyrocko.squirrel.client.catalog.CatalogSource`.
        '''

        # Convenience wrapper around add_source.
        self.add_source(catalog.CatalogSource(*args, **kwargs))

785 

    def add_dataset(self, ds, check=True):
        '''
        Read dataset description from file and add its contents.

        :param ds:
            Path to dataset description file or dataset description object.
            See :py:mod:`~pyrocko.squirrel.dataset`.
        :type ds:
            :py:class:`str` or :py:class:`~pyrocko.squirrel.dataset.Dataset`

        :param check:
            If ``True``, all file modification times are checked to see if
            cached information has to be updated (slow). If ``False``, only
            previously unknown files are indexed and cached information is used
            for known files, regardless of file state (fast, corresponds to
            Squirrel's ``--optimistic`` mode). File deletions will go
            undetected in the latter case.
        :type check:
            bool
        '''
        if isinstance(ds, str):
            ds = dataset.read_dataset(ds)

        ds.setup(self, check=check)

810 

811 def _get_selection_args( 

812 self, kind_id, 

813 obj=None, tmin=None, tmax=None, time=None, codes=None): 

814 

815 if codes is not None: 

816 codes = codes_patterns_for_kind(kind_id, codes) 

817 

818 if time is not None: 

819 tmin = time 

820 tmax = time 

821 

822 if obj is not None: 

823 tmin = tmin if tmin is not None else obj.tmin 

824 tmax = tmax if tmax is not None else obj.tmax 

825 codes = codes if codes is not None else codes_patterns_for_kind( 

826 kind_id, obj.codes) 

827 

828 return tmin, tmax, codes 

829 

830 def _get_selection_args_str(self, *args, **kwargs): 

831 

832 tmin, tmax, codes = self._get_selection_args(*args, **kwargs) 

833 return 'tmin: %s, tmax: %s, codes: %s' % ( 

834 util.time_to_str(tmin) if tmin is not None else 'none', 

835 util.time_to_str(tmax) if tmax is not None else 'none', 

836 ','.join(str(entry) for entry in codes)) 

837 

838 def _selection_args_to_kwargs( 

839 self, obj=None, tmin=None, tmax=None, time=None, codes=None): 

840 

841 return dict(obj=obj, tmin=tmin, tmax=tmax, time=time, codes=codes) 

842 

    def _timerange_sql(self, tmin, tmax, kind, cond, args, naiv):
        # Append SQL conditions (and their bind arguments) restricting nuts
        # of the given kind to the query time range.

        tmin_seconds, tmin_offset = model.tsplit(tmin)
        tmax_seconds, tmax_offset = model.tsplit(tmax)
        if naiv:
            # Simple variant bypassing the kscale index (slow, for testing).
            cond.append('%(db)s.%(nuts)s.tmin_seconds <= ?')
            args.append(tmax_seconds)
        else:
            # Nuts are classified by kscale — apparently a binning of their
            # duration by the edges in model.tscale_edges (confirm there).
            # Per bin, a bounded BETWEEN query on tmin_seconds can use the
            # (kind_id, kscale, tmin_seconds) index.
            tscale_edges = model.tscale_edges
            tmin_cond = []
            for kscale in range(tscale_edges.size + 1):
                if kscale != tscale_edges.size:
                    tscale = int(tscale_edges[kscale])
                    tmin_cond.append('''
                        (%(db)s.%(nuts)s.kind_id = ?
                         AND %(db)s.%(nuts)s.kscale == ?
                         AND %(db)s.%(nuts)s.tmin_seconds BETWEEN ? AND ?)
                    ''')
                    args.extend(
                        (to_kind_id(kind), kscale,
                         tmin_seconds - tscale - 1, tmax_seconds + 1))

                else:
                    # Last bin is unbounded below: no lower limit on
                    # tmin_seconds.
                    tmin_cond.append('''
                        (%(db)s.%(nuts)s.kind_id == ?
                         AND %(db)s.%(nuts)s.kscale == ?
                         AND %(db)s.%(nuts)s.tmin_seconds <= ?)
                    ''')

                    args.extend(
                        (to_kind_id(kind), kscale, tmax_seconds + 1))
            if tmin_cond:
                cond.append(' ( ' + ' OR '.join(tmin_cond) + ' ) ')

        # Common to both variants: nut must not end before the query starts.
        cond.append('%(db)s.%(nuts)s.tmax_seconds >= ?')
        args.append(tmin_seconds)

879 

    def _codes_match_sql(self, positive, kind_id, codes, cond, args):
        # Append SQL matching the given codes patterns: exact patterns use a
        # fast IN lookup, glob patterns use GLOB. With positive=False the
        # combined condition is negated (used for exclusion).
        pats = codes_patterns_for_kind(kind_id, codes)
        if pats is None:
            return

        pats_exact = []
        pats_nonexact = []
        for pat in pats:
            spat = pat.safe_str
            (pats_exact if _is_exact(spat) else pats_nonexact).append(spat)

        codes_cond = []
        if pats_exact:
            codes_cond.append(' ( kind_codes.codes IN ( %s ) ) ' % ', '.join(
                '?'*len(pats_exact)))

            args.extend(pats_exact)

        if pats_nonexact:
            codes_cond.append(' ( %s ) ' % ' OR '.join(
                ('kind_codes.codes GLOB ?',) * len(pats_nonexact)))

            args.extend(pats_nonexact)

        if codes_cond:
            cond.append('%s ( %s )' % (
                'NOT' if not positive else '',
                ' OR '.join(codes_cond)))

908 

    def iter_nuts(
            self, kind=None, tmin=None, tmax=None, codes=None,
            codes_exclude=None, sample_rate_min=None, sample_rate_max=None,
            naiv=False, kind_codes_ids=None, path=None, limit=None):

        '''
        Iterate over content entities matching given constraints.

        :param kind:
            Content kind (or kinds) to extract.
        :type kind:
            :py:class:`str`, :py:class:`list` of :py:class:`str`

        :param tmin:
            Start time of query interval.
        :type tmin:
            :py:func:`~pyrocko.util.get_time_float`

        :param tmax:
            End time of query interval.
        :type tmax:
            :py:func:`~pyrocko.util.get_time_float`

        :param codes:
            List of code patterns to query.
        :type codes:
            :py:class:`list` of :py:class:`~pyrocko.squirrel.model.Codes`
            objects appropriate for the queried content type, or anything which
            can be converted to such objects.

        :param codes_exclude:
            List of code patterns to exclude from the result.

        :param sample_rate_min:
            Minimum sample rate [Hz] of matched content.

        :param sample_rate_max:
            Maximum sample rate [Hz] of matched content.

        :param naiv:
            Bypass time span lookup through indices (slow, for testing).
        :type naiv:
            :py:class:`bool`

        :param kind_codes_ids:
            Kind-codes IDs of contents to be retrieved (internal use).
        :type kind_codes_ids:
            :py:class:`list` of :py:class:`int`

        :param path:
            Restrict query to contents of the given file.

        :param limit:
            Maximum number of database rows to consider.

        :yields:
            :py:class:`~pyrocko.squirrel.model.Nut` objects representing the
            intersecting content.

        :complexity:
            O(log N) for the time selection part due to heavy use of database
            indices.

        Query time span is treated as a half-open interval ``[tmin, tmax)``.
        However, if ``tmin`` equals ``tmax``, the edge logics are modified to
        closed-interval so that content intersecting with the time instant ``t
        = tmin = tmax`` is returned (otherwise nothing would be returned as
        ``[t, t)`` never matches anything).

        Time spans of content entities to be matched are also treated as half
        open intervals, e.g. content span ``[0, 1)`` is matched by query span
        ``[0, 1)`` but not by ``[-1, 0)`` or ``[1, 2)``. Also here, logics are
        modified to closed-interval when the content time span is an empty
        interval, i.e. to indicate a time instant. E.g. time instant 0 is
        matched by ``[0, 1)`` but not by ``[-1, 0)`` or ``[1, 2)``.
        '''

        # Multiple kinds (or all kinds): recurse per kind.
        # NOTE(review): only tmin, tmax and codes are forwarded here —
        # codes_exclude, sample rate limits, path and limit are silently
        # dropped in the multi-kind case; confirm whether this is intended.
        if not isinstance(kind, str):
            if kind is None:
                kind = model.g_content_kinds
            for kind_ in kind:
                for nut in self.iter_nuts(kind_, tmin, tmax, codes):
                    yield nut

            return

        kind_id = to_kind_id(kind)

        cond = []
        args = []
        if tmin is not None or tmax is not None:
            assert kind is not None
            # Open-ended queries fall back to the full available time span.
            if tmin is None:
                tmin = self.get_time_span()[0]
            if tmax is None:
                tmax = self.get_time_span()[1] + 1.0

            self._timerange_sql(tmin, tmax, kind, cond, args, naiv)

        cond.append('kind_codes.kind_id == ?')
        args.append(kind_id)

        if codes is not None:
            self._codes_match_sql(True, kind_id, codes, cond, args)

        if codes_exclude is not None:
            self._codes_match_sql(False, kind_id, codes_exclude, cond, args)

        # deltat is the sampling interval, i.e. the inverse of sample rate.
        if sample_rate_min is not None:
            cond.append('kind_codes.deltat <= ?')
            args.append(1.0/sample_rate_min)

        if sample_rate_max is not None:
            cond.append('? <= kind_codes.deltat')
            args.append(1.0/sample_rate_max)

        if kind_codes_ids is not None:
            cond.append(
                ' ( kind_codes.kind_codes_id IN ( %s ) ) ' % ', '.join(
                    '?'*len(kind_codes_ids)))

            args.extend(kind_codes_ids)

        db = self.get_database()
        if path is not None:
            cond.append('files.path == ?')
            args.append(db.relpath(abspath(path)))

        sql = ('''
            SELECT
                files.path,
                files.format,
                files.mtime,
                files.size,
                %(db)s.%(nuts)s.file_segment,
                %(db)s.%(nuts)s.file_element,
                kind_codes.kind_id,
                kind_codes.codes,
                %(db)s.%(nuts)s.tmin_seconds,
                %(db)s.%(nuts)s.tmin_offset,
                %(db)s.%(nuts)s.tmax_seconds,
                %(db)s.%(nuts)s.tmax_offset,
                kind_codes.deltat
            FROM files
            INNER JOIN %(db)s.%(nuts)s
                ON files.file_id == %(db)s.%(nuts)s.file_id
            INNER JOIN kind_codes
                ON %(db)s.%(nuts)s.kind_codes_id == kind_codes.kind_codes_id
            ''')

        if cond:
            sql += ''' WHERE ''' + ' AND '.join(cond)

        if limit is not None:
            sql += ''' LIMIT %i''' % limit

        sql = self._sql(sql)
        if tmin is None and tmax is None:
            # No time constraint: pass through all rows.
            for row in self._conn.execute(sql, args):
                row = (db.abspath(row[0]),) + row[1:]
                nut = model.Nut(values_nocheck=row)
                yield nut
        else:
            assert tmin is not None and tmax is not None
            if tmin == tmax:
                # Time-instant query: closed-interval edge logics (see
                # docstring).
                for row in self._conn.execute(sql, args):
                    row = (db.abspath(row[0]),) + row[1:]
                    nut = model.Nut(values_nocheck=row)
                    if (nut.tmin <= tmin < nut.tmax) \
                            or (nut.tmin == nut.tmax and tmin == nut.tmin):

                        yield nut
            else:
                # Half-open interval query; the SQL pre-selection is done on
                # integer seconds, so the exact check is repeated here.
                for row in self._conn.execute(sql, args):
                    row = (db.abspath(row[0]),) + row[1:]
                    nut = model.Nut(values_nocheck=row)
                    if (tmin < nut.tmax and nut.tmin < tmax) \
                            or (nut.tmin == nut.tmax
                                and tmin <= nut.tmin < tmax):

                        yield nut

1075 

1076 def get_nuts(self, *args, **kwargs): 

1077 ''' 

1078 Get content entities matching given constraints. 

1079 

1080 Like :py:meth:`iter_nuts` but returns results as a list. 

1081 ''' 

1082 

1083 return list(self.iter_nuts(*args, **kwargs)) 

1084 

    def _split_nuts(
            self, kind, tmin=None, tmax=None, codes=None, path=None):
        '''
        Cut out a time window from matching index entries (nuts).

        Index entries of the given ``kind`` overlapping the interval
        ``[tmin, tmax)`` (further restricted by ``codes`` and ``path`` if
        given) are removed and, where an entry only partially overlaps the
        interval, replaced by new entries covering the non-overlapping
        remainder(s). The operation is applied both to the live selection's
        nut table and to the main database's nut table, inside a single
        transaction.
        '''

        kind_id = to_kind_id(kind)
        # Times are stored split into integer seconds plus a float offset.
        tmin_seconds, tmin_offset = model.tsplit(tmin)
        tmax_seconds, tmax_offset = model.tsplit(tmax)

        # Table-name substitutions pointing at the main database's nut
        # table (as opposed to self._sql, which targets the selection).
        names_main_nuts = dict(self._names)
        names_main_nuts.update(db='main', nuts='nuts')

        db = self.get_database()

        def main_nuts(s):
            return s % names_main_nuts

        with self.transaction('split nuts') as cursor:
            # modify selection and main
            for sql_subst in [
                    self._sql, main_nuts]:

                cond = []
                args = []

                self._timerange_sql(tmin, tmax, kind, cond, args, False)

                if codes is not None:
                    self._codes_match_sql(True, kind_id, codes, cond, args)

                if path is not None:
                    # Paths are stored relative to the database location.
                    cond.append('files.path == ?')
                    args.append(db.relpath(abspath(path)))

                sql = sql_subst('''
                    SELECT
                        %(db)s.%(nuts)s.nut_id,
                        %(db)s.%(nuts)s.tmin_seconds,
                        %(db)s.%(nuts)s.tmin_offset,
                        %(db)s.%(nuts)s.tmax_seconds,
                        %(db)s.%(nuts)s.tmax_offset,
                        kind_codes.deltat
                    FROM files
                    INNER JOIN %(db)s.%(nuts)s
                        ON files.file_id == %(db)s.%(nuts)s.file_id
                    INNER JOIN kind_codes
                        ON %(db)s.%(nuts)s.kind_codes_id == kind_codes.kind_codes_id
                    WHERE ''' + ' AND '.join(cond))  # noqa

                insert = []
                delete = []
                for row in cursor.execute(sql, args):
                    nut_id, nut_tmin_seconds, nut_tmin_offset, \
                        nut_tmax_seconds, nut_tmax_offset, nut_deltat = row

                    nut_tmin = model.tjoin(
                        nut_tmin_seconds, nut_tmin_offset)
                    nut_tmax = model.tjoin(
                        nut_tmax_seconds, nut_tmax_offset)

                    # Only entries overlapping [tmin, tmax) are touched.
                    if nut_tmin < tmax and tmin < nut_tmax:
                        if nut_tmin < tmin:
                            # Keep the leading remainder [nut_tmin, tmin).
                            insert.append((
                                nut_tmin_seconds, nut_tmin_offset,
                                tmin_seconds, tmin_offset,
                                model.tscale_to_kscale(
                                    tmin_seconds - nut_tmin_seconds),
                                nut_id))

                        if tmax < nut_tmax:
                            # Keep the trailing remainder [tmax, nut_tmax).
                            insert.append((
                                tmax_seconds, tmax_offset,
                                nut_tmax_seconds, nut_tmax_offset,
                                model.tscale_to_kscale(
                                    nut_tmax_seconds - tmax_seconds),
                                nut_id))

                        # The original, overlapping entry is always removed.
                        delete.append((nut_id,))

                # New entries inherit all other columns from the original
                # nut (selected by nut_id); only the time span and the
                # derived kscale bucket change.
                sql_add = '''
                    INSERT INTO %(db)s.%(nuts)s (
                            file_id, file_segment, file_element, kind_id,
                            kind_codes_id, tmin_seconds, tmin_offset,
                            tmax_seconds, tmax_offset, kscale )
                        SELECT
                            file_id, file_segment, file_element,
                            kind_id, kind_codes_id, ?, ?, ?, ?, ?
                        FROM %(db)s.%(nuts)s
                        WHERE nut_id == ?
                '''
                cursor.executemany(sql_subst(sql_add), insert)

                sql_delete = '''
                    DELETE FROM %(db)s.%(nuts)s WHERE nut_id == ?
                '''
                cursor.executemany(sql_subst(sql_delete), delete)

1179 

1180 def get_time_span(self, kinds=None, tight=True, dummy_limits=True): 

1181 ''' 

1182 Get time interval over all content in selection. 

1183 

1184 :param kinds: 

1185 If not ``None``, restrict query to given content kinds. 

1186 :type kind: 

1187 list of str 

1188 

1189 :complexity: 

1190 O(1), independent of the number of nuts. 

1191 

1192 :returns: 

1193 ``(tmin, tmax)``, combined time interval of queried content kinds. 

1194 ''' 

1195 

1196 sql_min = self._sql(''' 

1197 SELECT MIN(tmin_seconds), MIN(tmin_offset) 

1198 FROM %(db)s.%(nuts)s 

1199 WHERE kind_id == ? 

1200 AND tmin_seconds == ( 

1201 SELECT MIN(tmin_seconds) 

1202 FROM %(db)s.%(nuts)s 

1203 WHERE kind_id == ?) 

1204 ''') 

1205 

1206 sql_max = self._sql(''' 

1207 SELECT MAX(tmax_seconds), MAX(tmax_offset) 

1208 FROM %(db)s.%(nuts)s 

1209 WHERE kind_id == ? 

1210 AND tmax_seconds == ( 

1211 SELECT MAX(tmax_seconds) 

1212 FROM %(db)s.%(nuts)s 

1213 WHERE kind_id == ?) 

1214 ''') 

1215 

1216 gtmin = None 

1217 gtmax = None 

1218 

1219 if isinstance(kinds, str): 

1220 kinds = [kinds] 

1221 

1222 if kinds is None: 

1223 kind_ids = model.g_content_kind_ids 

1224 else: 

1225 kind_ids = model.to_kind_ids(kinds) 

1226 

1227 tmins = [] 

1228 tmaxs = [] 

1229 for kind_id in kind_ids: 

1230 for tmin_seconds, tmin_offset in self._conn.execute( 

1231 sql_min, (kind_id, kind_id)): 

1232 tmins.append(model.tjoin(tmin_seconds, tmin_offset)) 

1233 

1234 for (tmax_seconds, tmax_offset) in self._conn.execute( 

1235 sql_max, (kind_id, kind_id)): 

1236 tmaxs.append(model.tjoin(tmax_seconds, tmax_offset)) 

1237 

1238 tmins = [tmin if tmin != model.g_tmin else None for tmin in tmins] 

1239 tmaxs = [tmax if tmax != model.g_tmax else None for tmax in tmaxs] 

1240 

1241 if tight: 

1242 gtmin = nonef(min, tmins) 

1243 gtmax = nonef(max, tmaxs) 

1244 else: 

1245 gtmin = None if None in tmins else nonef(min, tmins) 

1246 gtmax = None if None in tmaxs else nonef(max, tmaxs) 

1247 

1248 if dummy_limits: 

1249 if gtmin is None: 

1250 gtmin = model.g_tmin 

1251 if gtmax is None: 

1252 gtmax = model.g_tmax 

1253 

1254 return gtmin, gtmax 

1255 

1256 def has(self, kinds): 

1257 ''' 

1258 Check availability of given content kinds. 

1259 

1260 :param kinds: 

1261 Content kinds to query. 

1262 :type kind: 

1263 list of str 

1264 

1265 :returns: 

1266 ``True`` if any of the queried content kinds is available 

1267 in the selection. 

1268 ''' 

1269 self_tmin, self_tmax = self.get_time_span( 

1270 kinds, dummy_limits=False) 

1271 

1272 return None not in (self_tmin, self_tmax) 

1273 

1274 def get_deltat_span(self, kind): 

1275 ''' 

1276 Get min and max sampling interval of all content of given kind. 

1277 

1278 :param kind: 

1279 Content kind 

1280 :type kind: 

1281 str 

1282 

1283 :returns: ``(deltat_min, deltat_max)`` 

1284 ''' 

1285 

1286 deltats = [ 

1287 deltat for deltat in self.get_deltats(kind) 

1288 if deltat is not None] 

1289 

1290 if deltats: 

1291 return min(deltats), max(deltats) 

1292 else: 

1293 return None, None 

1294 

1295 def iter_kinds(self, codes=None): 

1296 ''' 

1297 Iterate over content types available in selection. 

1298 

1299 :param codes: 

1300 If given, get kinds only for selected codes identifier. 

1301 Only a single identifier may be given here and no pattern matching 

1302 is done, currently. 

1303 :type codes: 

1304 :py:class:`~pyrocko.squirrel.model.Codes` 

1305 

1306 :yields: 

1307 Available content kinds as :py:class:`str`. 

1308 

1309 :complexity: 

1310 O(1), independent of number of nuts. 

1311 ''' 

1312 

1313 return self._database._iter_kinds( 

1314 codes=codes, 

1315 kind_codes_count='%(db)s.%(kind_codes_count)s' % self._names) 

1316 

1317 def iter_deltats(self, kind=None): 

1318 ''' 

1319 Iterate over sampling intervals available in selection. 

1320 

1321 :param kind: 

1322 If given, get sampling intervals only for a given content type. 

1323 :type kind: 

1324 str 

1325 

1326 :yields: 

1327 :py:class:`float` values. 

1328 

1329 :complexity: 

1330 O(1), independent of number of nuts. 

1331 ''' 

1332 return self._database._iter_deltats( 

1333 kind=kind, 

1334 kind_codes_count='%(db)s.%(kind_codes_count)s' % self._names) 

1335 

1336 def iter_codes(self, kind=None): 

1337 ''' 

1338 Iterate over content identifier code sequences available in selection. 

1339 

1340 :param kind: 

1341 If given, get codes only for a given content type. 

1342 :type kind: 

1343 str 

1344 

1345 :yields: 

1346 :py:class:`tuple` of :py:class:`str` 

1347 

1348 :complexity: 

1349 O(1), independent of number of nuts. 

1350 ''' 

1351 return self._database._iter_codes( 

1352 kind=kind, 

1353 kind_codes_count='%(db)s.%(kind_codes_count)s' % self._names) 

1354 

1355 def _iter_codes_info(self, kind=None, codes=None): 

1356 ''' 

1357 Iterate over number of occurrences of any (kind, codes) combination. 

1358 

1359 :param kind: 

1360 If given, get counts only for selected content type. 

1361 :type kind: 

1362 str 

1363 

1364 :yields: 

1365 Tuples of the form ``(kind, codes, deltat, kind_codes_id, count)``. 

1366 

1367 :complexity: 

1368 O(1), independent of number of nuts. 

1369 ''' 

1370 return self._database._iter_codes_info( 

1371 kind=kind, 

1372 codes=codes, 

1373 kind_codes_count='%(db)s.%(kind_codes_count)s' % self._names) 

1374 

1375 def get_kinds(self, codes=None): 

1376 ''' 

1377 Get content types available in selection. 

1378 

1379 :param codes: 

1380 If given, get kinds only for selected codes identifier. 

1381 Only a single identifier may be given here and no pattern matching 

1382 is done, currently. 

1383 :type codes: 

1384 :py:class:`~pyrocko.squirrel.model.Codes` 

1385 

1386 :returns: 

1387 Sorted list of available content types. 

1388 :rtype: 

1389 py:class:`list` of :py:class:`str` 

1390 

1391 :complexity: 

1392 O(1), independent of number of nuts. 

1393 

1394 ''' 

1395 return sorted(list(self.iter_kinds(codes=codes))) 

1396 

1397 def get_deltats(self, kind=None): 

1398 ''' 

1399 Get sampling intervals available in selection. 

1400 

1401 :param kind: 

1402 If given, get sampling intervals only for selected content type. 

1403 :type kind: 

1404 str 

1405 

1406 :complexity: 

1407 O(1), independent of number of nuts. 

1408 

1409 :returns: Sorted list of available sampling intervals. 

1410 ''' 

1411 return sorted(list(self.iter_deltats(kind=kind))) 

1412 

1413 def get_codes(self, kind=None): 

1414 ''' 

1415 Get identifier code sequences available in selection. 

1416 

1417 :param kind: 

1418 If given, get codes only for selected content type. 

1419 :type kind: 

1420 str 

1421 

1422 :complexity: 

1423 O(1), independent of number of nuts. 

1424 

1425 :returns: Sorted list of available codes as tuples of strings. 

1426 ''' 

1427 return sorted(list(self.iter_codes(kind=kind))) 

1428 

1429 def get_counts(self, kind=None): 

1430 ''' 

1431 Get number of occurrences of any (kind, codes) combination. 

1432 

1433 :param kind: 

1434 If given, get codes only for selected content type. 

1435 :type kind: 

1436 str 

1437 

1438 :complexity: 

1439 O(1), independent of number of nuts. 

1440 

1441 :returns: ``dict`` with ``counts[kind][codes]`` or ``counts[codes]`` 

1442 if kind is not ``None`` 

1443 ''' 

1444 d = {} 

1445 for kind_id, codes, _, _, count in self._iter_codes_info(kind=kind): 

1446 if kind_id not in d: 

1447 v = d[kind_id] = {} 

1448 else: 

1449 v = d[kind_id] 

1450 

1451 if codes not in v: 

1452 v[codes] = 0 

1453 

1454 v[codes] += count 

1455 

1456 if kind is not None: 

1457 return d[to_kind_id(kind)] 

1458 else: 

1459 return dict((to_kind(kind_id), v) for (kind_id, v) in d.items()) 

1460 

1461 def glob_codes(self, kind, codes): 

1462 ''' 

1463 Find codes matching given patterns. 

1464 

1465 :param kind: 

1466 Content kind to be queried. 

1467 :type kind: 

1468 str 

1469 

1470 :param codes: 

1471 List of code patterns to query. 

1472 :type codes: 

1473 :py:class:`list` of :py:class:`~pyrocko.squirrel.model.Codes` 

1474 objects appropriate for the queried content type, or anything which 

1475 can be converted to such objects. 

1476 

1477 :returns: 

1478 List of matches of the form ``[kind_codes_id, codes, deltat]``. 

1479 ''' 

1480 

1481 kind_id = to_kind_id(kind) 

1482 args = [kind_id] 

1483 pats = codes_patterns_for_kind(kind_id, codes) 

1484 

1485 if pats: 

1486 codes_cond = 'AND ( %s ) ' % ' OR '.join( 

1487 ('kind_codes.codes GLOB ?',) * len(pats)) 

1488 

1489 args.extend(pat.safe_str for pat in pats) 

1490 else: 

1491 codes_cond = '' 

1492 

1493 sql = self._sql(''' 

1494 SELECT kind_codes_id, codes, deltat FROM kind_codes 

1495 WHERE 

1496 kind_id == ? ''' + codes_cond) 

1497 

1498 return list(map(list, self._conn.execute(sql, args))) 

1499 

1500 def update(self, constraint=None, **kwargs): 

1501 ''' 

1502 Update or partially update channel and event inventories. 

1503 

1504 :param constraint: 

1505 Selection of times or areas to be brought up to date. 

1506 :type constraint: 

1507 :py:class:`~pyrocko.squirrel.client.base.Constraint` 

1508 

1509 :param \\*\\*kwargs: 

1510 Shortcut for setting ``constraint=Constraint(**kwargs)``. 

1511 

1512 This function triggers all attached remote sources, to check for 

1513 updates in the meta-data. The sources will only submit queries when 

1514 their expiration date has passed, or if the selection spans into 

1515 previously unseen times or areas. 

1516 ''' 

1517 

1518 if constraint is None: 

1519 constraint = client.Constraint(**kwargs) 

1520 

1521 for source in self._sources: 

1522 source.update_channel_inventory(self, constraint) 

1523 source.update_event_inventory(self, constraint) 

1524 

1525 def update_waveform_promises(self, constraint=None, **kwargs): 

1526 ''' 

1527 Permit downloading of remote waveforms. 

1528 

1529 :param constraint: 

1530 Remote waveforms compatible with the given constraint are enabled 

1531 for download. 

1532 :type constraint: 

1533 :py:class:`~pyrocko.squirrel.client.base.Constraint` 

1534 

1535 :param \\*\\*kwargs: 

1536 Shortcut for setting ``constraint=Constraint(**kwargs)``. 

1537 

1538 Calling this method permits Squirrel to download waveforms from remote 

1539 sources when processing subsequent waveform requests. This works by 

1540 inserting so called waveform promises into the database. It will look 

1541 into the available channels for each remote source and create a promise 

1542 for each channel compatible with the given constraint. If the promise 

1543 then matches in a waveform request, Squirrel tries to download the 

1544 waveform. If the download is successful, the downloaded waveform is 

1545 added to the Squirrel and the promise is deleted. If the download 

1546 fails, the promise is kept if the reason of failure looks like being 

1547 temporary, e.g. because of a network failure. If the cause of failure 

1548 however seems to be permanent, the promise is deleted so that no 

1549 further attempts are made to download a waveform which might not be 

1550 available from that server at all. To force re-scheduling after a 

1551 permanent failure, call :py:meth:`update_waveform_promises` 

1552 yet another time. 

1553 ''' 

1554 

1555 if constraint is None: 

1556 constraint = client.Constraint(**kwargs) 

1557 

1558 for source in self._sources: 

1559 source.update_waveform_promises(self, constraint) 

1560 

1561 def remove_waveform_promises(self, from_database='selection'): 

1562 ''' 

1563 Remove waveform promises from live selection or global database. 

1564 

1565 Calling this function removes all waveform promises provided by the 

1566 attached sources. 

1567 

1568 :param from_database: 

1569 Remove from live selection ``'selection'`` or global database 

1570 ``'global'``. 

1571 ''' 

1572 for source in self._sources: 

1573 source.remove_waveform_promises(self, from_database=from_database) 

1574 

1575 def update_responses(self, constraint=None, **kwargs): 

1576 if constraint is None: 

1577 constraint = client.Constraint(**kwargs) 

1578 

1579 for source in self._sources: 

1580 source.update_response_inventory(self, constraint) 

1581 

1582 def get_nfiles(self): 

1583 ''' 

1584 Get number of files in selection. 

1585 ''' 

1586 

1587 sql = self._sql('''SELECT COUNT(*) FROM %(db)s.%(file_states)s''') 

1588 for row in self._conn.execute(sql): 

1589 return row[0] 

1590 

1591 def get_nnuts(self): 

1592 ''' 

1593 Get number of nuts in selection. 

1594 ''' 

1595 

1596 sql = self._sql('''SELECT COUNT(*) FROM %(db)s.%(nuts)s''') 

1597 for row in self._conn.execute(sql): 

1598 return row[0] 

1599 

1600 def get_total_size(self): 

1601 ''' 

1602 Get aggregated file size available in selection. 

1603 ''' 

1604 

1605 sql = self._sql(''' 

1606 SELECT SUM(files.size) FROM %(db)s.%(file_states)s 

1607 INNER JOIN files 

1608 ON %(db)s.%(file_states)s.file_id = files.file_id 

1609 ''') 

1610 

1611 for row in self._conn.execute(sql): 

1612 return row[0] or 0 

1613 

1614 def get_stats(self): 

1615 ''' 

1616 Get statistics on contents available through this selection. 

1617 ''' 

1618 

1619 kinds = self.get_kinds() 

1620 time_spans = {} 

1621 for kind in kinds: 

1622 time_spans[kind] = self.get_time_span([kind]) 

1623 

1624 return SquirrelStats( 

1625 nfiles=self.get_nfiles(), 

1626 nnuts=self.get_nnuts(), 

1627 kinds=kinds, 

1628 codes=self.get_codes(), 

1629 total_size=self.get_total_size(), 

1630 counts=self.get_counts(), 

1631 time_spans=time_spans, 

1632 sources=[s.describe() for s in self._sources], 

1633 operators=[op.describe() for op in self._operators]) 

1634 

1635 @filldocs 

1636 def check( 

1637 self, obj=None, tmin=None, tmax=None, time=None, codes=None, 

1638 ignore=[]): 

1639 ''' 

1640 Check for common data/metadata problems. 

1641 

1642 %(query_args)s 

1643 

1644 :param ignore: 

1645 Problem types to be ignored. 

1646 :type ignore: 

1647 :class:`list` of :class:`str` 

1648 (:py:class:`~pyrocko.squirrel.check.SquirrelCheckProblemType`) 

1649 

1650 :returns: 

1651 :py:class:`~pyrocko.squirrel.check.SquirrelCheck` object 

1652 containing the results of the check. 

1653 

1654 See :py:func:`~pyrocko.squirrel.check.do_check`. 

1655 ''' 

1656 

1657 from .check import do_check 

1658 tmin, tmax, codes = self._get_selection_args( 

1659 CHANNEL, obj, tmin, tmax, time, codes) 

1660 

1661 return do_check(self, tmin=tmin, tmax=tmax, codes=codes, ignore=ignore) 

1662 

    def get_content(
            self,
            nut,
            cache_id='default',
            accessor_id='default',
            show_progress=False,
            model='squirrel'):

        '''
        Get and possibly load full content for a given index entry from file.

        Loads the actual content objects (channel, station, waveform, ...) from
        file. For efficiency, sibling content (all stuff in the same file
        segment) will also be loaded as a side effect. The loaded contents are
        cached in the Squirrel object.

        :param nut:
            Index entry for which content is wanted.
        :param cache_id:
            Name of the content cache to use (e.g. ``'default'`` or
            ``'waveform'``).
        :param accessor_id:
            Name of the accessing consumer, used for cache bookkeeping.
        :param show_progress:
            If ``True``, show progress while loading from file.
        :param model:
            Object model variant passed through to the cache lookup.

        :raises:
            :py:exc:`~pyrocko.squirrel.error.NotAvailable` if the content
            cannot be retrieved even after (re)loading the file segment.
        '''

        content_cache = self._content_caches[cache_id]
        if not content_cache.has(nut):

            # Load the whole file segment; this populates the cache with all
            # sibling contents as well, not only the requested nut.
            for nut_loaded in io.iload(
                    nut.file_path,
                    segment=nut.file_segment,
                    format=nut.file_format,
                    database=self._database,
                    update_selection=self,
                    show_progress=show_progress):

                content_cache.put(nut_loaded)

        try:
            return content_cache.get(nut, accessor_id, model)

        except KeyError:
            # Loading did not produce the requested entry, e.g. because the
            # file changed on disk since indexing.
            raise error.NotAvailable(
                'Unable to retrieve content: %s, %s, %s, %s' % nut.key)

1699 

1700 def advance_accessor(self, accessor_id='default', cache_id=None): 

1701 ''' 

1702 Notify memory caches about consumer moving to a new data batch. 

1703 

1704 :param accessor_id: 

1705 Name of accessing consumer to be advanced. 

1706 :type accessor_id: 

1707 str 

1708 

1709 :param cache_id: 

1710 Name of cache to for which the accessor should be advanced. By 

1711 default the named accessor is advanced in all registered caches. 

1712 By default, two caches named ``'default'`` and ``'waveform'`` are 

1713 available. 

1714 :type cache_id: 

1715 str 

1716 

1717 See :py:class:`~pyrocko.squirrel.cache.ContentCache` for details on how 

1718 Squirrel's memory caching works and can be tuned. Default behaviour is 

1719 to release data when it has not been used in the latest data 

1720 window/batch. If the accessor is never advanced, data is cached 

1721 indefinitely - which is often desired e.g. for station meta-data. 

1722 Methods for consecutive data traversal, like 

1723 :py:meth:`chopper_waveforms` automatically advance and clear 

1724 their accessor. 

1725 ''' 

1726 for cache_ in ( 

1727 self._content_caches.keys() 

1728 if cache_id is None 

1729 else [cache_id]): 

1730 

1731 self._content_caches[cache_].advance_accessor(accessor_id) 

1732 

1733 def clear_accessor(self, accessor_id, cache_id=None): 

1734 ''' 

1735 Notify memory caches about a consumer having finished. 

1736 

1737 :param accessor_id: 

1738 Name of accessor to be cleared. 

1739 :type accessor_id: 

1740 str 

1741 

1742 :param cache_id: 

1743 Name of cache for which the accessor should be cleared. By default 

1744 the named accessor is cleared from all registered caches. By 

1745 default, two caches named ``'default'`` and ``'waveform'`` are 

1746 available. 

1747 :type cache_id: 

1748 str 

1749 

1750 Calling this method clears all references to cache entries held by the 

1751 named accessor. Cache entries are then freed if not referenced by any 

1752 other accessor. 

1753 ''' 

1754 

1755 for cache_ in ( 

1756 self._content_caches.keys() 

1757 if cache_id is None 

1758 else [cache_id]): 

1759 

1760 self._content_caches[cache_].clear_accessor(accessor_id) 

1761 

1762 def get_cache_stats(self, cache_id): 

1763 return self._content_caches[cache_id].get_stats() 

1764 

1765 @filldocs 

1766 def get_stations( 

1767 self, obj=None, tmin=None, tmax=None, time=None, codes=None, 

1768 model='squirrel', on_error='raise'): 

1769 

1770 ''' 

1771 Get stations matching given constraints. 

1772 

1773 %(query_args)s 

1774 

1775 :param model: 

1776 Select object model for returned values: ``'squirrel'`` to get 

1777 Squirrel station objects or ``'pyrocko'`` to get Pyrocko station 

1778 objects with channel information attached. 

1779 :type model: 

1780 str 

1781 

1782 :returns: 

1783 List of :py:class:`pyrocko.squirrel.Station 

1784 <pyrocko.squirrel.model.Station>` objects by default or list of 

1785 :py:class:`pyrocko.model.Station <pyrocko.model.station.Station>` 

1786 objects if ``model='pyrocko'`` is requested. 

1787 

1788 See :py:meth:`iter_nuts` for details on time span matching. 

1789 ''' 

1790 

1791 if model == 'pyrocko': 

1792 return self._get_pyrocko_stations( 

1793 obj, tmin, tmax, time, codes, on_error=on_error) 

1794 elif model in ('squirrel', 'stationxml', 'stationxml+'): 

1795 args = self._get_selection_args( 

1796 STATION, obj, tmin, tmax, time, codes) 

1797 

1798 nuts = sorted( 

1799 self.iter_nuts('station', *args), key=lambda nut: nut.dkey) 

1800 

1801 return [self.get_content(nut, model=model) for nut in nuts] 

1802 else: 

1803 raise ValueError('Invalid station model: %s' % model) 

1804 

1805 @filldocs 

1806 def get_channels( 

1807 self, obj=None, tmin=None, tmax=None, time=None, codes=None, 

1808 model='squirrel'): 

1809 

1810 ''' 

1811 Get channels matching given constraints. 

1812 

1813 %(query_args)s 

1814 

1815 :returns: 

1816 List of :py:class:`~pyrocko.squirrel.model.Channel` objects. 

1817 

1818 See :py:meth:`iter_nuts` for details on time span matching. 

1819 ''' 

1820 

1821 args = self._get_selection_args( 

1822 CHANNEL, obj, tmin, tmax, time, codes) 

1823 

1824 nuts = sorted( 

1825 self.iter_nuts('channel', *args), key=lambda nut: nut.dkey) 

1826 

1827 return [self.get_content(nut, model=model) for nut in nuts] 

1828 

    @filldocs
    def get_sensors(
            self, obj=None, tmin=None, tmax=None, time=None, codes=None):

        '''
        Get sensors matching given constraints.

        %(query_args)s

        :returns:
            List of :py:class:`~pyrocko.squirrel.model.Sensor` objects.

        Channels are queried and grouped into sensors via
        :py:meth:`~pyrocko.squirrel.model.Sensor.from_channels`; the last
        character of each channel code pattern is widened to ``'?'`` so that
        all components of a sensor are matched together.

        See :py:meth:`iter_nuts` for details on time span matching.
        '''

        tmin, tmax, codes = self._get_selection_args(
            CHANNEL, obj, tmin, tmax, time, codes)

        if codes is not None:
            # Replace the component character (last char of the channel
            # code) with '?' so the query matches all components of each
            # sensor; full-wildcard entries are left untouched.
            codes = codes_patterns_list(
                (entry.replace(channel=entry.channel[:-1] + '?')
                 if entry.channel != '*' else entry)
                for entry in codes)

        nuts = sorted(
            self.iter_nuts(
                'channel', tmin, tmax, codes), key=lambda nut: nut.dkey)

        # Group loaded channels into sensors and keep only sensors
        # overlapping the queried time span.
        return [
            sensor for sensor in model.Sensor.from_channels(
                self.get_content(nut) for nut in nuts)
            if match_time_span(tmin, tmax, sensor)]

1861 

1862 @filldocs 

1863 def get_responses( 

1864 self, obj=None, tmin=None, tmax=None, time=None, codes=None, 

1865 model='squirrel'): 

1866 

1867 ''' 

1868 Get instrument responses matching given constraints. 

1869 

1870 %(query_args)s 

1871 

1872 :param model: 

1873 Select data model for returned objects. Choices: ``'squirrel'``, 

1874 ``'stationxml'``, ``'stationxml+'``. See return value description. 

1875 :type model: 

1876 str 

1877 

1878 :returns: 

1879 List of :py:class:`~pyrocko.squirrel.model.Response` if ``model == 

1880 'squirrel'`` or list of 

1881 :py:class:`~pyrocko.io.stationxml.FDSNStationXML` 

1882 if ``model == 'stationxml'`` or list of 

1883 (:py:class:`~pyrocko.squirrel.model.Response`, 

1884 :py:class:`~pyrocko.io.stationxml.FDSNStationXML`) if ``model == 

1885 'stationxml+'``. 

1886 

1887 See :py:meth:`iter_nuts` for details on time span matching. 

1888 ''' 

1889 

1890 args = self._get_selection_args( 

1891 RESPONSE, obj, tmin, tmax, time, codes) 

1892 

1893 nuts = sorted( 

1894 self.iter_nuts('response', *args), key=lambda nut: nut.dkey) 

1895 

1896 return [self.get_content(nut, model=model) for nut in nuts] 

1897 

1898 @filldocs 

1899 def get_response( 

1900 self, obj=None, tmin=None, tmax=None, time=None, codes=None, 

1901 model='squirrel', on_duplicate='raise'): 

1902 

1903 ''' 

1904 Get instrument response matching given constraints. 

1905 

1906 %(query_args)s 

1907 

1908 :param model: 

1909 Select data model for returned object. Choices: ``'squirrel'``, 

1910 ``'stationxml'``, ``'stationxml+'``. See return value description. 

1911 :type model: 

1912 str 

1913 

1914 :param on_duplicate: 

1915 Determines how duplicates/multiple matching responses are handled. 

1916 Choices: ``'raise'`` - raise 

1917 :py:exc:`~pyrocko.squirrel.error.Duplicate`, ``'warn'`` - emit a 

1918 warning and return first match, ``'ignore'`` - silently return 

1919 first match. 

1920 :type on_duplicate: 

1921 str 

1922 

1923 :returns: 

1924 :py:class:`~pyrocko.squirrel.model.Response` if 

1925 ``model == 'squirrel'`` or 

1926 :py:class:`~pyrocko.io.stationxml.FDSNStationXML` if ``model == 

1927 'stationxml'`` or 

1928 (:py:class:`~pyrocko.squirrel.model.Response`, 

1929 :py:class:`~pyrocko.io.stationxml.FDSNStationXML`) if ``model == 

1930 'stationxml+'``. 

1931 

1932 Same as :py:meth:`get_responses` but returning exactly one response. 

1933 Raises :py:exc:`~pyrocko.squirrel.error.NotAvailable` if none is 

1934 available. Duplicates are handled according to the ``on_duplicate`` 

1935 argument. 

1936 

1937 See :py:meth:`iter_nuts` for details on time span matching. 

1938 ''' 

1939 

1940 if model == 'stationxml': 

1941 model_ = 'stationxml+' 

1942 else: 

1943 model_ = model 

1944 

1945 responses = self.get_responses( 

1946 obj, tmin, tmax, time, codes, model=model_) 

1947 if len(responses) == 0: 

1948 raise error.NotAvailable( 

1949 'No instrument response available (%s).' 

1950 % self._get_selection_args_str( 

1951 RESPONSE, obj, tmin, tmax, time, codes)) 

1952 

1953 elif len(responses) > 1: 

1954 

1955 if on_duplicate in ('raise', 'warn'): 

1956 if model_ == 'squirrel': 

1957 resps_sq = responses 

1958 elif model_ == 'stationxml+': 

1959 resps_sq = [resp[0] for resp in responses] 

1960 else: 

1961 raise ValueError('Invalid response model: %s' % model) 

1962 

1963 rinfo = ':\n' + '\n'.join( 

1964 ' ' + resp.summary for resp in resps_sq) 

1965 

1966 message = \ 

1967 'Multiple instrument responses matching given ' \ 

1968 'constraints (%s)%s%s' % ( 

1969 self._get_selection_args_str( 

1970 RESPONSE, obj, tmin, tmax, time, codes), 

1971 ' -> using first' if on_duplicate == 'warn' else '', 

1972 rinfo) 

1973 

1974 if on_duplicate == 'raise': 

1975 raise error.Duplicate(message) 

1976 

1977 elif on_duplicate == 'warn': 

1978 logger.warning(message) 

1979 

1980 elif on_duplicate == 'ignore': 

1981 pass 

1982 

1983 else: 

1984 ValueError( 

1985 'Invalid argument for on_duplicate: %s' % on_duplicate) 

1986 

1987 if model == 'stationxml': 

1988 return responses[0][1] 

1989 else: 

1990 return responses[0] 

1991 

1992 @filldocs 

1993 def get_events( 

1994 self, obj=None, tmin=None, tmax=None, time=None, codes=None): 

1995 

1996 ''' 

1997 Get events matching given constraints. 

1998 

1999 %(query_args)s 

2000 

2001 :returns: 

2002 List of :py:class:`~pyrocko.model.event.Event` objects. 

2003 

2004 See :py:meth:`iter_nuts` for details on time span matching. 

2005 ''' 

2006 

2007 args = self._get_selection_args(EVENT, obj, tmin, tmax, time, codes) 

2008 nuts = sorted( 

2009 self.iter_nuts('event', *args), key=lambda nut: nut.dkey) 

2010 

2011 return [self.get_content(nut) for nut in nuts] 

2012 

2013 def _redeem_promises(self, *args, order_only=False): 

2014 

2015 def split_promise(order, tmax=None): 

2016 self._split_nuts( 

2017 'waveform_promise', 

2018 order.tmin, tmax if tmax is not None else order.tmax, 

2019 codes=order.codes, 

2020 path=order.source_id) 

2021 

2022 tmin, tmax = args[:2] 

2023 

2024 waveforms = list(self.iter_nuts('waveform', *args)) 

2025 promises = list(self.iter_nuts('waveform_promise', *args)) 

2026 

2027 codes_to_avail = defaultdict(list) 

2028 for nut in waveforms: 

2029 codes_to_avail[nut.codes].append((nut.tmin, nut.tmax)) 

2030 

2031 def tts(x): 

2032 if isinstance(x, tuple): 

2033 return tuple(tts(e) for e in x) 

2034 elif isinstance(x, list): 

2035 return list(tts(e) for e in x) 

2036 else: 

2037 return util.time_to_str(x) 

2038 

2039 now = time.time() 

2040 orders = [] 

2041 for promise in promises: 

2042 waveforms_avail = codes_to_avail[promise.codes] 

2043 for block_tmin, block_tmax in blocks( 

2044 max(tmin, promise.tmin), 

2045 min(tmax, promise.tmax), 

2046 promise.deltat): 

2047 

2048 if block_tmin > now: 

2049 continue 

2050 

2051 orders.append( 

2052 WaveformOrder( 

2053 source_id=promise.file_path, 

2054 codes=promise.codes, 

2055 tmin=block_tmin, 

2056 tmax=block_tmax, 

2057 deltat=promise.deltat, 

2058 gaps=gaps(waveforms_avail, block_tmin, block_tmax), 

2059 time_created=now)) 

2060 

2061 orders_noop, orders = lpick(lambda order: order.gaps, orders) 

2062 

2063 order_keys_noop = set(order_key(order) for order in orders_noop) 

2064 if len(order_keys_noop) != 0 or len(orders_noop) != 0: 

2065 logger.info( 

2066 'Waveform orders already satisified with cached/local data: ' 

2067 '%i (%i)' % (len(order_keys_noop), len(orders_noop))) 

2068 

2069 for order in orders_noop: 

2070 split_promise(order) 

2071 

2072 if order_only: 

2073 if orders: 

2074 self._pending_orders.extend(orders) 

2075 logger.info( 

2076 'Enqueuing %i waveform order%s.' 

2077 % len_plural(orders)) 

2078 return 

2079 else: 

2080 if self._pending_orders: 

2081 orders.extend(self._pending_orders) 

2082 logger.info( 

2083 'Adding %i previously enqueued order%s.' 

2084 % len_plural(self._pending_orders)) 

2085 

2086 self._pending_orders = [] 

2087 

2088 source_ids = [] 

2089 sources = {} 

2090 for source in self._sources: 

2091 if isinstance(source, fdsn.FDSNSource): 

2092 source_ids.append(source._source_id) 

2093 sources[source._source_id] = source 

2094 

2095 source_priority = dict( 

2096 (source_id, i) for (i, source_id) in enumerate(source_ids)) 

2097 

2098 order_groups = defaultdict(list) 

2099 for order in orders: 

2100 order_groups[order_key(order)].append(order) 

2101 

2102 for k, order_group in order_groups.items(): 

2103 order_group.sort( 

2104 key=lambda order: source_priority[order.source_id]) 

2105 

2106 n_order_groups = len(order_groups) 

2107 

2108 if len(order_groups) != 0 or len(orders) != 0: 

2109 logger.info( 

2110 'Waveform orders standing for download: %i (%i)' 

2111 % (len(order_groups), len(orders))) 

2112 

2113 task = make_task('Waveform orders processed', n_order_groups) 

2114 else: 

2115 task = None 

2116 

2117 def release_order_group(order): 

2118 okey = order_key(order) 

2119 for followup in order_groups[okey]: 

2120 if followup is not order: 

2121 split_promise(followup) 

2122 

2123 del order_groups[okey] 

2124 

2125 if task: 

2126 task.update(n_order_groups - len(order_groups)) 

2127 

2128 def noop(order): 

2129 pass 

2130 

2131 def success(order, trs): 

2132 release_order_group(order) 

2133 if order.is_near_real_time(): 

2134 if not trs: 

2135 return # keep promise when no data received at real time 

2136 else: 

2137 tmax = max(tr.tmax+tr.deltat for tr in trs) 

2138 tmax = order.tmin \ 

2139 + round((tmax - order.tmin) / order.deltat) \ 

2140 * order.deltat 

2141 split_promise(order, tmax) 

2142 else: 

2143 split_promise(order) 

2144 

2145 def batch_add(paths): 

2146 self.add(paths) 

2147 

2148 calls = queue.Queue() 

2149 

2150 def enqueue(f): 

2151 def wrapper(*args): 

2152 calls.put((f, args)) 

2153 

2154 return wrapper 

2155 

2156 while order_groups: 

2157 

2158 orders_now = [] 

2159 empty = [] 

2160 for k, order_group in order_groups.items(): 

2161 try: 

2162 orders_now.append(order_group.pop(0)) 

2163 except IndexError: 

2164 empty.append(k) 

2165 

2166 for k in empty: 

2167 del order_groups[k] 

2168 

2169 by_source_id = defaultdict(list) 

2170 for order in orders_now: 

2171 by_source_id[order.source_id].append(order) 

2172 

2173 threads = [] 

2174 for source_id in by_source_id: 

2175 def download(): 

2176 try: 

2177 sources[source_id].download_waveforms( 

2178 by_source_id[source_id], 

2179 success=enqueue(success), 

2180 error_permanent=enqueue(split_promise), 

2181 error_temporary=noop, 

2182 batch_add=enqueue(batch_add)) 

2183 

2184 finally: 

2185 calls.put(None) 

2186 

2187 if len(by_source_id) > 1: 

2188 thread = threading.Thread(target=download) 

2189 thread.start() 

2190 threads.append(thread) 

2191 else: 

2192 download() 

2193 calls.put(None) 

2194 

2195 ndone = 0 

2196 while ndone < len(by_source_id): 

2197 ret = calls.get() 

2198 if ret is None: 

2199 ndone += 1 

2200 else: 

2201 ret[0](*ret[1]) 

2202 

2203 for thread in threads: 

2204 thread.join() 

2205 

2206 if task: 

2207 task.update(n_order_groups - len(order_groups)) 

2208 

2209 if task: 

2210 task.done() 

2211 

2212 @filldocs 

2213 def get_waveform_nuts( 

2214 self, obj=None, tmin=None, tmax=None, time=None, codes=None, 

2215 codes_exclude=None, sample_rate_min=None, sample_rate_max=None, 

2216 order_only=False): 

2217 

2218 ''' 

2219 Get waveform content entities matching given constraints. 

2220 

2221 %(query_args)s 

2222 

2223 Like :py:meth:`get_nuts` with ``kind='waveform'`` but additionally 

2224 resolves matching waveform promises (downloads waveforms from remote 

2225 sources). 

2226 

2227 See :py:meth:`iter_nuts` for details on time span matching. 

2228 ''' 

2229 

2230 args = self._get_selection_args(WAVEFORM, obj, tmin, tmax, time, codes) 

2231 

2232 if self.downloads_enabled: 

2233 self._redeem_promises( 

2234 *args, 

2235 codes_exclude, 

2236 sample_rate_min, 

2237 sample_rate_max, 

2238 order_only=order_only) 

2239 

2240 nuts = sorted( 

2241 self.iter_nuts('waveform', *args), key=lambda nut: nut.dkey) 

2242 

2243 return nuts 

2244 

2245 @filldocs 

2246 def have_waveforms( 

2247 self, obj=None, tmin=None, tmax=None, time=None, codes=None): 

2248 

2249 ''' 

2250 Check if any waveforms or waveform promises are available for given 

2251 constraints. 

2252 

2253 %(query_args)s 

2254 ''' 

2255 

2256 args = self._get_selection_args(WAVEFORM, obj, tmin, tmax, time, codes) 

2257 return bool(list( 

2258 self.iter_nuts('waveform', *args, limit=1))) \ 

2259 or (self.downloads_enabled and bool(list( 

2260 self.iter_nuts('waveform_promise', *args, limit=1)))) 

2261 

2262 @filldocs 

2263 def get_waveforms( 

2264 self, obj=None, tmin=None, tmax=None, time=None, codes=None, 

2265 codes_exclude=None, sample_rate_min=None, sample_rate_max=None, 

2266 uncut=False, want_incomplete=True, degap=True, 

2267 maxgap=5, maxlap=None, snap=None, include_last=False, 

2268 load_data=True, accessor_id='default', operator_params=None, 

2269 order_only=False, channel_priorities=None): 

2270 

2271 ''' 

2272 Get waveforms matching given constraints. 

2273 

2274 %(query_args)s 

2275 

2276 :param sample_rate_min: 

2277 Consider only waveforms with a sampling rate equal to or greater 

2278 than the given value [Hz]. 

2279 :type sample_rate_min: 

2280 float 

2281 

2282 :param sample_rate_max: 

2283 Consider only waveforms with a sampling rate equal to or less than 

2284 the given value [Hz]. 

2285 :type sample_rate_max: 

2286 float 

2287 

2288 :param uncut: 

2289 Set to ``True``, to disable cutting traces to [``tmin``, ``tmax``] 

2290 and to disable degapping/deoverlapping. Returns untouched traces as 

2291 they are read from file segment. File segments are always read in 

2292 their entirety. 

2293 :type uncut: 

2294 bool 

2295 

2296 :param want_incomplete: 

2297 If ``True``, gappy/incomplete traces are included in the result. 

2298 :type want_incomplete: 

2299 bool 

2300 

2301 :param degap: 

2302 If ``True``, connect traces and remove gaps and overlaps. 

2303 :type degap: 

2304 bool 

2305 

2306 :param maxgap: 

2307 Maximum gap size in samples which is filled with interpolated 

2308 samples when ``degap`` is ``True``. 

2309 :type maxgap: 

2310 int 

2311 

2312 :param maxlap: 

2313 Maximum overlap size in samples which is removed when ``degap`` is 

2314 ``True``. 

2315 :type maxlap: 

2316 int 

2317 

2318 :param snap: 

2319 Rounding functions used when computing sample index from time 

2320 instance, for trace start and trace end, respectively. By default, 

2321 ``(round, round)`` is used. 

2322 :type snap: 

2323 :py:class:`tuple` of 2 callables 

2324 

2325 :param include_last: 

2326 If ``True``, add one more sample to the returned traces (the sample 

2327 which would be the first sample of a query with ``tmin`` set to the 

2328 current value of ``tmax``). 

2329 :type include_last: 

2330 bool 

2331 

2332 :param load_data: 

2333 If ``True``, waveform data samples are read from files (or cache). 

2334 If ``False``, meta-information-only traces are returned (dummy 

2335 traces with no data samples). 

2336 :type load_data: 

2337 bool 

2338 

2339 :param accessor_id: 

2340 Name of consumer on who's behalf data is accessed. Used in cache 

2341 management (see :py:mod:`~pyrocko.squirrel.cache`). Used as a key 

2342 to distinguish different points of extraction for the decision of 

2343 when to release cached waveform data. Should be used when data is 

2344 alternately extracted from more than one region / selection. 

2345 :type accessor_id: 

2346 str 

2347 

2348 :param channel_priorities: 

2349 List of band/instrument code combinations to try. For example, 

2350 giving ``['HH', 'BH']`` would first try to get ``HH?`` channels and 

2351 then fallback to ``BH?`` if these are not available. The first 

2352 matching waveforms are returned. Use in combination with 

2353 ``sample_rate_min`` and ``sample_rate_max`` to constrain the sample 

2354 rate. 

2355 :type channel_priorities: 

2356 :py:class:`list` of :py:class:`str` 

2357 

2358 See :py:meth:`iter_nuts` for details on time span matching. 

2359 

2360 Loaded data is kept in memory (at least) until 

2361 :py:meth:`clear_accessor` has been called or 

2362 :py:meth:`advance_accessor` has been called two consecutive times 

2363 without data being accessed between the two calls (by this accessor). 

2364 Data may still be further kept in the memory cache if held alive by 

2365 consumers with a different ``accessor_id``. 

2366 ''' 

2367 

2368 tmin, tmax, codes = self._get_selection_args( 

2369 WAVEFORM, obj, tmin, tmax, time, codes) 

2370 

2371 if channel_priorities is not None: 

2372 return self._get_waveforms_prioritized( 

2373 tmin=tmin, tmax=tmax, codes=codes, codes_exclude=codes_exclude, 

2374 sample_rate_min=sample_rate_min, 

2375 sample_rate_max=sample_rate_max, 

2376 uncut=uncut, want_incomplete=want_incomplete, degap=degap, 

2377 maxgap=maxgap, maxlap=maxlap, snap=snap, 

2378 include_last=include_last, load_data=load_data, 

2379 accessor_id=accessor_id, operator_params=operator_params, 

2380 order_only=order_only, channel_priorities=channel_priorities) 

2381 

2382 kinds = ['waveform'] 

2383 if self.downloads_enabled: 

2384 kinds.append('waveform_promise') 

2385 

2386 self_tmin, self_tmax = self.get_time_span(kinds) 

2387 

2388 if None in (self_tmin, self_tmax): 

2389 logger.warning( 

2390 'No waveforms available.') 

2391 return [] 

2392 

2393 tmin = tmin if tmin is not None else self_tmin 

2394 tmax = tmax if tmax is not None else self_tmax 

2395 

2396 if codes is not None and len(codes) == 1: 

2397 # TODO: fix for multiple / mixed codes 

2398 operator = self.get_operator(codes[0]) 

2399 if operator is not None: 

2400 return operator.get_waveforms( 

2401 self, codes[0], 

2402 tmin=tmin, tmax=tmax, 

2403 uncut=uncut, want_incomplete=want_incomplete, degap=degap, 

2404 maxgap=maxgap, maxlap=maxlap, snap=snap, 

2405 include_last=include_last, load_data=load_data, 

2406 accessor_id=accessor_id, params=operator_params) 

2407 

2408 nuts = self.get_waveform_nuts( 

2409 obj, tmin, tmax, time, codes, codes_exclude, sample_rate_min, 

2410 sample_rate_max, order_only=order_only) 

2411 

2412 if order_only: 

2413 return [] 

2414 

2415 if load_data: 

2416 traces = [ 

2417 self.get_content(nut, 'waveform', accessor_id) for nut in nuts] 

2418 

2419 else: 

2420 traces = [ 

2421 trace.Trace(**nut.trace_kwargs) for nut in nuts] 

2422 

2423 if uncut: 

2424 return traces 

2425 

2426 if snap is None: 

2427 snap = (round, round) 

2428 

2429 chopped = [] 

2430 for tr in traces: 

2431 if not load_data and tr.ydata is not None: 

2432 tr = tr.copy(data=False) 

2433 tr.ydata = None 

2434 

2435 try: 

2436 chopped.append(tr.chop( 

2437 tmin, tmax, 

2438 inplace=False, 

2439 snap=snap, 

2440 include_last=include_last)) 

2441 

2442 except trace.NoData: 

2443 pass 

2444 

2445 processed = self._process_chopped( 

2446 chopped, degap, maxgap, maxlap, want_incomplete, tmin, tmax) 

2447 

2448 return processed 

2449 

2450 def _get_waveforms_prioritized( 

2451 self, tmin=None, tmax=None, codes=None, codes_exclude=None, 

2452 channel_priorities=None, **kwargs): 

2453 

2454 trs_all = [] 

2455 codes_have = set() 

2456 for channel in channel_priorities: 

2457 assert len(channel) == 2 

2458 if codes is not None: 

2459 codes_now = [ 

2460 codes_.replace(channel=channel+'?') for codes_ in codes] 

2461 else: 

2462 codes_now = model.CodesNSLCE('*', '*', '*', channel+'?') 

2463 

2464 codes_exclude_now = list(set( 

2465 codes_.replace(channel=channel+codes_.channel[-1]) 

2466 for codes_ in codes_have)) 

2467 

2468 if codes_exclude: 

2469 codes_exclude_now.extend(codes_exclude) 

2470 

2471 trs = self.get_waveforms( 

2472 tmin=tmin, 

2473 tmax=tmax, 

2474 codes=codes_now, 

2475 codes_exclude=codes_exclude_now, 

2476 **kwargs) 

2477 

2478 codes_have.update(set(tr.codes for tr in trs)) 

2479 trs_all.extend(trs) 

2480 

2481 return trs_all 

2482 

    @filldocs
    def chopper_waveforms(
            self, obj=None, tmin=None, tmax=None, time=None, codes=None,
            codes_exclude=None, sample_rate_min=None, sample_rate_max=None,
            tinc=None, tpad=0.,
            want_incomplete=True, snap_window=False,
            degap=True, maxgap=5, maxlap=None,
            snap=None, include_last=False, load_data=True,
            accessor_id=None, clear_accessor=True, operator_params=None,
            grouping=None, channel_priorities=None):

        '''
        Iterate window-wise over waveform archive.

        %(query_args)s

        :param tinc:
            Time increment (window shift time) (default uses ``tmax-tmin``).
        :type tinc:
            :py:func:`~pyrocko.util.get_time_float`

        :param tpad:
            Padding time appended on either side of the data window (window
            overlap is ``2*tpad``).
        :type tpad:
            :py:func:`~pyrocko.util.get_time_float`

        :param want_incomplete:
            If ``True``, gappy/incomplete traces are included in the result.
        :type want_incomplete:
            bool

        :param snap_window:
            If ``True``, start time windows at multiples of tinc with respect
            to system time zero.
        :type snap_window:
            bool

        :param degap:
            If ``True``, connect traces and remove gaps and overlaps.
        :type degap:
            bool

        :param maxgap:
            Maximum gap size in samples which is filled with interpolated
            samples when ``degap`` is ``True``.
        :type maxgap:
            int

        :param maxlap:
            Maximum overlap size in samples which is removed when ``degap`` is
            ``True``.
        :type maxlap:
            int

        :param snap:
            Rounding functions used when computing sample index from time
            instance, for trace start and trace end, respectively. By default,
            ``(round, round)`` is used.
        :type snap:
            :py:class:`tuple` of 2 callables

        :param include_last:
            If ``True``, add one more sample to the returned traces (the sample
            which would be the first sample of a query with ``tmin`` set to the
            current value of ``tmax``).
        :type include_last:
            bool

        :param load_data:
            If ``True``, waveform data samples are read from files (or cache).
            If ``False``, meta-information-only traces are returned (dummy
            traces with no data samples).
        :type load_data:
            bool

        :param accessor_id:
            Name of consumer on who's behalf data is accessed. Used in cache
            management (see :py:mod:`~pyrocko.squirrel.cache`). Used as a key
            to distinguish different points of extraction for the decision of
            when to release cached waveform data. Should be used when data is
            alternately extracted from more than one region / selection.
        :type accessor_id:
            str

        :param clear_accessor:
            If ``True`` (default), :py:meth:`clear_accessor` is called when the
            chopper finishes. Set to ``False`` to keep loaded waveforms in
            memory when the generator returns.
        :type clear_accessor:
            bool

        :param grouping:
            By default, traversal over the data is over time and all matching
            traces of a time window are yielded. Using this option, it is
            possible to traverse the data first by group (e.g. station or
            network) and second by time. This can reduce the number of traces
            in each batch and thus reduce the memory footprint of the process.
        :type grouping:
            :py:class:`~pyrocko.squirrel.operators.base.Grouping`

        :yields:
            A list of :py:class:`~pyrocko.trace.Trace` objects for every
            extracted time window.

        See :py:meth:`iter_nuts` for details on time span matching.
        '''

        tmin, tmax, codes = self._get_selection_args(
            WAVEFORM, obj, tmin, tmax, time, codes)

        kinds = ['waveform']
        if self.downloads_enabled:
            kinds.append('waveform_promise')

        self_tmin, self_tmax = self.get_time_span(kinds)

        if None in (self_tmin, self_tmax):
            logger.warning(
                'Content has undefined time span. No waveforms and no '
                'waveform promises?')
            return

        if snap_window and tinc is not None:
            # Align the window raster to multiples of tinc (system time zero).
            tmin = tmin if tmin is not None else self_tmin
            tmax = tmax if tmax is not None else self_tmax
            tmin = math.floor(tmin / tinc) * tinc
            tmax = math.ceil(tmax / tinc) * tinc
        else:
            tmin = tmin if tmin is not None else self_tmin + tpad
            tmax = tmax if tmax is not None else self_tmax - tpad

        tinc = tinc if tinc is not None else tmax - tmin

        try:
            if accessor_id is None:
                accessor_id = 'chopper%i' % self._n_choppers_active

            self._n_choppers_active += 1

            # eps guards against a spurious extra window from float rounding.
            eps = tinc * 1e-6
            if tinc != 0.0:
                nwin = int(((tmax - eps) - tmin) / tinc) + 1
            else:
                nwin = 1

            if grouping is None:
                codes_list = [codes]
            else:
                # Partition the available codes into groups via a throwaway
                # operator configured with the requested grouping scheme.
                operator = Operator(
                    filtering=CodesPatternFiltering(codes=codes),
                    grouping=grouping)

                available = set(self.get_codes(kind='waveform'))
                if self.downloads_enabled:
                    available.update(self.get_codes(kind='waveform_promise'))
                operator.update_mappings(sorted(available))

                codes_list = [
                    codes_patterns_list(scl)
                    for scl in operator.iter_in_codes()]

            ngroups = len(codes_list)
            for igroup, scl in enumerate(codes_list):
                for iwin in range(nwin):
                    # Last window is clipped at tmax.
                    wmin, wmax = tmin+iwin*tinc, min(tmin+(iwin+1)*tinc, tmax)

                    chopped = self.get_waveforms(
                        tmin=wmin-tpad,
                        tmax=wmax+tpad,
                        codes=scl,
                        codes_exclude=codes_exclude,
                        sample_rate_min=sample_rate_min,
                        sample_rate_max=sample_rate_max,
                        snap=snap,
                        include_last=include_last,
                        load_data=load_data,
                        want_incomplete=want_incomplete,
                        degap=degap,
                        maxgap=maxgap,
                        maxlap=maxlap,
                        accessor_id=accessor_id,
                        operator_params=operator_params,
                        channel_priorities=channel_priorities)

                    # Tell the cache that this consumer moved on.
                    self.advance_accessor(accessor_id)

                    yield Batch(
                        tmin=wmin,
                        tmax=wmax,
                        i=iwin,
                        n=nwin,
                        igroup=igroup,
                        ngroups=ngroups,
                        traces=chopped)

        finally:
            # Runs also when the consumer abandons the generator early.
            self._n_choppers_active -= 1
            if clear_accessor:
                self.clear_accessor(accessor_id, 'waveform')

2683 

2684 def _process_chopped( 

2685 self, chopped, degap, maxgap, maxlap, want_incomplete, tmin, tmax): 

2686 

2687 chopped.sort(key=lambda a: a.full_id) 

2688 if degap: 

2689 chopped = trace.degapper(chopped, maxgap=maxgap, maxlap=maxlap) 

2690 

2691 if not want_incomplete: 

2692 chopped_weeded = [] 

2693 for tr in chopped: 

2694 emin = tr.tmin - tmin 

2695 emax = tr.tmax + tr.deltat - tmax 

2696 if (abs(emin) <= 0.5*tr.deltat and abs(emax) <= 0.5*tr.deltat): 

2697 chopped_weeded.append(tr) 

2698 

2699 elif degap: 

2700 if (0. < emin <= 5. * tr.deltat 

2701 and -5. * tr.deltat <= emax < 0.): 

2702 

2703 tr.extend(tmin, tmax-tr.deltat, fillmethod='repeat') 

2704 chopped_weeded.append(tr) 

2705 

2706 chopped = chopped_weeded 

2707 

2708 return chopped 

2709 

2710 def _get_pyrocko_stations( 

2711 self, obj=None, tmin=None, tmax=None, time=None, codes=None, 

2712 on_error='raise'): 

2713 

2714 from pyrocko import model as pmodel 

2715 

2716 if codes is not None: 

2717 codes = codes_patterns_for_kind(STATION, codes) 

2718 

2719 by_nsl = defaultdict(lambda: (list(), list())) 

2720 for station in self.get_stations(obj, tmin, tmax, time, codes): 

2721 sargs = station._get_pyrocko_station_args() 

2722 by_nsl[station.codes.nsl][0].append(sargs) 

2723 

2724 if codes is not None: 

2725 codes = [model.CodesNSLCE(c) for c in codes] 

2726 

2727 for channel in self.get_channels(obj, tmin, tmax, time, codes): 

2728 sargs = channel._get_pyrocko_station_args() 

2729 sargs_list, channels_list = by_nsl[channel.codes.nsl] 

2730 sargs_list.append(sargs) 

2731 channels_list.append(channel) 

2732 

2733 pstations = [] 

2734 nsls = list(by_nsl.keys()) 

2735 nsls.sort() 

2736 for nsl in nsls: 

2737 sargs_list, channels_list = by_nsl[nsl] 

2738 sargs = util.consistency_merge( 

2739 [('',) + x for x in sargs_list], 

2740 error=on_error) 

2741 

2742 by_c = defaultdict(list) 

2743 for ch in channels_list: 

2744 by_c[ch.codes.channel].append(ch._get_pyrocko_channel_args()) 

2745 

2746 chas = list(by_c.keys()) 

2747 chas.sort() 

2748 pchannels = [] 

2749 for cha in chas: 

2750 list_of_cargs = by_c[cha] 

2751 cargs = util.consistency_merge( 

2752 [('',) + x for x in list_of_cargs], 

2753 error=on_error) 

2754 pchannels.append(pmodel.Channel(*cargs)) 

2755 

2756 pstations.append( 

2757 pmodel.Station(*sargs, channels=pchannels)) 

2758 

2759 return pstations 

2760 

2761 @property 

2762 def pile(self): 

2763 

2764 ''' 

2765 Emulates the older :py:class:`pyrocko.pile.Pile` interface. 

2766 

2767 This property exposes a :py:class:`pyrocko.squirrel.pile.Pile` object, 

2768 which emulates most of the older :py:class:`pyrocko.pile.Pile` methods 

2769 but uses the fluffy power of the Squirrel under the hood. 

2770 

2771 This interface can be used as a drop-in replacement for piles which are 

2772 used in existing scripts and programs for efficient waveform data 

2773 access. The Squirrel-based pile scales better for large datasets. Newer 

2774 scripts should use Squirrel's native methods to avoid the emulation 

2775 overhead. 

2776 ''' 

2777 from . import pile 

2778 

2779 if self._pile is None: 

2780 self._pile = pile.Pile(self) 

2781 

2782 return self._pile 

2783 

2784 def snuffle(self, **kwargs): 

2785 ''' 

2786 Look at dataset in Snuffler. 

2787 ''' 

2788 self.pile.snuffle(**kwargs) 

2789 

2790 def _gather_codes_keys(self, kind, gather, selector): 

2791 return set( 

2792 gather(codes) 

2793 for codes in self.iter_codes(kind) 

2794 if selector is None or selector(codes)) 

2795 

2796 def __str__(self): 

2797 return str(self.get_stats()) 

2798 

    def get_coverage(
            self, kind, tmin=None, tmax=None, codes=None, limit=None):

        '''
        Get coverage information.

        Get information about strips of gapless data coverage.

        :param kind:
            Content kind to be queried.
        :type kind:
            str

        :param tmin:
            Start time of query interval.
        :type tmin:
            :py:func:`~pyrocko.util.get_time_float`

        :param tmax:
            End time of query interval.
        :type tmax:
            :py:func:`~pyrocko.util.get_time_float`

        :param codes:
            If given, restrict query to given content codes patterns.
        :type codes:
            :py:class:`list` of :py:class:`~pyrocko.squirrel.model.Codes`
            objects appropriate for the queried content type, or anything which
            can be converted to such objects.

        :param limit:
            Limit query to return only up to a given maximum number of entries
            per matching time series (without setting this option, very gappy
            data could cause the query to execute for a very long time).
        :type limit:
            int

        :returns:
            Information about time spans covered by the requested time series
            data.
        :rtype:
            :py:class:`list` of :py:class:`~pyrocko.squirrel.model.Coverage`
        '''

        tmin_seconds, tmin_offset = model.tsplit(tmin)
        tmax_seconds, tmax_offset = model.tsplit(tmax)
        kind_id = to_kind_id(kind)

        codes_info = list(self._iter_codes_info(kind=kind))

        # kdata_all: (pattern, kind_codes_id, codes, deltat) per matching
        # time series.
        kdata_all = []
        if codes is None:
            for _, codes_entry, deltat, kind_codes_id, _ in codes_info:
                kdata_all.append(
                    (codes_entry, kind_codes_id, codes_entry, deltat))

        else:
            for codes_entry in codes:
                pattern = to_codes(kind_id, codes_entry)
                for _, codes_entry, deltat, kind_codes_id, _ in codes_info:
                    if model.match_codes(pattern, codes_entry):
                        kdata_all.append(
                            (pattern, kind_codes_id, codes_entry, deltat))

        kind_codes_ids = [x[1] for x in kdata_all]

        # Number of nuts covering exactly the tmin instant, per
        # (codes, deltat) - used as the initial coverage count.
        counts_at_tmin = {}
        if tmin is not None:
            for nut in self.iter_nuts(
                    kind, tmin, tmin, kind_codes_ids=kind_codes_ids):

                k = nut.codes, nut.deltat
                if k not in counts_at_tmin:
                    counts_at_tmin[k] = 0

                counts_at_tmin[k] += 1

        coverages = []
        for pattern, kind_codes_id, codes_entry, deltat in kdata_all:
            # entry layout: [pattern, codes, deltat, tmin, tmax, changes]
            entry = [pattern, codes_entry, deltat, None, None, []]
            # First/last coverage change time of the whole series
            # (ASC -> entry[3], DESC -> entry[4]).
            for i, order in [(0, 'ASC'), (1, 'DESC')]:
                sql = self._sql('''
                    SELECT
                        time_seconds,
                        time_offset
                    FROM %(db)s.%(coverage)s
                    WHERE
                        kind_codes_id == ?
                    ORDER BY
                        kind_codes_id ''' + order + ''',
                        time_seconds ''' + order + ''',
                        time_offset ''' + order + '''
                    LIMIT 1
                ''')

                for row in self._conn.execute(sql, [kind_codes_id]):
                    entry[3+i] = model.tjoin(row[0], row[1])

            if None in entry[3:5]:
                # No coverage rows at all for this series - skip it.
                continue

            args = [kind_codes_id]

            sql_time = ''
            if tmin is not None:
                # intentionally < because (== tmin) is queried from nuts
                sql_time += ' AND ( ? < time_seconds ' \
                    'OR ( ? == time_seconds AND ? < time_offset ) ) '
                args.extend([tmin_seconds, tmin_seconds, tmin_offset])

            if tmax is not None:
                sql_time += ' AND ( time_seconds < ? ' \
                    'OR ( ? == time_seconds AND time_offset <= ? ) ) '
                args.extend([tmax_seconds, tmax_seconds, tmax_offset])

            sql_limit = ''
            if limit is not None:
                sql_limit = ' LIMIT ?'
                args.append(limit)

            sql = self._sql('''
                SELECT
                    time_seconds,
                    time_offset,
                    step
                FROM %(db)s.%(coverage)s
                WHERE
                    kind_codes_id == ?
                    ''' + sql_time + '''
                ORDER BY
                    kind_codes_id,
                    time_seconds,
                    time_offset
            ''' + sql_limit)

            rows = list(self._conn.execute(sql, args))

            if limit is not None and len(rows) == limit:
                # Truncated result: mark the change list as unavailable.
                entry[-1] = None
            else:
                # Accumulate (time, coverage count) change points.
                counts = counts_at_tmin.get((codes_entry, deltat), 0)
                tlast = None
                if tmin is not None:
                    entry[-1].append((tmin, counts))
                    tlast = tmin

                for row in rows:
                    t = model.tjoin(row[0], row[1])
                    counts += row[2]
                    entry[-1].append((t, counts))
                    tlast = t

                if tmax is not None and (tlast is None or tlast != tmax):
                    # Close the change list at the query end time.
                    entry[-1].append((tmax, counts))

            coverages.append(model.Coverage.from_values(entry + [kind_id]))

        return coverages

2957 

    def get_stationxml(
            self, obj=None, tmin=None, tmax=None, time=None, codes=None,
            level='response', on_error='raise'):

        '''
        Get station/channel/response metadata in StationXML representation.

        %(query_args)s

        :returns:
            :py:class:`~pyrocko.io.stationxml.FDSNStationXML` object.
        '''

        if level not in ('network', 'station', 'channel', 'response'):
            raise ValueError('Invalid level: %s' % level)

        tmin, tmax, codes = self._get_selection_args(
            CHANNEL, obj, tmin, tmax, time, codes)

        def tts(t):
            # Format time for log messages; '<none>' for open interval ends.
            if t is None:
                return '<none>'
            else:
                return util.tts(t, format='%Y-%m-%d %H:%M:%S')

        # Select an error-handling strategy for inconsistent metadata.
        # NOTE(review): for any other on_error value, handle_error stays
        # undefined and a later call would raise NameError.
        if on_error == 'ignore':
            def handle_error(exc):
                pass

        elif on_error == 'warn':
            def handle_error(exc):
                logger.warning(str(exc))

        elif on_error == 'raise':
            def handle_error(exc):
                raise exc

        def use_first(node_type_name, codes, k, group):
            if on_error == 'warn':
                logger.warning(
                    'Duplicates for %s %s, %s - %s -> using first' % (
                        node_type_name,
                        '.'.join(codes),
                        tts(k[0]), tts(k[1])))

            return group[0]

        def deduplicate(node_type_name, codes, nodes):
            # Keep only one node per (start_date, end_date) epoch.
            groups = defaultdict(list)
            for node in nodes:
                k = (node.start_date, node.end_date)
                groups[k].append(node)

            return [
                use_first(node_type_name, codes, k, group)
                for (k, group) in groups.items()]

        filtering = CodesPatternFiltering(codes=codes)

        # Distinct NSLC tuples of all matching channels.
        nslcs = list(set(
            codes.nslc for codes in
            filtering.filter(self.get_codes(kind='channel'))))

        from pyrocko.io import stationxml as sx

        networks = []
        for net, stas in prefix_tree(nslcs):
            network = sx.Network(code=net)
            networks.append(network)

            if level not in ('station', 'channel', 'response'):
                continue

            for sta, locs in stas:
                stations = self.get_stations(
                    tmin=tmin,
                    tmax=tmax,
                    codes=(net, sta, '*'),
                    model='stationxml')

                if on_error != 'raise':
                    stations = deduplicate(
                        'Station', (net, sta), stations)

                errors = sx.check_overlaps(
                    'Station', (net, sta), stations)

                if errors:
                    handle_error(error.Duplicate(
                        'Overlapping/duplicate station info:\n  %s'
                        % '\n  '.join(errors)))

                network.station_list.extend(stations)

                if level not in ('channel', 'response'):
                    continue

                for loc, chas in locs:
                    for cha, _ in chas:
                        channels = self.get_channels(
                            tmin=tmin,
                            tmax=tmax,
                            codes=(net, sta, loc, cha),
                            model='stationxml')

                        if on_error != 'raise':
                            channels = deduplicate(
                                'Channel', (net, sta, loc, cha), channels)

                        errors = sx.check_overlaps(
                            'Channel', (net, sta, loc, cha), channels)

                        if errors:
                            handle_error(error.Duplicate(
                                'Overlapping/duplicate channel info:\n  %s'
                                % '\n  '.join(errors)))

                        for channel in channels:
                            # Attach each channel epoch to the station epoch
                            # containing it.
                            station = sx.find_containing(stations, channel)
                            if station is not None:
                                station.channel_list.append(channel)
                            else:
                                handle_error(error.NotAvailable(
                                    'No station or station epoch found '
                                    'for channel: %s' % '.'.join(
                                        (net, sta, loc, cha))))

                                continue

                            if level != 'response':
                                continue

                            try:
                                response_sq, response_sx = self.get_response(
                                    codes=(net, sta, loc, cha),
                                    tmin=channel.start_date,
                                    tmax=channel.end_date,
                                    model='stationxml+',
                                    on_duplicate=on_error)

                            except error.NotAvailable as e:
                                handle_error(e)
                                continue

                            if not (
                                    sx.eq_open(
                                        channel.start_date, response_sq.tmin)
                                    and sx.eq_open(
                                        channel.end_date, response_sq.tmax)):

                                handle_error(error.Inconsistencies(
                                    'Response time span does not match '
                                    'channel time span: %s' % '.'.join(
                                        (net, sta, loc, cha))))

                            channel.response = response_sx

        return sx.FDSNStationXML(
            source='Generated by Pyrocko Squirrel.',
            network_list=networks)

3118 

3119 def add_operator(self, op): 

3120 self._operators.append(op) 

3121 

3122 def update_operator_mappings(self): 

3123 available = self.get_codes(kind=('channel')) 

3124 

3125 for operator in self._operators: 

3126 operator.update_mappings(available, self._operator_registry) 

3127 

3128 def iter_operator_mappings(self): 

3129 for operator in self._operators: 

3130 for in_codes, out_codes in operator.iter_mappings(): 

3131 yield operator, in_codes, out_codes 

3132 

3133 def get_operator_mappings(self): 

3134 return list(self.iter_operator_mappings()) 

3135 

3136 def get_operator(self, codes): 

3137 try: 

3138 return self._operator_registry[codes][0] 

3139 except KeyError: 

3140 return None 

3141 

3142 def get_operator_group(self, codes): 

3143 try: 

3144 return self._operator_registry[codes] 

3145 except KeyError: 

3146 return None, (None, None, None) 

3147 

3148 def iter_operator_codes(self): 

3149 for _, _, out_codes in self.iter_operator_mappings(): 

3150 for codes in out_codes: 

3151 yield codes 

3152 

3153 def get_operator_codes(self): 

3154 return list(self.iter_operator_codes()) 

3155 

3156 def print_tables(self, table_names=None, stream=None): 

3157 ''' 

3158 Dump raw database tables in textual form (for debugging purposes). 

3159 

3160 :param table_names: 

3161 Names of tables to be dumped or ``None`` to dump all. 

3162 :type table_names: 

3163 :py:class:`list` of :py:class:`str` 

3164 

3165 :param stream: 

3166 Open file or ``None`` to dump to standard output. 

3167 ''' 

3168 

3169 if stream is None: 

3170 stream = sys.stdout 

3171 

3172 if isinstance(table_names, str): 

3173 table_names = [table_names] 

3174 

3175 if table_names is None: 

3176 table_names = [ 

3177 'selection_file_states', 

3178 'selection_nuts', 

3179 'selection_kind_codes_count', 

3180 'files', 'nuts', 'kind_codes', 'kind_codes_count'] 

3181 

3182 m = { 

3183 'selection_file_states': '%(db)s.%(file_states)s', 

3184 'selection_nuts': '%(db)s.%(nuts)s', 

3185 'selection_kind_codes_count': '%(db)s.%(kind_codes_count)s', 

3186 'files': 'files', 

3187 'nuts': 'nuts', 

3188 'kind_codes': 'kind_codes', 

3189 'kind_codes_count': 'kind_codes_count'} 

3190 

3191 for table_name in table_names: 

3192 self._database.print_table( 

3193 m[table_name] % self._names, stream=stream) 

3194 

3195 

class SquirrelStats(Object):
    '''
    Container to hold statistics about contents available from a Squirrel.

    See also :py:meth:`Squirrel.get_stats`.
    '''

    # Guts-declared attributes; these form the serialized interface of the
    # stats object and must stay in sync with Squirrel.get_stats().
    nfiles = Int.T(
        help='Number of files in selection.')
    nnuts = Int.T(
        help='Number of index nuts in selection.')
    codes = List.T(
        Tuple.T(content_t=String.T()),
        help='Available code sequences in selection, e.g. '
             '(agency, network, station, location) for stations nuts.')
    kinds = List.T(
        String.T(),
        help='Available content types in selection.')
    total_size = Int.T(
        help='Aggregated file size of files is selection.')
    counts = Dict.T(
        String.T(), Dict.T(Tuple.T(content_t=String.T()), Int.T()),
        help='Breakdown of how many nuts of any content type and code '
             'sequence are available in selection, ``counts[kind][codes]``.')
    time_spans = Dict.T(
        String.T(), Tuple.T(content_t=Timestamp.T()),
        help='Time spans by content type.')
    sources = List.T(
        String.T(),
        help='Descriptions of attached sources.')
    operators = List.T(
        String.T(),
        help='Descriptions of attached operators.')

    def __str__(self):

        def fmt_time(t):
            # Unset or open (global min/max) time bounds render as '<none>'.
            if t is None or t in (model.g_tmin, model.g_tmax):
                return '<none>'
            return util.tts(t)

        def fmt_block(entries):
            # Render a list of description strings as an indented block.
            if not entries:
                return '<none>'
            return '\n' + '\n'.join(' ' + entry for entry in entries)

        def fmt_table(rows):
            # Left-justify each column to the width of its longest cell.
            widths = [max(len(w) for w in col) for col in zip(*rows)]
            return '\n'.join(
                ' '.join(w.ljust(n) for n, w in zip(widths, row))
                for row in rows)

        def fmt_indent(text):
            return '\n'.join(' ' + line for line in text.splitlines())

        if self.kinds:
            rows = []
            for kind in sorted(self.kinds):
                tmin, tmax = self.time_spans[kind]
                rows.append((
                    kind + ':',
                    str(sum(self.counts[kind].values())),
                    fmt_time(tmin),
                    '-',
                    fmt_time(tmax)))

            stspans = '\n' + fmt_indent(fmt_table(rows))
        else:
            stspans = '<none>'

        template = '''
Number of files: %i
Total size of known files: %s
Number of index nuts: %i
Available content kinds: %s
Available codes: %s
Sources: %s
Operators: %s'''

        return (template % (
            self.nfiles,
            util.human_bytesize(self.total_size),
            self.nnuts,
            stspans,
            model.codes_to_str_abbreviated(self.codes),
            fmt_block(self.sources),
            fmt_block(self.operators))).lstrip()

3276 

3277 

# Public API of this module.
__all__ = [
    'Squirrel',
    'SquirrelStats',
]