1# http://pyrocko.org - GPLv3 

2# 

3# The Pyrocko Developers, 21st Century 

4# ---|P------/S----------~Lg---------- 

5 

6from __future__ import absolute_import, print_function 

7 

8import sys 

9import os 

10 

11import math 

12import logging 

13import threading 

14import queue 

15from collections import defaultdict 

16 

17from pyrocko.guts import Object, Int, List, Tuple, String, Timestamp, Dict 

18from pyrocko import util, trace 

19from pyrocko.progress import progress 

20 

21from . import model, io, cache, dataset 

22 

23from .model import to_kind_id, WaveformOrder, to_kind, to_codes, \ 

24 STATION, CHANNEL, RESPONSE, EVENT, WAVEFORM, codes_patterns_list, \ 

25 codes_patterns_for_kind 

26from .client import fdsn, catalog 

27from .selection import Selection, filldocs 

28from .database import abspath 

29from .operators.base import Operator, CodesPatternFilter 

30from . import client, environment, error 

31 

32logger = logging.getLogger('psq.base') 

33 

34guts_prefix = 'squirrel' 

35 

36 

def make_task(*args):
    '''
    Create a progress task which reports through this module's logger.

    All positional arguments are handed on unchanged to ``progress.task``.

    :returns:
        Whatever ``progress.task`` returns for the given arguments.
    '''
    task = progress.task(*args, logger=logger)
    return task

39 

40 

def lpick(condition, seq):
    '''
    Partition the elements of a sequence by a predicate.

    :param condition:
        Callable evaluated once for every element.
    :param seq:
        Iterable with the elements to be sorted into the two bins.

    :returns:
        Tuple ``(rejected, accepted)`` of two lists: first the elements for
        which ``condition`` is falsy, then those for which it is truthy. The
        input order is kept within each list.
    '''
    rejected = []
    accepted = []
    for item in seq:
        if condition(item):
            accepted.append(item)
        else:
            rejected.append(item)

    return rejected, accepted

47 

48 

def blocks(tmin, tmax, deltat, nsamples_block=100000):
    '''
    Iterate over block-aligned time windows covering a time span.

    The block duration is ``deltat * nsamples_block`` (converted with
    ``util.to_time_float``). Block boundaries are integer multiples of that
    duration.

    :param tmin:
        Start of the time span to be covered.
    :param tmax:
        End of the time span to be covered.
    :param deltat:
        Sampling interval, multiplied with ``nsamples_block`` to get the
        block duration.
    :param nsamples_block:
        Number of samples per block.

    :yields:
        Tuples ``(tmin_block, tmax_block)``, from the block containing
        ``tmin`` up to the block ending at or after ``tmax``.
    '''
    block_length = util.to_time_float(deltat * nsamples_block)
    iblock = int(math.floor(tmin / block_length))
    iblock_end = int(math.ceil(tmax / block_length))
    while iblock < iblock_end:
        yield iblock * block_length, (iblock+1) * block_length
        iblock += 1

55 

56 

def gaps(avail, tmin, tmax):
    '''
    Find the parts of a time span which are not covered.

    :param avail:
        Iterable of ``(tmin, tmax)`` pairs describing covered intervals. Each
        pair must satisfy ``tmin < tmax``.
    :param tmin:
        Start of the time span to be inspected.
    :param tmax:
        End of the time span to be inspected, must be larger than ``tmin``.

    :returns:
        List of ``(tmin, tmax)`` tuples, in ascending order, with the
        non-empty uncovered intervals inside the inspected time span.
    '''
    assert tmin < tmax

    # Edge events: +1 where coverage starts, -1 where it ends. The two
    # events for the inspected span are inverted, so that everything outside
    # of it counts as covered (the level starts at 1 below).
    events = [(tmax, 1), (tmin, -1)]
    for span in avail:
        tmin_a, tmax_a = span
        assert tmin_a < tmax_a
        events.extend(((tmin_a, 1), (tmax_a, -1)))

    found = []
    level = 1
    tmin_gap = None
    for t, step in sorted(events):
        if level == 1 and step == -1:
            # Coverage drops to zero here: a gap may begin.
            tmin_gap = t
        elif level == 0 and step == 1 and tmin_gap is not None:
            # Coverage resumes: close the gap, ignore empty ones.
            if tmin_gap != t:
                found.append((tmin_gap, t))

        level += step

    return found

81 

82 

def order_key(order):
    '''
    Get the sort key of an order.

    :param order:
        Object with the attributes ``codes``, ``tmin`` and ``tmax``.

    :returns:
        Tuple ``(codes, tmin, tmax)``.
    '''
    codes, tmin, tmax = order.codes, order.tmin, order.tmax
    return codes, tmin, tmax

85 

86 

87def _is_exact(pat): 

88 return not ('*' in pat or '?' in pat or ']' in pat or '[' in pat) 

89 

90 

def prefix_tree(tups):
    '''
    Arrange equally long tuples into a sorted tree of common prefixes.

    :param tups:
        Sequence of tuples. The length of the first tuple decides how deep
        the tree gets.

    :returns:
        List of ``(element, subtree)`` pairs sorted by ``element``, where
        ``subtree`` is built in the same way from the remainders of all
        tuples starting with ``element``. On the last level the subtrees are
        empty lists. An empty input gives an empty list.
    '''
    if not tups:
        return []

    if len(tups[0]) == 1:
        leaves = [(tup[0], []) for tup in tups]
        leaves.sort()
        return leaves

    remainders = defaultdict(list)
    for tup in tups:
        head, tail = tup[0], tup[1:]
        remainders[head].append(tail)

    return [
        (head, prefix_tree(remainders[head]))
        for head in sorted(remainders)]

107 

108 

class Batch(object):
    '''
    Result container for one window of window-wise waveform extraction.

    Objects of this class carry the state and the results which are yielded
    for each time window by the :py:meth:`Squirrel.chopper_waveforms` method.

    *Attributes:*

    .. py:attribute:: tmin

        Begin of the time window.

    .. py:attribute:: tmax

        End of the time window.

    .. py:attribute:: i

        Position of the time window within its sequence.

    .. py:attribute:: n

        Number of time windows in the sequence.

    .. py:attribute:: igroup

        Position of the sequence group the time window belongs to.

    .. py:attribute:: ngroups

        Number of sequence groups.

    .. py:attribute:: traces

        Waveforms extracted for the time window.
    '''

    def __init__(self, tmin, tmax, i, n, igroup, ngroups, traces):
        # Time window.
        self.tmin, self.tmax = tmin, tmax

        # Window counter and group counter.
        self.i, self.n = i, n
        self.igroup, self.ngroups = igroup, ngroups

        # Payload.
        self.traces = traces

155 

156 

157class Squirrel(Selection): 

158 ''' 

159 Prompt, lazy, indexing, caching, dynamic seismological dataset access. 

160 

161 :param env: 

162 Squirrel environment instance or directory path to use as starting 

163 point for its detection. By default, the current directory is used as 

164 starting point. When searching for a usable environment the directory 

165 ``'.squirrel'`` or ``'squirrel'`` in the current (or starting point) 

166 directory is used if it exists, otherwise the parent directories are 

167 search upwards for the existence of such a directory. If no such 

168 directory is found, the user's global Squirrel environment 

169 ``'$HOME/.pyrocko/squirrel'`` is used. 

170 :type env: 

171 :py:class:`~pyrocko.squirrel.environment.Environment` or 

172 :py:class:`str` 

173 

174 :param database: 

175 Database instance or path to database. By default the 

176 database found in the detected Squirrel environment is used. 

177 :type database: 

178 :py:class:`~pyrocko.squirrel.database.Database` or :py:class:`str` 

179 

180 :param cache_path: 

181 Directory path to use for data caching. By default, the ``'cache'`` 

182 directory in the detected Squirrel environment is used. 

183 :type cache_path: 

184 :py:class:`str` 

185 

186 :param persistent: 

187 If given a name, create a persistent selection. 

188 :type persistent: 

189 :py:class:`str` 

190 

191 This is the central class of the Squirrel framework. It provides a unified 

192 interface to query and access seismic waveforms, station meta-data and 

193 event information from local file collections and remote data sources. For 

194 prompt responses, a profound database setup is used under the hood. To 

195 speed up assemblage of ad-hoc data selections, files are indexed on first 

196 use and the extracted meta-data is remembered in the database for 

197 subsequent accesses. Bulk data is lazily loaded from disk and remote 

198 sources, just when requested. Once loaded, data is cached in memory to 

199 expedite typical access patterns. Files and data sources can be dynamically 

200 added to and removed from the Squirrel selection at runtime. 

201 

202 Queries are restricted to the contents of the files currently added to the 

203 Squirrel selection (usually a subset of the file meta-information 

204 collection in the database). This list of files is referred to here as the 

205 "selection". By default, temporary tables are created in the attached 

206 database to hold the names of the files in the selection as well as various 

207 indices and counters. These tables are only visible inside the application 

208 which created them and are deleted when the database connection is closed 

209 or the application exits. To create a selection which is not deleted at 

210 exit, supply a name to the ``persistent`` argument of the Squirrel 

211 constructor. Persistent selections are shared among applications using the 

212 same database. 

213 

214 **Method summary** 

215 

216 Some of the methods are implemented in :py:class:`Squirrel`'s base class 

217 :py:class:`~pyrocko.squirrel.selection.Selection`. 

218 

219 .. autosummary:: 

220 

221 ~Squirrel.add 

222 ~Squirrel.add_source 

223 ~Squirrel.add_fdsn 

224 ~Squirrel.add_catalog 

225 ~Squirrel.add_dataset 

226 ~Squirrel.add_virtual 

227 ~Squirrel.update 

228 ~Squirrel.update_waveform_promises 

229 ~Squirrel.advance_accessor 

230 ~Squirrel.clear_accessor 

231 ~Squirrel.reload 

232 ~pyrocko.squirrel.selection.Selection.iter_paths 

233 ~Squirrel.iter_nuts 

234 ~Squirrel.iter_kinds 

235 ~Squirrel.iter_deltats 

236 ~Squirrel.iter_codes 

237 ~pyrocko.squirrel.selection.Selection.get_paths 

238 ~Squirrel.get_nuts 

239 ~Squirrel.get_kinds 

240 ~Squirrel.get_deltats 

241 ~Squirrel.get_codes 

242 ~Squirrel.get_counts 

243 ~Squirrel.get_time_span 

244 ~Squirrel.get_deltat_span 

245 ~Squirrel.get_nfiles 

246 ~Squirrel.get_nnuts 

247 ~Squirrel.get_total_size 

248 ~Squirrel.get_stats 

249 ~Squirrel.get_content 

250 ~Squirrel.get_stations 

251 ~Squirrel.get_channels 

252 ~Squirrel.get_responses 

253 ~Squirrel.get_events 

254 ~Squirrel.get_waveform_nuts 

255 ~Squirrel.get_waveforms 

256 ~Squirrel.chopper_waveforms 

257 ~Squirrel.get_coverage 

258 ~Squirrel.pile 

259 ~Squirrel.snuffle 

260 ~Squirrel.glob_codes 

261 ~pyrocko.squirrel.selection.Selection.get_database 

262 ~Squirrel.print_tables 

263 ''' 

264 

def __init__(
        self, env=None, database=None, cache_path=None, persistent=None):
    '''
    Set up the selection, in-memory state and database tables.

    Anything not given explicitly (``database``, ``cache_path``,
    ``persistent``) is taken from the Squirrel environment ``env``. If
    ``env`` is not an environment instance, it is resolved with
    ``environment.get_environment``.
    '''

    if not isinstance(env, environment.Environment):
        env = environment.get_environment(env)

    database = env.expand_path(env.database_path) \
        if database is None else database

    cache_path = env.expand_path(env.cache_path) \
        if cache_path is None else cache_path

    persistent = env.persistent if persistent is None else persistent

    Selection.__init__(
        self, database=database, persistent=persistent)

    # Paths in the database are handled relative to the directory
    # containing the environment's base path.
    basepath = os.path.dirname(env.get_basepath())
    self.get_database().set_basepath(basepath)

    self._content_caches = dict(
        waveform=cache.ContentCache(),
        default=cache.ContentCache())

    self._cache_path = cache_path

    self._sources = []
    self._operators = []
    self._operator_registry = {}

    self._pile = None
    self._n_choppers_active = 0

    # Names of the tables owned by this selection.
    self._names.update(
        nuts=self.name + '_nuts',
        kind_codes_count=self.name + '_kind_codes_count',
        coverage=self.name + '_coverage')

    with self.transaction('create tables') as cursor:
        self._create_tables_squirrel(cursor)

305 

def _create_tables_squirrel(self, cursor):
    '''
    Create the tables, indices and triggers backing this selection.

    :param cursor:
        Database cursor on which all statements are executed.

    All statements use ``IF NOT EXISTS``, so calling this on an already
    set up selection does not recreate anything. Table and database names
    are substituted into the SQL templates by ``self._sql``.
    '''

    # Selection-local index of content entities ("nuts").
    cursor.execute(self._register_table(self._sql(
        '''
            CREATE TABLE IF NOT EXISTS %(db)s.%(nuts)s (
                nut_id integer PRIMARY KEY,
                file_id integer,
                file_segment integer,
                file_element integer,
                kind_id integer,
                kind_codes_id integer,
                tmin_seconds integer,
                tmin_offset integer,
                tmax_seconds integer,
                tmax_offset integer,
                kscale integer)
        ''')))

    # Number of nuts in the selection per kind_codes_id, kept up to date by
    # the *_inc_kind_codes / *_dec_kind_codes triggers below.
    cursor.execute(self._register_table(self._sql(
        '''
            CREATE TABLE IF NOT EXISTS %(db)s.%(kind_codes_count)s (
                kind_codes_id integer PRIMARY KEY,
                count integer)
        ''')))

    # Each (file, segment, element) may appear only once in the selection.
    cursor.execute(self._sql(
        '''
            CREATE UNIQUE INDEX IF NOT EXISTS %(db)s.%(nuts)s_file_element
                ON %(nuts)s (file_id, file_segment, file_element)
        '''))

    # Lookup indices by file and by time (the time indices are prefixed
    # with kind_id).
    cursor.execute(self._sql(
        '''
            CREATE INDEX IF NOT EXISTS %(db)s.%(nuts)s_index_file_id
            ON %(nuts)s (file_id)
        '''))

    cursor.execute(self._sql(
        '''
            CREATE INDEX IF NOT EXISTS %(db)s.%(nuts)s_index_tmin_seconds
            ON %(nuts)s (kind_id, tmin_seconds)
        '''))

    cursor.execute(self._sql(
        '''
            CREATE INDEX IF NOT EXISTS %(db)s.%(nuts)s_index_tmax_seconds
            ON %(nuts)s (kind_id, tmax_seconds)
        '''))

    cursor.execute(self._sql(
        '''
            CREATE INDEX IF NOT EXISTS %(db)s.%(nuts)s_index_kscale
            ON %(nuts)s (kind_id, kscale, tmin_seconds)
        '''))

    # Remove the selection's nuts of a file before the file's row is
    # deleted from the main files table.
    cursor.execute(self._sql(
        '''
            CREATE TRIGGER IF NOT EXISTS %(db)s.%(nuts)s_delete_nuts
            BEFORE DELETE ON main.files FOR EACH ROW
            BEGIN
              DELETE FROM %(nuts)s WHERE file_id == old.file_id;
            END
        '''))

    # trigger only on size to make silent update of mtime possible
    cursor.execute(self._sql(
        '''
            CREATE TRIGGER IF NOT EXISTS %(db)s.%(nuts)s_delete_nuts2
            BEFORE UPDATE OF size ON main.files FOR EACH ROW
            BEGIN
              DELETE FROM %(nuts)s WHERE file_id == old.file_id;
            END
        '''))

    # Remove the selection's nuts of a file before the file's row is
    # deleted from this selection's file_states table.
    cursor.execute(self._sql(
        '''
            CREATE TRIGGER IF NOT EXISTS
                %(db)s.%(file_states)s_delete_files
            BEFORE DELETE ON %(db)s.%(file_states)s FOR EACH ROW
            BEGIN
              DELETE FROM %(nuts)s WHERE file_id == old.file_id;
            END
        '''))

    # Increment the per-kind_codes counter for every nut inserted; the row
    # is created with count 0 first if it does not exist yet.
    cursor.execute(self._sql(
        '''
            CREATE TRIGGER IF NOT EXISTS %(db)s.%(nuts)s_inc_kind_codes
            BEFORE INSERT ON %(nuts)s FOR EACH ROW
            BEGIN
                INSERT OR IGNORE INTO %(kind_codes_count)s VALUES
                (new.kind_codes_id, 0);
                UPDATE %(kind_codes_count)s
                SET count = count + 1
                WHERE new.kind_codes_id
                    == %(kind_codes_count)s.kind_codes_id;
            END
        '''))

    # Decrement the per-kind_codes counter for every nut deleted.
    cursor.execute(self._sql(
        '''
            CREATE TRIGGER IF NOT EXISTS %(db)s.%(nuts)s_dec_kind_codes
            BEFORE DELETE ON %(nuts)s FOR EACH ROW
            BEGIN
                UPDATE %(kind_codes_count)s
                SET count = count - 1
                WHERE old.kind_codes_id
                    == %(kind_codes_count)s.kind_codes_id;
            END
        '''))

    # Coverage table: per kind_codes_id and time instant, the net change
    # ("step") in the number of nuts starting (+1) or ending (-1) there.
    cursor.execute(self._register_table(self._sql(
        '''
            CREATE TABLE IF NOT EXISTS %(db)s.%(coverage)s (
                kind_codes_id integer,
                time_seconds integer,
                time_offset integer,
                step integer)
        ''')))

    # The uniqueness of (kind_codes_id, time) is what makes the
    # ``INSERT OR IGNORE`` statements in the coverage triggers no-ops when
    # an entry for that instant already exists.
    cursor.execute(self._sql(
        '''
            CREATE UNIQUE INDEX IF NOT EXISTS %(db)s.%(coverage)s_time
                ON %(coverage)s (kind_codes_id, time_seconds, time_offset)
        '''))

    # On insertion of a nut: step +1 at its tmin, step -1 at its tmax,
    # then drop entries at these two instants whose step has become 0.
    cursor.execute(self._sql(
        '''
            CREATE TRIGGER IF NOT EXISTS %(db)s.%(nuts)s_add_coverage
            AFTER INSERT ON %(nuts)s FOR EACH ROW
            BEGIN
                INSERT OR IGNORE INTO %(coverage)s VALUES
                (new.kind_codes_id, new.tmin_seconds, new.tmin_offset, 0)
                ;
                UPDATE %(coverage)s
                SET step = step + 1
                WHERE new.kind_codes_id == %(coverage)s.kind_codes_id
                    AND new.tmin_seconds == %(coverage)s.time_seconds
                    AND new.tmin_offset == %(coverage)s.time_offset
                ;
                INSERT OR IGNORE INTO %(coverage)s VALUES
                (new.kind_codes_id, new.tmax_seconds, new.tmax_offset, 0)
                ;
                UPDATE %(coverage)s
                SET step = step - 1
                WHERE new.kind_codes_id == %(coverage)s.kind_codes_id
                    AND new.tmax_seconds == %(coverage)s.time_seconds
                    AND new.tmax_offset == %(coverage)s.time_offset
                ;
                DELETE FROM %(coverage)s
                WHERE new.kind_codes_id == %(coverage)s.kind_codes_id
                    AND new.tmin_seconds == %(coverage)s.time_seconds
                    AND new.tmin_offset == %(coverage)s.time_offset
                    AND step == 0
                ;
                DELETE FROM %(coverage)s
                WHERE new.kind_codes_id == %(coverage)s.kind_codes_id
                    AND new.tmax_seconds == %(coverage)s.time_seconds
                    AND new.tmax_offset == %(coverage)s.time_offset
                    AND step == 0
                ;
            END
        '''))

    # On deletion of a nut: the exact inverse of the insertion trigger
    # (step -1 at tmin, step +1 at tmax, then drop zero-step entries).
    cursor.execute(self._sql(
        '''
            CREATE TRIGGER IF NOT EXISTS %(db)s.%(nuts)s_remove_coverage
            BEFORE DELETE ON %(nuts)s FOR EACH ROW
            BEGIN
                INSERT OR IGNORE INTO %(coverage)s VALUES
                (old.kind_codes_id, old.tmin_seconds, old.tmin_offset, 0)
                ;
                UPDATE %(coverage)s
                SET step = step - 1
                WHERE old.kind_codes_id == %(coverage)s.kind_codes_id
                    AND old.tmin_seconds == %(coverage)s.time_seconds
                    AND old.tmin_offset == %(coverage)s.time_offset
                ;
                INSERT OR IGNORE INTO %(coverage)s VALUES
                (old.kind_codes_id, old.tmax_seconds, old.tmax_offset, 0)
                ;
                UPDATE %(coverage)s
                SET step = step + 1
                WHERE old.kind_codes_id == %(coverage)s.kind_codes_id
                    AND old.tmax_seconds == %(coverage)s.time_seconds
                    AND old.tmax_offset == %(coverage)s.time_offset
                ;
                DELETE FROM %(coverage)s
                WHERE old.kind_codes_id == %(coverage)s.kind_codes_id
                    AND old.tmin_seconds == %(coverage)s.time_seconds
                    AND old.tmin_offset == %(coverage)s.time_offset
                    AND step == 0
                ;
                DELETE FROM %(coverage)s
                WHERE old.kind_codes_id == %(coverage)s.kind_codes_id
                    AND old.tmax_seconds == %(coverage)s.time_seconds
                    AND old.tmax_offset == %(coverage)s.time_offset
                    AND step == 0
                ;
            END
        '''))

506 

def _delete(self):
    '''
    Drop the triggers and tables created for this Squirrel.

    Afterwards, the deletion is continued in the base class with
    ``Selection._delete``.
    '''

    # One statement per line; each line is executed on its own.
    statements = '''
            DROP TRIGGER %(db)s.%(nuts)s_delete_nuts;
            DROP TRIGGER %(db)s.%(nuts)s_delete_nuts2;
            DROP TRIGGER %(db)s.%(file_states)s_delete_files;
            DROP TRIGGER %(db)s.%(nuts)s_inc_kind_codes;
            DROP TRIGGER %(db)s.%(nuts)s_dec_kind_codes;
            DROP TABLE %(db)s.%(nuts)s;
            DROP TABLE %(db)s.%(kind_codes_count)s;
            DROP TRIGGER IF EXISTS %(db)s.%(nuts)s_add_coverage;
            DROP TRIGGER IF EXISTS %(db)s.%(nuts)s_remove_coverage;
            DROP TABLE IF EXISTS %(db)s.%(coverage)s;
            '''.strip().splitlines()

    with self.transaction('delete tables') as cursor:
        for statement in statements:
            cursor.execute(self._sql(statement))

    Selection._delete(self)

527 

@filldocs
def add(self,
        paths,
        kinds=None,
        format='detect',
        include=None,
        exclude=None,
        check=True):

    '''
    Add files to the selection.

    :param paths:
        Iterator yielding paths to files or directories to be added to the
        selection. Recurses into directories. If given a ``str``, it
        is treated as a single path to be added.
    :type paths:
        :py:class:`list` of :py:class:`str`

    :param kinds:
        Content types to be made available through the Squirrel selection.
        By default, all known content types are accepted. If given a
        ``str``, it is treated as a single content type.
    :type kinds:
        :py:class:`list` of :py:class:`str`

    :param format:
        File format identifier or ``'detect'`` to enable auto-detection
        (available: %(file_formats)s).
    :type format:
        str

    :param include:
        If not ``None``, files are only included if their paths match the
        given regular expression pattern.
    :type include:
        str

    :param exclude:
        If not ``None``, files are only included if their paths do not
        match the given regular expression pattern.
    :type exclude:
        str

    :param check:
        If ``True``, all file modification times are checked to see if
        cached information has to be updated (slow). If ``False``, only
        previously unknown files are indexed and cached information is used
        for known files, regardless of file state (fast, corresponds to
        Squirrel's ``--optimistic`` mode). File deletions will go
        undetected in the latter case.
    :type check:
        bool

    :Complexity:
        O(log N)
    '''

    # Accept single values where sequences are expected.
    if isinstance(kinds, str):
        kinds = (kinds,)

    if isinstance(paths, str):
        paths = [paths]

    kind_mask = model.to_kind_mask(kinds)

    with progress.view():
        # Paths with the 'virtual:' prefix are flagged for pass-through
        # (presumably because they do not exist on disk - confirm against
        # ``util.iter_select_files``).
        Selection.add(
            self, util.iter_select_files(
                paths,
                show_progress=False,
                include=include,
                exclude=exclude,
                pass_through=lambda path: path.startswith('virtual:')
            ), kind_mask, format)

        # Index the added files, then bring the selection's nuts table up
        # to date.
        self._load(check)
        self._update_nuts()

605 

def reload(self):
    '''
    Check for modifications and reindex modified files.

    Based on file modification times.
    '''

    # Flag the files of the selection for checking, reload with checking
    # enabled and finally bring the selection's nuts table up to date.
    self._set_file_states_force_check()
    self._load(check=True)
    self._update_nuts()

616 

def add_virtual(self, nuts, virtual_paths=None):
    '''
    Add content which is not backed by files.

    :param nuts:
        Content pieces to be added.
    :type nuts:
        iterator yielding :py:class:`~pyrocko.squirrel.model.Nut` objects

    :param virtual_paths:
        Virtual paths of the given content pieces. If given, no temporary
        list of the nuts has to be created to collect the file paths for
        the selection. A single ``str`` is treated as a one-element list.
    :type virtual_paths:
        :py:class:`list` of :py:class:`str`

    The content is stored to the main database and to the selection.
    '''

    if isinstance(virtual_paths, str):
        virtual_paths = [virtual_paths]

    elif virtual_paths is None:
        # The nuts are iterated twice (here and in ``dig``), so an
        # iterator must be materialized first.
        if not isinstance(nuts, list):
            nuts = list(nuts)

        virtual_paths = {nut.file_path for nut in nuts}

    Selection.add(self, virtual_paths)
    database = self.get_database()
    database.dig(nuts)
    self._update_nuts()

646 

def add_volatile(self, nuts):
    '''
    Add in-memory content pieces and keep track of their virtual paths.

    :param nuts:
        Content pieces to be added.
    :type nuts:
        iterator yielding :py:class:`~pyrocko.squirrel.model.Nut` objects

    The nuts are registered with the virtual IO backend, added to the
    selection with :py:meth:`add_virtual` and their paths are appended to
    the list of volatile paths of this Squirrel.
    '''
    nuts = nuts if isinstance(nuts, list) else list(nuts)
    paths = list({nut.file_path for nut in nuts})

    io.backends.virtual.add_nuts(nuts)
    self.add_virtual(nuts, paths)
    self._volatile_paths.extend(paths)

655 

def add_volatile_waveforms(self, traces):
    '''
    Add in-memory waveforms which will be removed when the app closes.

    :param traces:
        Waveforms to be added. All of them are stored under one common,
        randomly named virtual path, one file segment per trace.

    :returns:
        The virtual path under which the waveforms have been added.
    '''

    path = 'virtual:volatile:%s' % model.random_name()

    nuts = []
    for itr, tr in enumerate(traces):
        assert tr.tmin <= tr.tmax

        # End of the time span is one sampling interval after the last
        # sample.
        tend = tr.tmin + tr.data_len()*tr.deltat
        tmin_seconds, tmin_offset = model.tsplit(tr.tmin)
        tmax_seconds, tmax_offset = model.tsplit(tend)

        nut = model.Nut(
            file_path=path,
            file_format='virtual',
            file_segment=itr,
            file_element=0,
            file_mtime=0,
            codes=tr.codes,
            tmin_seconds=tmin_seconds,
            tmin_offset=tmin_offset,
            tmax_seconds=tmax_seconds,
            tmax_offset=tmax_offset,
            deltat=tr.deltat,
            kind_id=to_kind_id('waveform'),
            content=tr)

        nuts.append(nut)

    self.add_volatile(nuts)
    return path

689 

def _load(self, check):
    '''
    Run ``io.iload`` over this selection and discard what it yields.

    :param check:
        Passed on to ``io.iload`` as its ``check`` argument.
    '''
    # The iterator is exhausted only for its side effects (presumably the
    # indexing of new or modified files - confirm against ``io.iload``).
    for _ in io.iload(
            self,
            content=[],
            skip_unchanged=True,
            check=check):
        pass

697 

def _update_nuts(self, transaction=None):
    '''
    Copy index entries of newly added files into the selection.

    :param transaction:
        Transaction to run in. If not given, a new transaction named
        ``'update nuts'`` is started.

    Entries from the main ``nuts`` table are inserted into the selection's
    nuts table for all files of the selection whose ``file_state`` is not
    2 and whose ``kind_mask`` accepts the entry's kind. Afterwards, the
    files are flagged through ``_set_file_states_known``.

    While the statement is running, a progress handler is installed on the
    database connection. It is removed again in any case, also if the
    update fails.
    '''
    transaction = transaction or self.transaction('update nuts')
    with make_task('Aggregating selection') as task, \
            transaction as cursor:

        self._conn.set_progress_handler(task.update, 100000)
        try:
            nrows = cursor.execute(self._sql(
                '''
                    INSERT INTO %(db)s.%(nuts)s
                    SELECT NULL,
                        nuts.file_id, nuts.file_segment, nuts.file_element,
                        nuts.kind_id, nuts.kind_codes_id,
                        nuts.tmin_seconds, nuts.tmin_offset,
                        nuts.tmax_seconds, nuts.tmax_offset,
                        nuts.kscale
                    FROM %(db)s.%(file_states)s
                    INNER JOIN nuts
                        ON %(db)s.%(file_states)s.file_id == nuts.file_id
                    INNER JOIN kind_codes
                        ON nuts.kind_codes_id ==
                           kind_codes.kind_codes_id
                    WHERE %(db)s.%(file_states)s.file_state != 2
                        AND (((1 << kind_codes.kind_id)
                            & %(db)s.%(file_states)s.kind_mask) != 0)
                ''')).rowcount

            task.update(nrows)
            self._set_file_states_known(transaction)

        finally:
            # Never leave a handler bound to the finished task on the
            # connection: it would be called during later statements.
            self._conn.set_progress_handler(None, 0)

727 

def add_source(self, source, check=True):
    '''
    Add remote resource.

    :param source:
        Remote data access client instance.
    :type source:
        subclass of :py:class:`~pyrocko.squirrel.client.base.Source`

    :param check:
        Passed on to the ``setup`` method of the source.
    :type check:
        bool
    '''

    # Register the source first, then let it set itself up on this
    # Squirrel instance.
    self._sources.append(source)
    source.setup(self, check=check)

740 

def add_fdsn(self, *args, **kwargs):
    '''
    Add FDSN site for transparent remote data access.

    Arguments are passed to
    :py:class:`~pyrocko.squirrel.client.fdsn.FDSNSource`. The source created
    from them is added with :py:meth:`add_source`.
    '''

    self.add_source(fdsn.FDSNSource(*args, **kwargs))

750 

def add_catalog(self, *args, **kwargs):
    '''
    Add online catalog for transparent event data access.

    Arguments are passed to
    :py:class:`~pyrocko.squirrel.client.catalog.CatalogSource`. The source
    created from them is added with :py:meth:`add_source`.
    '''

    self.add_source(catalog.CatalogSource(*args, **kwargs))

760 

def add_dataset(self, ds, check=True):
    '''
    Read dataset description from file and add its contents.

    :param ds:
        Path to dataset description file or dataset description object.
        See :py:mod:`~pyrocko.squirrel.dataset`.
    :type ds:
        :py:class:`str` or :py:class:`~pyrocko.squirrel.dataset.Dataset`

    :param check:
        If ``True``, all file modification times are checked to see if
        cached information has to be updated (slow). If ``False``, only
        previously unknown files are indexed and cached information is used
        for known files, regardless of file state (fast, corresponds to
        Squirrel's ``--optimistic`` mode). File deletions will go
        undetected in the latter case.
    :type check:
        bool
    '''
    # A string is taken as the path of a dataset description file.
    if isinstance(ds, str):
        ds = dataset.read_dataset(ds)

    ds.setup(self, check=check)

785 

786 def _get_selection_args( 

787 self, kind_id, 

788 obj=None, tmin=None, tmax=None, time=None, codes=None): 

789 

790 if codes is not None: 

791 codes = codes_patterns_for_kind(kind_id, codes) 

792 

793 if time is not None: 

794 tmin = time 

795 tmax = time 

796 

797 if obj is not None: 

798 tmin = tmin if tmin is not None else obj.tmin 

799 tmax = tmax if tmax is not None else obj.tmax 

800 codes = codes if codes is not None else codes_patterns_for_kind( 

801 kind_id, obj.codes) 

802 

803 return tmin, tmax, codes 

804 

805 def _get_selection_args_str(self, *args, **kwargs): 

806 

807 tmin, tmax, codes = self._get_selection_args(*args, **kwargs) 

808 return 'tmin: %s, tmax: %s, codes: %s' % ( 

809 util.time_to_str(tmin) if tmin is not None else 'none', 

810 util.time_to_str(tmax) if tmax is not None else 'none', 

811 ','.join(str(entry) for entry in codes)) 

812 

813 def _selection_args_to_kwargs( 

814 self, obj=None, tmin=None, tmax=None, time=None, codes=None): 

815 

816 return dict(obj=obj, tmin=tmin, tmax=tmax, time=time, codes=codes) 

817 

def _timerange_sql(self, tmin, tmax, kind, cond, args, naiv):
    '''
    Append SQL conditions selecting nuts which may intersect a time span.

    :param tmin:
        Start of the query time span.
    :param tmax:
        End of the query time span.
    :param kind:
        Content kind, converted with ``to_kind_id`` for the query.
    :param cond:
        List of SQL condition templates, extended in place.
    :param args:
        List of values bound to the ``?`` placeholders, extended in place
        in the same order as the placeholders are appended to ``cond``.
    :param naiv:
        If true, use plain comparisons on ``tmin_seconds`` and
        ``tmax_seconds`` instead of the per-``kscale`` conditions.

    The selection works on whole seconds only; the sub-second offsets are
    not used here.
    '''

    tmin_seconds, tmin_offset = model.tsplit(tmin)
    tmax_seconds, tmax_offset = model.tsplit(tmax)
    if naiv:
        cond.append('%(db)s.%(nuts)s.tmin_seconds <= ?')
        args.append(tmax_seconds)
    else:
        tscale_edges = model.tscale_edges
        tmin_cond = []
        # One condition per kscale value. For all but the last one, the
        # nut's start time is bracketed using the corresponding entry of
        # tscale_edges (presumably an upper bound on the duration of nuts
        # with that kscale - confirm against ``model``).
        for kscale in range(tscale_edges.size + 1):
            if kscale != tscale_edges.size:
                tscale = int(tscale_edges[kscale])
                tmin_cond.append('''
                    (%(db)s.%(nuts)s.kind_id = ?
                     AND %(db)s.%(nuts)s.kscale == ?
                     AND %(db)s.%(nuts)s.tmin_seconds BETWEEN ? AND ?)
                ''')
                args.extend(
                    (to_kind_id(kind), kscale,
                     tmin_seconds - tscale - 1, tmax_seconds + 1))

            else:
                # Last kscale: no lower bound on the start time.
                tmin_cond.append('''
                    (%(db)s.%(nuts)s.kind_id == ?
                     AND %(db)s.%(nuts)s.kscale == ?
                     AND %(db)s.%(nuts)s.tmin_seconds <= ?)
                ''')

                args.extend(
                    (to_kind_id(kind), kscale, tmax_seconds + 1))
        if tmin_cond:
            cond.append(' ( ' + ' OR '.join(tmin_cond) + ' ) ')

    # Applies in both modes: the nut must not end before the query starts.
    cond.append('%(db)s.%(nuts)s.tmax_seconds >= ?')
    args.append(tmin_seconds)

854 

def _codes_match_sql(self, kind_id, codes, cond, args):
    '''
    Append an SQL condition matching the given codes patterns.

    :param kind_id:
        Content kind ID, used to convert ``codes`` into patterns.
    :param codes:
        Codes patterns, converted with ``codes_patterns_for_kind``.
    :param cond:
        List of SQL conditions, extended in place by at most one entry.
    :param args:
        List of values bound to the ``?`` placeholders, extended in place.

    Patterns without wildcard characters are matched with ``IN``, all others
    with ``GLOB``. Nothing is appended if the conversion yields ``None`` or
    no patterns at all.
    '''
    pats = codes_patterns_for_kind(kind_id, codes)
    if pats is None:
        return

    safe_pats = [pat.safe_str for pat in pats]
    exact = [spat for spat in safe_pats if _is_exact(spat)]
    inexact = [spat for spat in safe_pats if not _is_exact(spat)]

    # Values for the exact patterns are bound first, then the others, in
    # the same order as the placeholders appear in the condition.
    alternatives = []
    if exact:
        placeholders = ', '.join('?'*len(exact))
        alternatives.append(
            ' ( kind_codes.codes IN ( %s ) ) ' % placeholders)
        args.extend(exact)

    if inexact:
        globs = ' OR '.join(('kind_codes.codes GLOB ?',) * len(inexact))
        alternatives.append(' ( %s ) ' % globs)
        args.extend(inexact)

    if len(alternatives) == 2:
        cond.append(' ( %s OR %s ) ' % tuple(alternatives))

    elif alternatives:
        cond.append(alternatives[0])

888 

def iter_nuts(
        self, kind=None, tmin=None, tmax=None, codes=None, naiv=False,
        kind_codes_ids=None, path=None):

    '''
    Iterate over content entities matching given constraints.

    :param kind:
        Content kind (or kinds) to extract. By default, all content kinds
        are queried.
    :type kind:
        :py:class:`str`, :py:class:`list` of :py:class:`str`

    :param tmin:
        Start time of query interval.
    :type tmin:
        timestamp

    :param tmax:
        End time of query interval.
    :type tmax:
        timestamp

    :param codes:
        List of code patterns to query.
    :type codes:
        :py:class:`list` of :py:class:`~pyrocko.squirrel.model.Codes`
        objects appropriate for the queried content type, or anything which
        can be converted to such objects.

    :param naiv:
        Bypass time span lookup through indices (slow, for testing).
    :type naiv:
        :py:class:`bool`

    :param kind_codes_ids:
        Kind-codes IDs of contents to be retrieved (internal use).
    :type kind_codes_ids:
        :py:class:`list` of :py:class:`int`

    :param path:
        If given, only content from the file with this path is returned.
    :type path:
        :py:class:`str`

    :yields:
        :py:class:`~pyrocko.squirrel.model.Nut` objects representing the
        intersecting content.

    :complexity:
        O(log N) for the time selection part due to heavy use of database
        indices.

    Query time span is treated as a half-open interval ``[tmin, tmax)``.
    However, if ``tmin`` equals ``tmax``, the edge logics are modified to
    closed-interval so that content intersecting with the time instant ``t
    = tmin = tmax`` is returned (otherwise nothing would be returned as
    ``[t, t)`` never matches anything).

    Time spans of content entities to be matched are also treated as half
    open intervals, e.g. content span ``[0, 1)`` is matched by query span
    ``[0, 1)`` but not by ``[-1, 0)`` or ``[1, 2)``. Also here, logics are
    modified to closed-interval when the content time span is an empty
    interval, i.e. to indicate a time instant. E.g. time instant 0 is
    matched by ``[0, 1)`` but not by ``[-1, 0)`` or ``[1, 2)``.

    If several kinds (or no kind) are given, the query is run once for
    each kind, with all other constraints applied unchanged.
    '''

    if not isinstance(kind, str):
        if kind is None:
            kind = model.g_content_kinds

        # All constraints must be handed on, otherwise ``naiv``,
        # ``kind_codes_ids`` and ``path`` would be ignored in multi-kind
        # queries.
        for kind_ in kind:
            for nut in self.iter_nuts(
                    kind_, tmin, tmax, codes,
                    naiv=naiv,
                    kind_codes_ids=kind_codes_ids,
                    path=path):

                yield nut

        return

    kind_id = to_kind_id(kind)

    cond = []
    args = []
    if tmin is not None or tmax is not None:
        assert kind is not None
        # Complete one-sided queries with the time span of the selection.
        if tmin is None:
            tmin = self.get_time_span()[0]
        if tmax is None:
            tmax = self.get_time_span()[1] + 1.0

        self._timerange_sql(tmin, tmax, kind, cond, args, naiv)

    cond.append('kind_codes.kind_id == ?')
    args.append(kind_id)

    if codes is not None:
        self._codes_match_sql(kind_id, codes, cond, args)

    if kind_codes_ids is not None:
        cond.append(
            ' ( kind_codes.kind_codes_id IN ( %s ) ) ' % ', '.join(
                '?'*len(kind_codes_ids)))

        args.extend(kind_codes_ids)

    db = self.get_database()
    if path is not None:
        cond.append('files.path == ?')
        args.append(db.relpath(abspath(path)))

    sql = ('''
        SELECT
            files.path,
            files.format,
            files.mtime,
            files.size,
            %(db)s.%(nuts)s.file_segment,
            %(db)s.%(nuts)s.file_element,
            kind_codes.kind_id,
            kind_codes.codes,
            %(db)s.%(nuts)s.tmin_seconds,
            %(db)s.%(nuts)s.tmin_offset,
            %(db)s.%(nuts)s.tmax_seconds,
            %(db)s.%(nuts)s.tmax_offset,
            kind_codes.deltat
        FROM files
        INNER JOIN %(db)s.%(nuts)s
            ON files.file_id == %(db)s.%(nuts)s.file_id
        INNER JOIN kind_codes
            ON %(db)s.%(nuts)s.kind_codes_id == kind_codes.kind_codes_id
        ''')

    if cond:
        sql += ''' WHERE ''' + ' AND '.join(cond)

    sql = self._sql(sql)
    if tmin is None and tmax is None:
        # No time constraint: every row returned by the query matches.
        for row in self._conn.execute(sql, args):
            row = (db.abspath(row[0]),) + row[1:]
            nut = model.Nut(values_nocheck=row)
            yield nut
    else:
        assert tmin is not None and tmax is not None
        # The SQL conditions work on whole seconds only, the exact
        # interval logic is applied here.
        if tmin == tmax:
            for row in self._conn.execute(sql, args):
                row = (db.abspath(row[0]),) + row[1:]
                nut = model.Nut(values_nocheck=row)
                if (nut.tmin <= tmin < nut.tmax) \
                        or (nut.tmin == nut.tmax and tmin == nut.tmin):

                    yield nut
        else:
            for row in self._conn.execute(sql, args):
                row = (db.abspath(row[0]),) + row[1:]
                nut = model.Nut(values_nocheck=row)
                if (tmin < nut.tmax and nut.tmin < tmax) \
                        or (nut.tmin == nut.tmax
                            and tmin <= nut.tmin < tmax):

                    yield nut

1040 

def get_nuts(self, *args, **kwargs):
    '''
    Collect content entities matching given constraints into a list.

    All positional and keyword arguments are forwarded unchanged to
    :py:meth:`iter_nuts`; the resulting iterator is consumed completely.

    :returns:
        List with every item produced by :py:meth:`iter_nuts`, in the
        order in which they were produced.
    '''

    return [nut for nut in self.iter_nuts(*args, **kwargs)]

1049 

def _split_nuts(
        self, kind, tmin=None, tmax=None, codes=None, path=None):
    '''
    Cut a time span out of matching index entries.

    The operation is applied to two tables within one transaction: first
    to the nuts table addressed by ``self._sql`` and then to
    ``main.nuts``. Every matching entry which overlaps with the span
    between ``tmin`` and ``tmax`` is deleted. For the parts of such an
    entry which lie before ``tmin`` or after ``tmax``, new entries are
    inserted which copy file and codes information from the original
    entry.

    :param kind:
        Content kind of the entries to be split.
    :param tmin:
        Start time of the span to be cut out.
    :param tmax:
        End time of the span to be cut out.
    :param codes:
        If not ``None``, restrict to entries matching these codes (passed
        to ``_codes_match_sql``).
    :param path:
        If not ``None``, restrict to entries belonging to this file path.
    '''

    kind_id = to_kind_id(kind)
    tmin_seconds, tmin_offset = model.tsplit(tmin)
    tmax_seconds, tmax_offset = model.tsplit(tmax)

    # Same name substitutions as used for the selection, but pointing to
    # the nuts table of the main database.
    names_main_nuts = dict(self._names, db='main', nuts='nuts')

    db = self.get_database()

    def main_nuts(s):
        return s % names_main_nuts

    sql_select_head = '''
        SELECT
            %(db)s.%(nuts)s.nut_id,
            %(db)s.%(nuts)s.tmin_seconds,
            %(db)s.%(nuts)s.tmin_offset,
            %(db)s.%(nuts)s.tmax_seconds,
            %(db)s.%(nuts)s.tmax_offset,
            kind_codes.deltat
        FROM files
        INNER JOIN %(db)s.%(nuts)s
            ON files.file_id == %(db)s.%(nuts)s.file_id
        INNER JOIN kind_codes
            ON %(db)s.%(nuts)s.kind_codes_id == kind_codes.kind_codes_id
        WHERE '''  # noqa

    sql_add = '''
        INSERT INTO %(db)s.%(nuts)s (
                file_id, file_segment, file_element, kind_id,
                kind_codes_id, tmin_seconds, tmin_offset,
                tmax_seconds, tmax_offset, kscale )
            SELECT
                file_id, file_segment, file_element,
                kind_id, kind_codes_id, ?, ?, ?, ?, ?
            FROM %(db)s.%(nuts)s
            WHERE nut_id == ?
    '''

    sql_delete = '''
        DELETE FROM %(db)s.%(nuts)s WHERE nut_id == ?
    '''

    with self.transaction('split nuts') as cursor:
        # First pass modifies the selection, second pass the main table.
        for substitute in (self._sql, main_nuts):

            conditions = []
            params = []

            self._timerange_sql(
                tmin, tmax, kind, conditions, params, False)

            if codes is not None:
                self._codes_match_sql(kind_id, codes, conditions, params)

            if path is not None:
                conditions.append('files.path == ?')
                params.append(db.relpath(abspath(path)))

            sql_select = substitute(
                sql_select_head + ' AND '.join(conditions))

            remainders = []
            obsolete = []
            for found in cursor.execute(sql_select, params):
                nut_id, nut_tmin_seconds, nut_tmin_offset, \
                    nut_tmax_seconds, nut_tmax_offset, nut_deltat = found

                nut_tmin = model.tjoin(nut_tmin_seconds, nut_tmin_offset)
                nut_tmax = model.tjoin(nut_tmax_seconds, nut_tmax_offset)

                if not (nut_tmin < tmax and tmin < nut_tmax):
                    # No overlap with the span to be cut out.
                    continue

                if nut_tmin < tmin:
                    # Keep the part preceding the cut.
                    kscale = model.tscale_to_kscale(
                        tmin_seconds - nut_tmin_seconds)

                    remainders.append((
                        nut_tmin_seconds, nut_tmin_offset,
                        tmin_seconds, tmin_offset,
                        kscale,
                        nut_id))

                if tmax < nut_tmax:
                    # Keep the part following the cut.
                    kscale = model.tscale_to_kscale(
                        nut_tmax_seconds - tmax_seconds)

                    remainders.append((
                        tmax_seconds, tmax_offset,
                        nut_tmax_seconds, nut_tmax_offset,
                        kscale,
                        nut_id))

                obsolete.append((nut_id,))

            # Insert before delete: the new rows copy their file and codes
            # columns from the row which is about to be removed.
            cursor.executemany(substitute(sql_add), remainders)
            cursor.executemany(substitute(sql_delete), obsolete)

1144 

def get_time_span(self, kinds=None):
    '''
    Get time interval over all content in selection.

    :param kinds:
        If not ``None``, restrict query to given content kinds. A single
        kind may be given as a plain string.
    :type kinds:
        list of str

    :complexity:
        O(1), independent of the number of nuts.

    :returns:
        ``(tmin, tmax)``, combined time interval of queried content kinds.
        An element is ``None`` if no corresponding time could be obtained.
    '''

    sql_earliest = self._sql('''
        SELECT MIN(tmin_seconds), MIN(tmin_offset)
        FROM %(db)s.%(nuts)s
        WHERE kind_id == ?
            AND tmin_seconds == (
                SELECT MIN(tmin_seconds)
                FROM %(db)s.%(nuts)s
                WHERE kind_id == ?)
    ''')

    sql_latest = self._sql('''
        SELECT MAX(tmax_seconds), MAX(tmax_offset)
        FROM %(db)s.%(nuts)s
        WHERE kind_id == ?
            AND tmax_seconds == (
                SELECT MAX(tmax_seconds)
                FROM %(db)s.%(nuts)s
                WHERE kind_id == ?)
    ''')

    if isinstance(kinds, str):
        kinds = [kinds]

    if kinds is not None:
        kind_ids = model.to_kind_ids(kinds)
    else:
        kind_ids = model.g_content_kind_ids

    span_tmin = None
    span_tmax = None

    for kind_id in kind_ids:
        # The kind id is needed twice: in the outer and in the sub-query.
        query_args = (kind_id, kind_id)

        for seconds, offset in self._conn.execute(
                sql_earliest, query_args):

            candidate = model.tjoin(seconds, offset)
            if candidate is None:
                continue

            if span_tmin is None or candidate < span_tmin:
                span_tmin = candidate

        for seconds, offset in self._conn.execute(
                sql_latest, query_args):

            candidate = model.tjoin(seconds, offset)
            if candidate is None:
                continue

            if span_tmax is None or candidate > span_tmax:
                span_tmax = candidate

    return span_tmin, span_tmax

1206 

def has(self, kinds):
    '''
    Check availability of given content kinds.

    :param kinds:
        Content kinds to query.
    :type kinds:
        list of str

    :returns:
        ``True`` if :py:meth:`get_time_span` reports both a start and an
        end time for the queried content kinds, ``False`` otherwise.
    '''
    span_tmin, span_tmax = self.get_time_span(kinds)
    return not (span_tmin is None or span_tmax is None)

1223 

def get_deltat_span(self, kind):
    '''
    Get smallest and largest sampling interval of content of given kind.

    Sampling intervals reported as ``None`` by :py:meth:`get_deltats` are
    ignored.

    :param kind:
        Content kind
    :type kind:
        str

    :returns:
        ``(deltat_min, deltat_max)`` or ``(None, None)`` if no sampling
        interval is available.
    '''

    known = []
    for deltat in self.get_deltats(kind):
        if deltat is not None:
            known.append(deltat)

    if not known:
        return None, None

    return min(known), max(known)

1244 

def iter_kinds(self, codes=None):
    '''
    Iterate over content types available in selection.

    The query is delegated to the database object, using the selection's
    own kind-codes-count table.

    :param codes:
        If given, get kinds only for selected codes identifier.
        Only a single identifier may be given here and no pattern matching
        is done, currently.
    :type codes:
        :py:class:`~pyrocko.squirrel.model.Codes`

    :yields:
        Available content kinds as :py:class:`str`.

    :complexity:
        O(1), independent of number of nuts.
    '''

    counts_table = '%(db)s.%(kind_codes_count)s' % self._names
    return self._database._iter_kinds(
        codes=codes, kind_codes_count=counts_table)

1266 

def iter_deltats(self, kind=None):
    '''
    Iterate over sampling intervals available in selection.

    The query is delegated to the database object, using the selection's
    own kind-codes-count table.

    :param kind:
        If given, get sampling intervals only for a given content type.
    :type kind:
        str

    :yields:
        :py:class:`float` values.

    :complexity:
        O(1), independent of number of nuts.
    '''
    counts_table = '%(db)s.%(kind_codes_count)s' % self._names
    return self._database._iter_deltats(
        kind=kind, kind_codes_count=counts_table)

1285 

def iter_codes(self, kind=None):
    '''
    Iterate over content identifier code sequences available in selection.

    The query is delegated to the database object, using the selection's
    own kind-codes-count table.

    :param kind:
        If given, get codes only for a given content type.
    :type kind:
        str

    :yields:
        :py:class:`tuple` of :py:class:`str`

    :complexity:
        O(1), independent of number of nuts.
    '''
    counts_table = '%(db)s.%(kind_codes_count)s' % self._names
    return self._database._iter_codes(
        kind=kind, kind_codes_count=counts_table)

1304 

1305 def _iter_codes_info(self, kind=None, codes=None): 

1306 ''' 

1307 Iterate over number of occurrences of any (kind, codes) combination. 

1308 

1309 :param kind: 

1310 If given, get counts only for selected content type. 

1311 :type kind: 

1312 str 

1313 

1314 :yields: 

1315 Tuples of the form ``(kind, codes, deltat, kind_codes_id, count)``. 

1316 

1317 :complexity: 

1318 O(1), independent of number of nuts. 

1319 ''' 

1320 return self._database._iter_codes_info( 

1321 kind=kind, 

1322 codes=codes, 

1323 kind_codes_count='%(db)s.%(kind_codes_count)s' % self._names) 

1324 

def get_kinds(self, codes=None):
    '''
    Get content types available in selection.

    :param codes:
        If given, get kinds only for selected codes identifier.
        Only a single identifier may be given here and no pattern matching
        is done, currently.
    :type codes:
        :py:class:`~pyrocko.squirrel.model.Codes`

    :returns:
        Sorted list of available content types, as produced by
        :py:meth:`iter_kinds`.
    :rtype:
        :py:class:`list` of :py:class:`str`

    :complexity:
        O(1), independent of number of nuts.

    '''
    return sorted(self.iter_kinds(codes=codes))

1346 

def get_deltats(self, kind=None):
    '''
    Get sampling intervals available in selection.

    :param kind:
        If given, get sampling intervals only for selected content type.
    :type kind:
        str

    :complexity:
        O(1), independent of number of nuts.

    :returns:
        Sorted list of available sampling intervals, as produced by
        :py:meth:`iter_deltats`.
    '''
    return sorted(self.iter_deltats(kind=kind))

1362 

def get_codes(self, kind=None):
    '''
    Get identifier code sequences available in selection.

    :param kind:
        If given, get codes only for selected content type.
    :type kind:
        str

    :complexity:
        O(1), independent of number of nuts.

    :returns:
        Sorted list of available codes, as produced by
        :py:meth:`iter_codes`.
    '''
    return sorted(self.iter_codes(kind=kind))

1378 

def get_counts(self, kind=None):
    '''
    Get number of occurrences of any (kind, codes) combination.

    :param kind:
        If given, get codes only for selected content type.
    :type kind:
        str

    :complexity:
        O(1), independent of number of nuts.

    :returns:
        ``dict`` with ``counts[kind][codes]`` or ``counts[codes]``
        if kind is not ``None``. If ``kind`` is given but the selection
        holds no content of that kind, an empty ``dict`` is returned.
    '''
    counts_by_kind_id = {}
    for kind_id, codes, _, _, count in self._iter_codes_info(kind=kind):
        counts = counts_by_kind_id.setdefault(kind_id, {})
        # Several entries may share the same codes, so accumulate.
        counts[codes] = counts.get(codes, 0) + count

    if kind is not None:
        # No entry exists for a kind without content in the selection;
        # report that as "nothing counted" instead of raising KeyError.
        return counts_by_kind_id.get(to_kind_id(kind), {})

    return dict(
        (to_kind(kind_id), counts)
        for (kind_id, counts) in counts_by_kind_id.items())

1410 

def glob_codes(self, kind, codes):
    '''
    Find codes matching given patterns.

    The patterns are matched with SQLite's ``GLOB`` operator against the
    entries of the ``kind_codes`` table. Without any pattern, all entries
    of the given kind are returned.

    :param kind:
        Content kind to be queried.
    :type kind:
        str

    :param codes:
        List of code patterns to query.
    :type codes:
        :py:class:`list` of :py:class:`~pyrocko.squirrel.model.Codes`
        objects appropriate for the queried content type, or anything which
        can be converted to such objects.

    :returns:
        List of matches of the form ``[kind_codes_id, codes, deltat]``.
    '''

    kind_id = to_kind_id(kind)
    patterns = codes_patterns_for_kind(kind_id, codes)

    sql_args = [kind_id]
    if patterns:
        # One placeholder per pattern, a match on any of them suffices.
        alternatives = ' OR '.join(
            'kind_codes.codes GLOB ?' for _ in patterns)

        codes_cond = 'AND ( %s ) ' % alternatives
        sql_args.extend(pattern.safe_str for pattern in patterns)
    else:
        codes_cond = ''

    sql = self._sql('''
        SELECT kind_codes_id, codes, deltat FROM kind_codes
        WHERE
            kind_id == ? ''' + codes_cond)

    return [list(row) for row in self._conn.execute(sql, sql_args)]

1449 

def update(self, constraint=None, **kwargs):
    '''
    Update or partially update channel and event inventories.

    :param constraint:
        Selection of times or areas to be brought up to date.
    :type constraint:
        :py:class:`~pyrocko.squirrel.client.base.Constraint`

    :param \\*\\*kwargs:
        Shortcut for setting ``constraint=Constraint(**kwargs)``. Only
        used if ``constraint`` is ``None``.

    This function triggers all attached remote sources, to check for
    updates in the meta-data. The sources will only submit queries when
    their expiration date has passed, or if the selection spans into
    previously unseen times or areas.
    '''

    effective = constraint
    if effective is None:
        effective = client.Constraint(**kwargs)

    # For each source: channel inventory first, then event inventory.
    for remote in self._sources:
        remote.update_channel_inventory(self, effective)
        remote.update_event_inventory(self, effective)

1474 

def update_waveform_promises(self, constraint=None, **kwargs):
    '''
    Permit downloading of remote waveforms.

    :param constraint:
        Remote waveforms compatible with the given constraint are enabled
        for download.
    :type constraint:
        :py:class:`~pyrocko.squirrel.client.base.Constraint`

    :param \\*\\*kwargs:
        Shortcut for setting ``constraint=Constraint(**kwargs)``. Only
        used if ``constraint`` is ``None``.

    Calling this method permits Squirrel to download waveforms from remote
    sources when processing subsequent waveform requests. This works by
    inserting so called waveform promises into the database. It will look
    into the available channels for each remote source and create a promise
    for each channel compatible with the given constraint. If the promise
    then matches in a waveform request, Squirrel tries to download the
    waveform. If the download is successful, the downloaded waveform is
    added to the Squirrel and the promise is deleted. If the download
    fails, the promise is kept if the reason of failure looks like being
    temporary, e.g. because of a network failure. If the cause of failure
    however seems to be permanent, the promise is deleted so that no
    further attempts are made to download a waveform which might not be
    available from that server at all. To force re-scheduling after a
    permanent failure, call :py:meth:`update_waveform_promises`
    yet another time.
    '''

    effective = constraint
    if effective is None:
        effective = client.Constraint(**kwargs)

    for remote in self._sources:
        remote.update_waveform_promises(self, effective)

1510 

def remove_waveform_promises(self, from_database='selection'):
    '''
    Remove waveform promises from live selection or global database.

    Every attached source is asked in turn to remove the waveform promises
    it has provided.

    :param from_database:
        Remove from live selection ``'selection'`` or global database
        ``'global'``. Passed on unchanged to the sources.
    '''
    for remote in self._sources:
        remote.remove_waveform_promises(
            self,
            from_database=from_database)

1524 

def update_responses(self, constraint=None, **kwargs):
    '''
    Trigger update of response inventories in all attached sources.

    :param constraint:
        Constraint handed to each source's ``update_response_inventory``.
    :type constraint:
        :py:class:`~pyrocko.squirrel.client.base.Constraint`

    :param \\*\\*kwargs:
        Shortcut for setting ``constraint=Constraint(**kwargs)``. Only
        used if ``constraint`` is ``None``.
    '''
    effective = constraint
    if effective is None:
        effective = client.Constraint(**kwargs)

    for remote in self._sources:
        remote.update_response_inventory(self, effective)

1531 

def get_nfiles(self):
    '''
    Get number of files in selection.

    :returns:
        Number of rows in the selection's file states table.
    '''

    sql = self._sql('''SELECT COUNT(*) FROM %(db)s.%(file_states)s''')
    first_row = next(iter(self._conn.execute(sql)), None)
    if first_row is None:
        return None

    return first_row[0]

1540 

def get_nnuts(self):
    '''
    Get number of nuts in selection.

    :returns:
        Number of rows in the selection's nuts table.
    '''

    sql = self._sql('''SELECT COUNT(*) FROM %(db)s.%(nuts)s''')
    first_row = next(iter(self._conn.execute(sql)), None)
    if first_row is None:
        return None

    return first_row[0]

1549 

def get_total_size(self):
    '''
    Get aggregated file size available in selection.

    :returns:
        Sum over the ``size`` column of all files referenced by the
        selection's file states table, or ``0`` if that sum is empty.
    '''

    sql = self._sql('''
        SELECT SUM(files.size) FROM %(db)s.%(file_states)s
        INNER JOIN files
            ON %(db)s.%(file_states)s.file_id = files.file_id
    ''')

    first_row = next(iter(self._conn.execute(sql)), None)
    if first_row is None:
        return None

    # SUM over zero rows gives NULL, report that as zero bytes.
    return first_row[0] or 0

1563 

def get_stats(self):
    '''
    Get statistics on contents available through this selection.

    :returns:
        ``SquirrelStats`` object, filled with file and nut counts,
        available kinds and codes, total file size, counts per kind and
        codes, time span per kind and descriptions of the attached sources
        and operators.
    '''

    kinds = self.get_kinds()
    time_spans = dict(
        (kind, self.get_time_span([kind])) for kind in kinds)

    nfiles = self.get_nfiles()
    nnuts = self.get_nnuts()
    codes = self.get_codes()
    total_size = self.get_total_size()
    counts = self.get_counts()
    sources = [source.describe() for source in self._sources]
    operators = [operator.describe() for operator in self._operators]

    return SquirrelStats(
        nfiles=nfiles,
        nnuts=nnuts,
        kinds=kinds,
        codes=codes,
        total_size=total_size,
        counts=counts,
        time_spans=time_spans,
        sources=sources,
        operators=operators)

1584 

def get_content(
        self,
        nut,
        cache_id='default',
        accessor_id='default',
        show_progress=False,
        model='squirrel'):

    '''
    Get and possibly load full content for a given index entry from file.

    Loads the actual content objects (channel, station, waveform, ...) from
    file. For efficiency, sibling content (all stuff in the same file
    segment) will also be loaded as a side effect. The loaded contents are
    cached in the Squirrel object.

    :param nut:
        Index entry for which the content is requested.
    :param cache_id:
        Name of the content cache to be used.
    :param accessor_id:
        Name of the accessing consumer, handed to the cache.
    :param show_progress:
        Passed on to the file loader.
    :param model:
        Requested object model, handed to the cache.

    :raises:
        :py:exc:`~pyrocko.squirrel.error.NotAvailable` if the cache cannot
        provide the content, even after the file segment has been loaded.
    '''

    content_cache = self._content_caches[cache_id]

    cache_miss = not content_cache.has(nut)
    if cache_miss:
        loaded_nuts = io.iload(
            nut.file_path,
            segment=nut.file_segment,
            format=nut.file_format,
            database=self._database,
            update_selection=self,
            show_progress=show_progress)

        # Everything found in the segment goes into the cache, not only
        # the requested entry.
        for loaded in loaded_nuts:
            content_cache.put(loaded)

    try:
        return content_cache.get(nut, accessor_id, model)

    except KeyError:
        raise error.NotAvailable(
            'Unable to retrieve content: %s, %s, %s, %s' % nut.key)

1621 

def advance_accessor(self, accessor_id='default', cache_id=None):
    '''
    Notify memory caches about consumer moving to a new data batch.

    :param accessor_id:
        Name of accessing consumer to be advanced.
    :type accessor_id:
        str

    :param cache_id:
        Name of cache for which the accessor should be advanced. By
        default the named accessor is advanced in all registered caches.
        By default, two caches named ``'default'`` and ``'waveform'`` are
        available.
    :type cache_id:
        str

    See :py:class:`~pyrocko.squirrel.cache.ContentCache` for details on how
    Squirrel's memory caching works and can be tuned. Default behaviour is
    to release data when it has not been used in the latest data
    window/batch. If the accessor is never advanced, data is cached
    indefinitely - which is often desired e.g. for station meta-data.
    Methods for consecutive data traversal, like
    :py:meth:`chopper_waveforms` automatically advance and clear
    their accessor.
    '''
    if cache_id is None:
        cache_ids = self._content_caches.keys()
    else:
        cache_ids = [cache_id]

    for name in cache_ids:
        self._content_caches[name].advance_accessor(accessor_id)

1654 

def clear_accessor(self, accessor_id, cache_id=None):
    '''
    Notify memory caches about a consumer having finished.

    :param accessor_id:
        Name of accessor to be cleared.
    :type accessor_id:
        str

    :param cache_id:
        Name of cache for which the accessor should be cleared. By default
        the named accessor is cleared from all registered caches. By
        default, two caches named ``'default'`` and ``'waveform'`` are
        available.
    :type cache_id:
        str

    Calling this method clears all references to cache entries held by the
    named accessor. Cache entries are then freed if not referenced by any
    other accessor.
    '''

    if cache_id is None:
        cache_ids = self._content_caches.keys()
    else:
        cache_ids = [cache_id]

    for name in cache_ids:
        self._content_caches[name].clear_accessor(accessor_id)

1683 

def get_cache_stats(self, cache_id):
    '''
    Get statistics of one of the content caches.

    :param cache_id:
        Name of the cache to be queried.
    :type cache_id:
        str

    :returns:
        Whatever the selected cache's ``get_stats()`` method returns.
    '''
    content_cache = self._content_caches[cache_id]
    return content_cache.get_stats()

1686 

@filldocs
def get_stations(
        self, obj=None, tmin=None, tmax=None, time=None, codes=None,
        model='squirrel'):

    '''
    Get stations matching given constraints.

    %(query_args)s

    :param model:
        Select object model for returned values: ``'squirrel'`` to get
        Squirrel station objects or ``'pyrocko'`` to get Pyrocko station
        objects with channel information attached. The values
        ``'stationxml'`` and ``'stationxml+'`` are accepted as well and
        are handed through to :py:meth:`get_content`.
    :type model:
        str

    :returns:
        List of :py:class:`pyrocko.squirrel.Station
        <pyrocko.squirrel.model.Station>` objects by default or list of
        :py:class:`pyrocko.model.Station <pyrocko.model.station.Station>`
        objects if ``model='pyrocko'`` is requested.

    :raises:
        :py:exc:`ValueError` if an unknown ``model`` is requested.

    See :py:meth:`iter_nuts` for details on time span matching.
    '''

    if model == 'pyrocko':
        return self._get_pyrocko_stations(obj, tmin, tmax, time, codes)

    if model not in ('squirrel', 'stationxml', 'stationxml+'):
        raise ValueError('Invalid station model: %s' % model)

    selection_args = self._get_selection_args(
        STATION, obj, tmin, tmax, time, codes)

    nuts = list(self.iter_nuts('station', *selection_args))
    nuts.sort(key=lambda nut: nut.dkey)

    return [self.get_content(nut, model=model) for nut in nuts]

1725 

@filldocs
def get_channels(
        self, obj=None, tmin=None, tmax=None, time=None, codes=None,
        model='squirrel'):

    '''
    Get channels matching given constraints.

    %(query_args)s

    :param model:
        Object model for the returned values, handed through to
        :py:meth:`get_content`.
    :type model:
        str

    :returns:
        List of :py:class:`~pyrocko.squirrel.model.Channel` objects,
        ordered by the ``dkey`` attribute of their index entries.

    See :py:meth:`iter_nuts` for details on time span matching.
    '''

    selection_args = self._get_selection_args(
        CHANNEL, obj, tmin, tmax, time, codes)

    nuts = list(self.iter_nuts('channel', *selection_args))
    nuts.sort(key=lambda nut: nut.dkey)

    return [self.get_content(nut, model=model) for nut in nuts]

1749 

@filldocs
def get_sensors(
        self, obj=None, tmin=None, tmax=None, time=None, codes=None):

    '''
    Get sensors matching given constraints.

    %(query_args)s

    :returns:
        List of :py:class:`~pyrocko.squirrel.model.Sensor` objects.

    Sensors are assembled from channels. To find all channels of a sensor,
    the last character of the channel code in each given codes pattern is
    replaced with the wildcard ``'?'`` before querying.

    See :py:meth:`iter_nuts` for details on time span matching.
    '''

    tmin, tmax, codes = self._get_selection_args(
        CHANNEL, obj, tmin, tmax, time, codes)

    if codes is not None:
        def widen(entry):
            # The match-all pattern is passed through untouched.
            if entry != '*':
                return entry.replace(channel=entry.channel[:-1] + '?')

            return entry

        codes = codes_patterns_list(widen(entry) for entry in codes)

    nuts = list(self.iter_nuts('channel', tmin, tmax, codes))
    nuts.sort(key=lambda nut: nut.dkey)

    return model.Sensor.from_channels(
        self.get_content(nut) for nut in nuts)

1780 

@filldocs
def get_responses(
        self, obj=None, tmin=None, tmax=None, time=None, codes=None,
        model='squirrel'):

    '''
    Get instrument responses matching given constraints.

    %(query_args)s

    :param model:
        Object model for the returned values, handed through to
        :py:meth:`get_content`.
    :type model:
        str

    :returns:
        List of :py:class:`~pyrocko.squirrel.model.Response` objects,
        ordered by the ``dkey`` attribute of their index entries.

    See :py:meth:`iter_nuts` for details on time span matching.
    '''

    selection_args = self._get_selection_args(
        RESPONSE, obj, tmin, tmax, time, codes)

    nuts = list(self.iter_nuts('response', *selection_args))
    nuts.sort(key=lambda nut: nut.dkey)

    return [self.get_content(nut, model=model) for nut in nuts]

1804 

@filldocs
def get_response(
        self, obj=None, tmin=None, tmax=None, time=None, codes=None,
        model='squirrel'):

    '''
    Get instrument response matching given constraints.

    %(query_args)s

    :param model:
        Object model for the returned value. With ``'stationxml'``, the
        query is internally done with ``'stationxml+'`` and the second
        element of the single result is returned.
    :type model:
        str

    :returns:
        :py:class:`~pyrocko.squirrel.model.Response` object.

    Same as :py:meth:`get_responses` but returning exactly one response.
    Raises :py:exc:`~pyrocko.squirrel.error.NotAvailable` if zero or more
    than one is available.

    See :py:meth:`iter_nuts` for details on time span matching.
    '''

    model_ = 'stationxml+' if model == 'stationxml' else model

    responses = self.get_responses(
        obj, tmin, tmax, time, codes, model=model_)

    nresponses = len(responses)

    if nresponses == 0:
        raise error.NotAvailable(
            'No instrument response available (%s).'
            % self._get_selection_args_str(
                RESPONSE, obj, tmin, tmax, time, codes))

    if nresponses > 1:
        # Collect the Squirrel response objects to list them in the
        # error message.
        if model_ == 'squirrel':
            resps_sq = responses
        elif model_ == 'stationxml+':
            resps_sq = [resp[0] for resp in responses]
        else:
            raise ValueError('Invalid response model: %s' % model)

        summaries = ['  ' + resp.summary for resp in resps_sq]
        rinfo = ':\n' + '\n'.join(summaries)

        raise error.NotAvailable(
            'Multiple instrument responses matching given constraints '
            '(%s)%s' % (
                self._get_selection_args_str(
                    RESPONSE, obj, tmin, tmax, time, codes), rinfo))

    only = responses[0]
    if model == 'stationxml':
        return only[1]

    return only

1859 

@filldocs
def get_events(
        self, obj=None, tmin=None, tmax=None, time=None, codes=None):

    '''
    Get events matching given constraints.

    %(query_args)s

    :returns:
        List of :py:class:`~pyrocko.model.event.Event` objects, ordered by
        the ``dkey`` attribute of their index entries.

    See :py:meth:`iter_nuts` for details on time span matching.
    '''

    selection_args = self._get_selection_args(
        EVENT, obj, tmin, tmax, time, codes)

    nuts = list(self.iter_nuts('event', *selection_args))
    nuts.sort(key=lambda nut: nut.dkey)

    return [self.get_content(nut) for nut in nuts]

1880 

def _redeem_promises(self, *args):
    '''
    Try to download waveforms for promises matching a waveform query.

    :param \\*args:
        Exactly three values ``(tmin, tmax, codes)``. They are passed on
        unchanged to :py:meth:`iter_nuts`, only ``tmin`` and ``tmax`` are
        used directly.

    Outline of the procedure:

    1. Matching waveforms and waveform promises are looked up.
    2. The time span of each promise, clipped to the query time span, is
       divided into blocks. For each block a ``WaveformOrder`` is created,
       listing the gaps not covered by already available waveforms.
    3. Orders without gaps need no download, the corresponding part of the
       promise is removed right away.
    4. Remaining orders are grouped by ``order_key``. Within each group,
       orders are sorted by the position of their FDSN source in the list
       of attached sources.
    5. In each round, the first order of each group is taken and the
       orders are handed to their sources, using one download thread per
       source. Callbacks triggered by the downloads are queued and
       executed in the calling thread.

    When an order succeeds, the promise parts of that order and of all
    remaining orders in its group are removed. When it fails permanently,
    only the promise part of that order is removed. Temporary failures
    leave the promise untouched.
    '''

    tmin, tmax, _ = args

    waveforms = list(self.iter_nuts('waveform', *args))
    promises = list(self.iter_nuts('waveform_promise', *args))

    codes_to_avail = defaultdict(list)
    for nut in waveforms:
        codes_to_avail[nut.codes].append((nut.tmin, nut.tmax))

    orders = []
    for promise in promises:
        waveforms_avail = codes_to_avail[promise.codes]
        for block_tmin, block_tmax in blocks(
                max(tmin, promise.tmin),
                min(tmax, promise.tmax),
                promise.deltat):

            orders.append(
                WaveformOrder(
                    source_id=promise.file_path,
                    codes=promise.codes,
                    tmin=block_tmin,
                    tmax=block_tmax,
                    deltat=promise.deltat,
                    gaps=gaps(waveforms_avail, block_tmin, block_tmax)))

    # Orders without gaps are already covered by available waveforms.
    orders_noop, orders = lpick(lambda order: order.gaps, orders)

    order_keys_noop = set(order_key(order) for order in orders_noop)
    if len(order_keys_noop) != 0 or len(orders_noop) != 0:
        logger.info(
            'Waveform orders already satisified with cached/local data: '
            '%i (%i)' % (len(order_keys_noop), len(orders_noop)))

    source_ids = []
    sources = {}
    for source in self._sources:
        if isinstance(source, fdsn.FDSNSource):
            source_ids.append(source._source_id)
            sources[source._source_id] = source

    # Sources attached earlier are tried first.
    source_priority = dict(
        (source_id, i) for (i, source_id) in enumerate(source_ids))

    order_groups = defaultdict(list)
    for order in orders:
        order_groups[order_key(order)].append(order)

    for order_group in order_groups.values():
        order_group.sort(
            key=lambda order: source_priority[order.source_id])

    n_order_groups = len(order_groups)

    if len(order_groups) != 0 or len(orders) != 0:
        logger.info(
            'Waveform orders standing for download: %i (%i)'
            % (len(order_groups), len(orders)))

        task = make_task('Waveform orders processed', n_order_groups)
    else:
        task = None

    def split_promise(order):
        self._split_nuts(
            'waveform_promise',
            order.tmin, order.tmax,
            codes=order.codes,
            path=order.source_id)

    def release_order_group(order):
        # The order has been fulfilled, so the alternatives remaining in
        # its group are obsolete: remove their promises and the group.
        okey = order_key(order)
        for followup in order_groups[okey]:
            split_promise(followup)

        del order_groups[okey]

        if task:
            task.update(n_order_groups - len(order_groups))

    def noop(order):
        pass

    def success(order):
        release_order_group(order)
        split_promise(order)

    def batch_add(paths):
        self.add(paths)

    # Callbacks from the download threads are collected here and executed
    # in the calling thread. ``None`` marks the end of a thread's work.
    calls = queue.Queue()

    def enqueue(f):
        def wrapper(*args):
            calls.put((f, args))

        return wrapper

    for order in orders_noop:
        split_promise(order)

    while order_groups:

        # Take the next candidate of each group; groups which have run
        # out of candidates are dropped.
        orders_now = []
        empty = []
        for k, order_group in order_groups.items():
            try:
                orders_now.append(order_group.pop(0))
            except IndexError:
                empty.append(k)

        for k in empty:
            del order_groups[k]

        by_source_id = defaultdict(list)
        for order in orders_now:
            by_source_id[order.source_id].append(order)

        threads = []
        for source_id in by_source_id:
            # Bind the current source id as a default argument. Without
            # this, the thread would look up ``source_id`` only when it
            # runs, possibly after the loop has advanced to another
            # source.
            def download(source_id=source_id):
                try:
                    sources[source_id].download_waveforms(
                        by_source_id[source_id],
                        success=enqueue(success),
                        error_permanent=enqueue(split_promise),
                        error_temporary=noop,
                        batch_add=enqueue(batch_add))

                finally:
                    calls.put(None)

            thread = threading.Thread(target=download)
            thread.start()
            threads.append(thread)

        ndone = 0
        while ndone < len(threads):
            ret = calls.get()
            if ret is None:
                ndone += 1
            else:
                ret[0](*ret[1])

        for thread in threads:
            thread.join()

        if task:
            task.update(n_order_groups - len(order_groups))

    if task:
        task.done()

2043 

@filldocs
def get_waveform_nuts(
        self, obj=None, tmin=None, tmax=None, time=None, codes=None):

    '''
    Get waveform content entities matching given constraints.

    %(query_args)s

    Like :py:meth:`get_nuts` with ``kind='waveform'`` but additionally
    resolves matching waveform promises (downloads waveforms from remote
    sources) before the index is queried.

    :returns:
        List of matching waveform index entries, ordered by their ``dkey``
        attribute.

    See :py:meth:`iter_nuts` for details on time span matching.
    '''

    selection_args = self._get_selection_args(
        WAVEFORM, obj, tmin, tmax, time, codes)

    # Promises must be redeemed first, so that freshly downloaded
    # waveforms are seen by the query below.
    self._redeem_promises(*selection_args)

    nuts = list(self.iter_nuts('waveform', *selection_args))
    nuts.sort(key=lambda nut: nut.dkey)
    return nuts

2064 

2065 @filldocs 

def get_waveforms(
        self, obj=None, tmin=None, tmax=None, time=None, codes=None,
        uncut=False, want_incomplete=True, degap=True, maxgap=5,
        maxlap=None, snap=None, include_last=False, load_data=True,
        accessor_id='default', operator_params=None):

    '''
    Get waveforms matching given constraints.

    %(query_args)s

    :param uncut:
        If ``True``, traces are neither cut to [``tmin``, ``tmax``] nor
        degapped/deoverlapped. The traces are returned untouched, as read
        from the file segments. File segments are always read in their
        entirety.
    :type uncut:
        bool

    :param want_incomplete:
        If ``True``, gappy/incomplete traces are included in the result.
    :type want_incomplete:
        bool

    :param degap:
        If ``True``, connect traces and remove gaps and overlaps.
    :type degap:
        bool

    :param maxgap:
        Maximum gap size in samples which is filled with interpolated
        samples when ``degap`` is ``True``.
    :type maxgap:
        int

    :param maxlap:
        Maximum overlap size in samples which is removed when ``degap`` is
        ``True``.
    :type maxlap:
        int

    :param snap:
        Rounding functions used when computing sample index from time
        instance, for trace start and trace end, respectively. By default,
        ``(round, round)`` is used.
    :type snap:
        tuple of 2 callables

    :param include_last:
        If ``True``, add one more sample to the returned traces (the sample
        which would be the first sample of a query with ``tmin`` set to the
        current value of ``tmax``).
    :type include_last:
        bool

    :param load_data:
        If ``True``, waveform data samples are read from files (or cache).
        If ``False``, meta-information-only traces are returned (dummy
        traces with no data samples).
    :type load_data:
        bool

    :param accessor_id:
        Name of consumer on whose behalf data is accessed. Used in cache
        management (see :py:mod:`~pyrocko.squirrel.cache`). Used as a key
        to distinguish different points of extraction for the decision of
        when to release cached waveform data. Should be used when data is
        alternately extracted from more than one region / selection.
    :type accessor_id:
        str

    :param operator_params:
        Handed on as ``params`` to the operator's ``get_waveforms`` when
        the query consists of a single codes entry for which an operator
        is registered. Ignored otherwise.

    :returns:
        List of traces. An empty list is returned (and a warning is
        logged) if neither waveforms nor waveform promises are available.

    See :py:meth:`iter_nuts` for details on time span matching.

    Loaded data is kept in memory (at least) until
    :py:meth:`clear_accessor` has been called or
    :py:meth:`advance_accessor` has been called two consecutive times
    without data being accessed between the two calls (by this accessor).
    Data may still be further kept in the memory cache if held alive by
    consumers with a different ``accessor_id``.
    '''

    tmin, tmax, codes = self._get_selection_args(
        WAVEFORM, obj, tmin, tmax, time, codes)

    span_tmin, span_tmax = self.get_time_span(
        ['waveform', 'waveform_promise'])

    if span_tmin is None or span_tmax is None:
        logger.warning(
            'No waveforms available.')
        return []

    # Open-ended queries fall back to the total available time span.
    if tmin is None:
        tmin = span_tmin

    if tmax is None:
        tmax = span_tmax

    if codes is not None and len(codes) == 1:
        # TODO: fix for multiple / mixed codes
        single_codes = codes[0]
        operator = self.get_operator(single_codes)
        if operator is not None:
            return operator.get_waveforms(
                self, single_codes,
                tmin=tmin, tmax=tmax,
                uncut=uncut, want_incomplete=want_incomplete, degap=degap,
                maxgap=maxgap, maxlap=maxlap, snap=snap,
                include_last=include_last, load_data=load_data,
                accessor_id=accessor_id, params=operator_params)

    nuts = self.get_waveform_nuts(obj, tmin, tmax, time, codes)

    traces = []
    for nut in nuts:
        if load_data:
            traces.append(self.get_content(nut, 'waveform', accessor_id))
        else:
            traces.append(trace.Trace(**nut.trace_kwargs))

    if uncut:
        return traces

    if snap is None:
        snap = (round, round)

    chopped = []
    for tr in traces:
        if not load_data and tr.ydata is not None:
            # Caller wants meta-information only: work on a data-less copy.
            tr = tr.copy(data=False)
            tr.ydata = None

        try:
            piece = tr.chop(
                tmin, tmax,
                inplace=False,
                snap=snap,
                include_last=include_last)

        except trace.NoData:
            # Trace has no samples inside the requested window.
            continue

        chopped.append(piece)

    return self._process_chopped(
        chopped, degap, maxgap, maxlap, want_incomplete, tmin, tmax)

2209 

2210 @filldocs 

def chopper_waveforms(
        self, obj=None, tmin=None, tmax=None, time=None, codes=None,
        tinc=None, tpad=0.,
        want_incomplete=True, snap_window=False,
        degap=True, maxgap=5, maxlap=None,
        snap=None, include_last=False, load_data=True,
        accessor_id=None, clear_accessor=True, operator_params=None,
        grouping=None):

    '''
    Iterate window-wise over waveform archive.

    %(query_args)s

    :param tinc:
        Time increment (window shift time) (default uses ``tmax-tmin``).
    :type tinc:
        timestamp

    :param tpad:
        Padding time appended on either side of the data window (window
        overlap is ``2*tpad``).
    :type tpad:
        timestamp

    :param want_incomplete:
        If ``True``, gappy/incomplete traces are included in the result.
    :type want_incomplete:
        bool

    :param snap_window:
        If ``True``, start time windows at multiples of tinc with respect
        to system time zero.
    :type snap_window:
        bool

    :param degap:
        If ``True``, connect traces and remove gaps and overlaps.
    :type degap:
        bool

    :param maxgap:
        Maximum gap size in samples which is filled with interpolated
        samples when ``degap`` is ``True``.
    :type maxgap:
        int

    :param maxlap:
        Maximum overlap size in samples which is removed when ``degap`` is
        ``True``.
    :type maxlap:
        int

    :param snap:
        Rounding functions used when computing sample index from time
        instance, for trace start and trace end, respectively. By default,
        ``(round, round)`` is used.
    :type snap:
        tuple of 2 callables

    :param include_last:
        If ``True``, add one more sample to the returned traces (the sample
        which would be the first sample of a query with ``tmin`` set to the
        current value of ``tmax``).
    :type include_last:
        bool

    :param load_data:
        If ``True``, waveform data samples are read from files (or cache).
        If ``False``, meta-information-only traces are returned (dummy
        traces with no data samples).
    :type load_data:
        bool

    :param accessor_id:
        Name of consumer on whose behalf data is accessed. Used in cache
        management (see :py:mod:`~pyrocko.squirrel.cache`). Used as a key
        to distinguish different points of extraction for the decision of
        when to release cached waveform data. Should be used when data is
        alternately extracted from more than one region / selection. If
        not given, a name of the form ``chopperN`` is generated.
    :type accessor_id:
        str

    :param clear_accessor:
        If ``True`` (default), :py:meth:`clear_accessor` is called when the
        chopper finishes. Set to ``False`` to keep loaded waveforms in
        memory when the generator returns.
    :type clear_accessor:
        bool

    :param operator_params:
        Passed through unchanged to :py:meth:`get_waveforms`.

    :param grouping:
        By default, traversal over the data is over time and all matching
        traces of a time window are yielded. Using this option, it is
        possible to traverse the data first by group (e.g. station or
        network) and second by time. This can reduce the number of traces
        in each batch and thus reduce the memory footprint of the process.
    :type grouping:
        :py:class:`~pyrocko.squirrel.operator.Grouping`

    :yields:
        One ``Batch`` object for every extracted time window (and group).
        It carries the window time span (``tmin``, ``tmax``), the window
        index ``i`` and window count ``n``, the group index ``igroup`` and
        group count ``ngroups``, and the list of extracted ``traces``.

    See :py:meth:`iter_nuts` for details on time span matching.
    '''

    tmin, tmax, codes = self._get_selection_args(
        WAVEFORM, obj, tmin, tmax, time, codes)

    span_tmin, span_tmax = self.get_time_span(
        ['waveform', 'waveform_promise'])

    if span_tmin is None or span_tmax is None:
        logger.warning(
            'Content has undefined time span. No waveforms and no '
            'waveform promises?')
        return

    if snap_window and tinc is not None:
        if tmin is None:
            tmin = span_tmin

        if tmax is None:
            tmax = span_tmax

        # Align the iteration range to multiples of tinc.
        tmin = math.floor(tmin / tinc) * tinc
        tmax = math.ceil(tmax / tinc) * tinc
    else:
        # Leave room for the padding at both ends of the available data.
        if tmin is None:
            tmin = span_tmin + tpad

        if tmax is None:
            tmax = span_tmax - tpad

    if tinc is None:
        tinc = tmax - tmin

    try:
        if accessor_id is None:
            accessor_id = 'chopper%i' % self._n_choppers_active

        self._n_choppers_active += 1

        if tinc != 0.0:
            # Small tolerance so that a range ending exactly on a window
            # boundary does not produce an extra, empty window.
            eps = tinc * 1e-6
            nwin = int(((tmax - eps) - tmin) / tinc) + 1
        else:
            nwin = 1

        if grouping is None:
            codes_list = [codes]
        else:
            operator = Operator(
                filter=CodesPatternFilter(codes=codes),
                grouping=grouping)

            available = set(self.get_codes(kind='waveform'))
            available.update(self.get_codes(kind='waveform_promise'))
            operator.update_mappings(sorted(available), self, tmin, tmax)

            codes_list = [
                codes_patterns_list(group_codes)
                for group_codes in operator.iter_in_codes()]

        # Arguments which are the same for every window and group.
        common_kwargs = dict(
            snap=snap,
            include_last=include_last,
            load_data=load_data,
            want_incomplete=want_incomplete,
            degap=degap,
            maxgap=maxgap,
            maxlap=maxlap,
            accessor_id=accessor_id,
            operator_params=operator_params)

        ngroups = len(codes_list)
        for igroup, group_codes in enumerate(codes_list):
            for iwin in range(nwin):
                wmin = tmin+iwin*tinc
                wmax = min(tmin+(iwin+1)*tinc, tmax)

                chopped = self.get_waveforms(
                    tmin=wmin-tpad,
                    tmax=wmax+tpad,
                    codes=group_codes,
                    **common_kwargs)

                self.advance_accessor(accessor_id)

                yield Batch(
                    tmin=wmin,
                    tmax=wmax,
                    i=iwin,
                    n=nwin,
                    igroup=igroup,
                    ngroups=ngroups,
                    traces=chopped)

    finally:
        self._n_choppers_active -= 1
        if clear_accessor:
            self.clear_accessor(accessor_id, 'waveform')

2401 

def _process_chopped(
        self, chopped, degap, maxgap, maxlap, want_incomplete, tmin, tmax):

    '''
    Post-process traces which have been cut to a time window.

    The given list is sorted in place by ``full_id``. If ``degap`` is set,
    the traces are passed through ``trace.degapper`` with the given
    ``maxgap`` and ``maxlap``. Unless ``want_incomplete`` is set, only
    traces spanning [``tmin``, ``tmax``] to within half a sampling interval
    are kept; with ``degap`` enabled, traces missing up to five samples at
    both ends are extended by repeating the edge samples and kept as well.

    :returns:
        List of processed traces.
    '''

    chopped.sort(key=lambda tr: tr.full_id)
    if degap:
        chopped = trace.degapper(chopped, maxgap=maxgap, maxlap=maxlap)

    if want_incomplete:
        return chopped

    complete = []
    for tr in chopped:
        # Misfit of trace start / end with respect to the window edges.
        emin = tr.tmin - tmin
        emax = tr.tmax + tr.deltat - tmax

        if abs(emin) <= 0.5*tr.deltat and abs(emax) <= 0.5*tr.deltat:
            complete.append(tr)
            continue

        if not degap:
            continue

        slightly_short = (
            0. < emin <= 5. * tr.deltat
            and -5. * tr.deltat <= emax < 0.)

        if slightly_short:
            tr.extend(tmin, tmax-tr.deltat, fillmethod='repeat')
            complete.append(tr)

    return complete

2427 

def _get_pyrocko_stations(
        self, obj=None, tmin=None, tmax=None, time=None, codes=None):

    '''
    Build classic ``pyrocko.model`` station objects from matching content.

    Station and channel entries matching the query are grouped by their
    ``nsl`` codes. For each group, the station arguments contributed by
    stations and channels are merged with ``util.consistency_merge``, as
    are the channel arguments of channels sharing a channel code.

    :returns:
        List of ``pyrocko.model.Station`` objects with attached channels,
        ordered by ``nsl`` codes.
    '''

    from pyrocko import model as pmodel

    # nsl -> (list of station argument tuples, list of channel objects)
    grouped = defaultdict(lambda: ([], []))

    for station in self.get_stations(obj, tmin, tmax, time, codes):
        station_args = station._get_pyrocko_station_args()
        grouped[station.codes.nsl][0].append(station_args)

    for channel in self.get_channels(obj, tmin, tmax, time, codes):
        station_args = channel._get_pyrocko_station_args()
        args_collected, channels_collected = grouped[channel.codes.nsl]
        args_collected.append(station_args)
        channels_collected.append(channel)

    pstations = []
    for nsl in sorted(grouped.keys()):
        args_collected, channels_collected = grouped[nsl]

        # consistency_merge expects a leading label element in each tuple.
        merged_station_args = util.consistency_merge(
            [('',) + args for args in args_collected])

        args_by_channel = defaultdict(list)
        for channel in channels_collected:
            args_by_channel[channel.codes.channel].append(
                channel._get_pyrocko_channel_args())

        pchannels = []
        for cha in sorted(args_by_channel.keys()):
            merged_channel_args = util.consistency_merge(
                [('',) + args for args in args_by_channel[cha]])
            pchannels.append(pmodel.Channel(*merged_channel_args))

        pstations.append(
            pmodel.Station(*merged_station_args, channels=pchannels))

    return pstations

2469 

2470 @property 

def pile(self):

    '''
    Emulates the older :py:class:`pyrocko.pile.Pile` interface.

    The returned :py:class:`pyrocko.squirrel.pile.Pile` object mimics most
    methods of the older :py:class:`pyrocko.pile.Pile` while using the
    fluffy power of the Squirrel under the hood. It is created on first
    access and reused afterwards.

    Existing scripts and programs which rely on piles for efficient
    waveform data access can use it as a drop-in replacement. The
    Squirrel-based pile scales better for large datasets. Newer scripts
    should use Squirrel's native methods to avoid the emulation overhead.
    '''
    from . import pile as pile_emulation

    if self._pile is not None:
        return self._pile

    self._pile = pile_emulation.Pile(self)
    return self._pile

2492 

def snuffle(self):
    '''
    Look at dataset in Snuffler.

    Delegates to the ``snuffle`` method of the :py:attr:`pile` emulation.
    '''
    emulated_pile = self.pile
    emulated_pile.snuffle()

2498 

def _gather_codes_keys(self, kind, gather, selector):
    '''
    Collect the distinct keys derived from the codes of a content kind.

    :param kind:
        Content kind handed to :py:meth:`iter_codes`.
    :param gather:
        Callable mapping a codes object to the key to collect.
    :param selector:
        Optional predicate; if given, only codes for which it returns a
        true value are considered.

    :returns:
        :py:class:`set` of gathered keys.
    '''
    keys = set()
    for codes in self.iter_codes(kind):
        if selector is not None and not selector(codes):
            continue

        keys.add(gather(codes))

    return keys

2504 

def __str__(self):
    '''
    Return the string form of the statistics from :py:meth:`get_stats`.
    '''
    stats = self.get_stats()
    return str(stats)

2507 

def get_coverage(
        self, kind, tmin=None, tmax=None, codes=None, limit=None):

    '''
    Get coverage information.

    Get information about strips of gapless data coverage.

    :param kind:
        Content kind to be queried.
    :type kind:
        str

    :param tmin:
        Start time of query interval.
    :type tmin:
        timestamp

    :param tmax:
        End time of query interval.
    :type tmax:
        timestamp

    :param codes:
        If given, restrict query to given content codes patterns.
    :type codes:
        :py:class:`list` of :py:class:`~pyrocko.squirrel.model.Codes`
        objects appropriate for the queried content type, or anything which
        can be converted to such objects.

    :param limit:
        Limit query to return only up to a given maximum number of entries
        per matching time series (without setting this option, very gappy
        data could cause the query to execute for a very long time). When
        the limit is reached for a time series, its list of coverage
        changes is replaced by ``None``.
    :type limit:
        int

    :returns:
        Information about time spans covered by the requested time series
        data.
    :rtype:
        :py:class:`list` of :py:class:`Coverage` objects
    '''

    tmin_seconds, tmin_offset = model.tsplit(tmin)
    tmax_seconds, tmax_offset = model.tsplit(tmax)
    kind_id = to_kind_id(kind)

    codes_info = list(self._iter_codes_info(kind=kind))

    # Entries: (pattern, kind_codes_id, codes, deltat)
    if codes is None:
        kdata_all = [
            (codes_entry, kind_codes_id, codes_entry, deltat)
            for _, codes_entry, deltat, kind_codes_id, _ in codes_info]
    else:
        kdata_all = []
        for requested in codes:
            pattern = to_codes(kind_id, requested)
            for _, codes_entry, deltat, kind_codes_id, _ in codes_info:
                if model.match_codes(pattern, codes_entry):
                    kdata_all.append(
                        (pattern, kind_codes_id, codes_entry, deltat))

    kind_codes_ids = [kdata[1] for kdata in kdata_all]

    # Number of nuts active exactly at tmin, per (codes, deltat). These
    # give the starting counts for the coverage changes listed below.
    counts_at_tmin = {}
    if tmin is not None:
        for nut in self.iter_nuts(
                kind, tmin, tmin, kind_codes_ids=kind_codes_ids):

            key = (nut.codes, nut.deltat)
            counts_at_tmin[key] = counts_at_tmin.get(key, 0) + 1

    coverages = []
    for pattern, kind_codes_id, codes_entry, deltat in kdata_all:

        # Earliest and latest coverage change time of this time series.
        span = [None, None]
        for i, order in enumerate(('ASC', 'DESC')):
            sql = self._sql('''
                SELECT
                    time_seconds,
                    time_offset
                FROM %(db)s.%(coverage)s
                WHERE
                    kind_codes_id == ?
                ORDER BY
                    kind_codes_id ''' + order + ''',
                    time_seconds ''' + order + ''',
                    time_offset ''' + order + '''
                LIMIT 1
            ''')

            for row in self._conn.execute(sql, [kind_codes_id]):
                span[i] = model.tjoin(row[0], row[1])

        if None in span:
            continue

        args = [kind_codes_id]

        sql_time = ''
        if tmin is not None:
            # intentionally < because (== tmin) is queried from nuts
            sql_time += ' AND ( ? < time_seconds ' \
                'OR ( ? == time_seconds AND ? < time_offset ) ) '
            args.extend([tmin_seconds, tmin_seconds, tmin_offset])

        if tmax is not None:
            sql_time += ' AND ( time_seconds < ? ' \
                'OR ( ? == time_seconds AND time_offset <= ? ) ) '
            args.extend([tmax_seconds, tmax_seconds, tmax_offset])

        sql_limit = ''
        if limit is not None:
            sql_limit = ' LIMIT ?'
            args.append(limit)

        sql = self._sql('''
            SELECT
                time_seconds,
                time_offset,
                step
            FROM %(db)s.%(coverage)s
            WHERE
                kind_codes_id == ?
                ''' + sql_time + '''
            ORDER BY
                kind_codes_id,
                time_seconds,
                time_offset
        ''' + sql_limit)

        rows = list(self._conn.execute(sql, args))

        if limit is not None and len(rows) == limit:
            # Result may be truncated: do not report a partial list.
            changes = None
        else:
            changes = []
            counts = counts_at_tmin.get((codes_entry, deltat), 0)
            tlast = None
            if tmin is not None:
                changes.append((tmin, counts))
                tlast = tmin

            for row in rows:
                t = model.tjoin(row[0], row[1])
                counts += row[2]
                changes.append((t, counts))
                tlast = t

            if tmax is not None and (tlast is None or tlast != tmax):
                changes.append((tmax, counts))

        coverages.append(model.Coverage.from_values([
            pattern, codes_entry, deltat, span[0], span[1], changes,
            kind_id]))

    return coverages

2666 

@filldocs
def get_stationxml(
        self, obj=None, tmin=None, tmax=None, time=None, codes=None,
        level='response'):

    '''
    Get station/channel/response metadata in StationXML representation.

    %(query_args)s

    :param level:
        Level of detail to include. One of ``'network'``, ``'station'``,
        ``'channel'`` or ``'response'``. Each level includes the
        information of the levels listed before it.
    :type level:
        str

    :returns:
        :py:class:`~pyrocko.io.stationxml.FDSNStationXML` object.

    :raises:
        :py:exc:`ValueError` if ``level`` is not one of the supported
        values. ``pyrocko.io.stationxml.Inconsistencies`` if station or
        channel epochs overlap, if no containing station epoch is found
        for a channel, or if a response time span does not match the time
        span of its channel.
    '''

    if level not in ('network', 'station', 'channel', 'response'):
        raise ValueError('Invalid level: %s' % level)

    tmin, tmax, codes = self._get_selection_args(
        CHANNEL, obj, tmin, tmax, time, codes)

    # Named codes_filter to avoid shadowing the builtin ``filter``.
    codes_filter = CodesPatternFilter(codes=codes)

    nslcs = list(set(
        channel_codes.nslc for channel_codes in
        codes_filter.filter(self.get_codes(kind='channel'))))

    from pyrocko.io import stationxml as sx

    networks = []
    for net, stas in prefix_tree(nslcs):
        network = sx.Network(code=net)
        networks.append(network)

        if level not in ('station', 'channel', 'response'):
            continue

        for sta, locs in stas:
            stations = self.get_stations(
                tmin=tmin,
                tmax=tmax,
                codes=(net, sta, '*'),
                model='stationxml')

            errors = sx.check_overlaps(
                'Station', (net, sta), stations)

            if errors:
                raise sx.Inconsistencies(
                    'Inconsistencies found:\n %s'
                    % '\n '.join(errors))

            network.station_list.extend(stations)

            if level not in ('channel', 'response'):
                continue

            for loc, chas in locs:
                for cha, _ in chas:
                    channels = self.get_channels(
                        tmin=tmin,
                        tmax=tmax,
                        codes=(net, sta, loc, cha),
                        model='stationxml')

                    errors = sx.check_overlaps(
                        'Channel', (net, sta, loc, cha), channels)

                    if errors:
                        raise sx.Inconsistencies(
                            'Inconsistencies found:\n %s'
                            % '\n '.join(errors))

                    for channel in channels:
                        # Attach each channel epoch to the station epoch
                        # which contains it.
                        station = sx.find_containing(stations, channel)
                        if station is not None:
                            station.channel_list.append(channel)
                        else:
                            raise sx.Inconsistencies(
                                'No station or station epoch found for '
                                'channel: %s' % '.'.join(
                                    (net, sta, loc, cha)))

                        if level != 'response':
                            continue

                        # 'stationxml+' yields both the Squirrel and the
                        # StationXML representation of the response.
                        response_sq, response_sx = self.get_response(
                            codes=(net, sta, loc, cha),
                            tmin=channel.start_date,
                            tmax=channel.end_date,
                            model='stationxml+')

                        if not (
                                sx.eq_open(
                                    channel.start_date, response_sq.tmin)
                                and sx.eq_open(
                                    channel.end_date, response_sq.tmax)):

                            raise sx.Inconsistencies(
                                'Response time span does not match '
                                'channel time span: %s' % '.'.join(
                                    (net, sta, loc, cha)))

                        channel.response = response_sx

    return sx.FDSNStationXML(
        source='Generated by Pyrocko Squirrel.',
        network_list=networks)

2773 

def get_location_pool(self, tmin=None, tmax=None):
    '''
    Create a ``model.LocationPool`` for this Squirrel.

    :param tmin:
        Passed on as ``tmin`` to the location pool.
    :param tmax:
        Passed on as ``tmax`` to the location pool.

    :returns:
        New ``model.LocationPool`` object.
    '''
    pool = model.LocationPool(self, tmin=tmin, tmax=tmax)
    return pool

2776 

def add_operator(self, op):
    '''
    Append an operator to the list of attached operators.

    :param op:
        Operator to attach.
    '''
    operators = self._operators
    operators.append(op)

2779 

def update_operator_mappings(self):
    '''
    Refresh the mappings of all attached operators.

    Each operator's ``update_mappings`` is called with the currently
    available channel codes, this Squirrel, an open time span
    (``None, None``) and the operator registry.
    '''
    channel_codes = self.get_codes(kind='channel')
    registry = self._operator_registry

    for operator in self._operators:
        operator.update_mappings(channel_codes, self, None, None, registry)

2786 

def iter_operator_mappings(self):
    '''
    Iterate over the mappings of all attached operators.

    :yields:
        Tuples ``(operator, in_codes, out_codes)``, one for every mapping
        reported by each operator's ``iter_mappings``.
    '''
    for operator in self._operators:
        for mapping in operator.iter_mappings():
            in_codes, out_codes = mapping
            yield operator, in_codes, out_codes

2791 

def get_operator_mappings(self):
    '''
    Get the mappings of all attached operators as a list.

    :returns:
        List of the tuples produced by :py:meth:`iter_operator_mappings`.
    '''
    mappings = []
    mappings.extend(self.iter_operator_mappings())
    return mappings

2794 

def get_operator(self, codes):
    '''
    Look up the operator registered for the given codes.

    :returns:
        First element of the registry entry for ``codes``, or ``None`` if
        the lookup raises :py:exc:`KeyError`.
    '''
    try:
        operator = self._operator_registry[codes][0]
    except KeyError:
        operator = None

    return operator

2800 

def get_operator_group(self, codes):
    '''
    Look up the complete registry entry for the given codes.

    :returns:
        The registry entry for ``codes``, or the placeholder
        ``(None, (None, None, None))`` if there is no such entry.
    '''
    try:
        group = self._operator_registry[codes]
    except KeyError:
        group = (None, (None, None, None))

    return group

2806 

def iter_operator_codes(self):
    '''
    Iterate over the output codes of all operator mappings.

    :yields:
        Every element of the ``out_codes`` of each mapping obtained from
        :py:meth:`iter_operator_mappings`, in order.
    '''
    for _operator, _in_codes, out_codes in self.iter_operator_mappings():
        for produced in out_codes:
            yield produced

2811 

def get_operator_codes(self):
    '''
    Get the output codes of all operator mappings as a list.

    :returns:
        List of the codes produced by :py:meth:`iter_operator_codes`.
    '''
    produced = []
    produced.extend(self.iter_operator_codes())
    return produced

2814 

def print_tables(self, table_names=None, stream=None):
    '''
    Dump raw database tables in textual form (for debugging purposes).

    :param table_names:
        Names of tables to be dumped or ``None`` to dump all. A single
        name may be given as a plain string.
    :type table_names:
        :py:class:`list` of :py:class:`str`

    :param stream:
        Open file or ``None`` to dump to standard output.
    '''

    out = sys.stdout if stream is None else stream

    if table_names is None:
        selected = [
            'selection_file_states',
            'selection_nuts',
            'selection_kind_codes_count',
            'files', 'nuts', 'kind_codes', 'kind_codes_count']
    elif isinstance(table_names, str):
        selected = [table_names]
    else:
        selected = table_names

    # Public table names mapped to name templates; the selection specific
    # ones are completed with this Squirrel's own database/table names.
    templates = {
        'selection_file_states': '%(db)s.%(file_states)s',
        'selection_nuts': '%(db)s.%(nuts)s',
        'selection_kind_codes_count': '%(db)s.%(kind_codes_count)s',
        'files': 'files',
        'nuts': 'nuts',
        'kind_codes': 'kind_codes',
        'kind_codes_count': 'kind_codes_count'}

    for table_name in selected:
        actual_name = templates[table_name] % self._names
        self._database.print_table(actual_name, stream=out)

2853 

2854 

class SquirrelStats(Object):
    '''
    Container to hold statistics about contents available from a Squirrel.

    Converting an instance to :py:class:`str` gives a human-readable,
    multi-line summary.

    See also :py:meth:`Squirrel.get_stats`.
    '''

    nfiles = Int.T(
        help='Number of files in selection.')
    nnuts = Int.T(
        help='Number of index nuts in selection.')
    codes = List.T(
        Tuple.T(content_t=String.T()),
        help='Available code sequences in selection, e.g. '
             '(agency, network, station, location) for stations nuts.')
    kinds = List.T(
        String.T(),
        help='Available content types in selection.')
    total_size = Int.T(
        help='Aggregated file size of files is selection.')
    counts = Dict.T(
        String.T(), Dict.T(Tuple.T(content_t=String.T()), Int.T()),
        help='Breakdown of how many nuts of any content type and code '
             'sequence are available in selection, ``counts[kind][codes]``.')
    time_spans = Dict.T(
        String.T(), Tuple.T(content_t=Timestamp.T()),
        help='Time spans by content type.')
    sources = List.T(
        String.T(),
        help='Descriptions of attached sources.')
    operators = List.T(
        String.T(),
        help='Descriptions of attached operators.')

    def __str__(self):
        '''
        Format the statistics as a multi-line, human-readable report.
        '''

        def listing(items):
            # One item per line below the heading, or a placeholder.
            if not items:
                return '<none>'

            return '\n' + '\n'.join(' ' + item for item in items)

        def stime(t):
            # Undefined and global-limit times are shown as placeholder.
            if t is None or t in (model.g_tmin, model.g_tmax):
                return '<none>'

            return util.tts(t)

        def stable(rows):
            # Left-align cells so that columns line up.
            widths = [
                max(len(cell) for cell in column) for column in zip(*rows)]

            lines = []
            for row in rows:
                lines.append(' '.join(
                    cell.ljust(width) for width, cell in zip(widths, row)))

            return '\n'.join(lines)

        def indent(s):
            return '\n'.join(' '+line for line in s.splitlines())

        kind_counts = {}
        for kind in self.kinds:
            kind_counts[kind] = sum(self.counts[kind].values())

        scodes = model.codes_to_str_abbreviated(self.codes)
        ssources = listing(self.sources)
        soperators = listing(self.operators)

        if self.kinds:
            rows = []
            for kind in sorted(self.kinds):
                rows.append((
                    kind + ':',
                    str(kind_counts[kind]),
                    stime(self.time_spans[kind][0]),
                    '-',
                    stime(self.time_spans[kind][1])))

            stspans = '\n' + indent(stable(rows))
        else:
            stspans = '<none>'

        s = '''
Number of files: %i
Total size of known files: %s
Number of index nuts: %i
Available content kinds: %s
Available codes: %s
Sources: %s
Operators: %s''' % (
            self.nfiles,
            util.human_bytesize(self.total_size),
            self.nnuts,
            stspans, scodes, ssources, soperators)

        return s.lstrip()

2935 

2936 

# Names exported by ``from <module> import *``.
__all__ = ['Squirrel', 'SquirrelStats']