1# http://pyrocko.org - GPLv3 

2# 

3# The Pyrocko Developers, 21st Century 

4# ---|P------/S----------~Lg---------- 

5 

6from __future__ import absolute_import, print_function 

7 

8import sys 

9import os 

10 

11import math 

12import logging 

13import threading 

14import queue 

15from collections import defaultdict 

16 

17from pyrocko.guts import Object, Int, List, Tuple, String, Timestamp, Dict 

18from pyrocko import util, trace 

19from pyrocko.progress import progress 

20 

21from . import model, io, cache, dataset 

22 

23from .model import to_kind_id, WaveformOrder, to_kind, to_codes, \ 

24 STATION, CHANNEL, RESPONSE, EVENT, WAVEFORM 

25from .client import fdsn, catalog 

26from .selection import Selection, filldocs 

27from .database import abspath 

28from . import client, environment, error 

29 

30logger = logging.getLogger('psq.base') 

31 

32guts_prefix = 'squirrel' 

33 

34 

35def make_task(*args): 

36 return progress.task(*args, logger=logger) 

37 

38 

39def lpick(condition, seq): 

40 ft = [], [] 

41 for ele in seq: 

42 ft[int(bool(condition(ele)))].append(ele) 

43 

44 return ft 
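
# Illustrative behaviour (sketch): elements failing the condition go to the
# first list, matching elements to the second, e.g.
#
#   >>> lpick(lambda x: x % 2 == 1, [1, 2, 3, 4])
#   ([2, 4], [1, 3])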

45 

46 

47# derived list class to enable detection of already preprocessed codes patterns 

48class codes_patterns_list(list): 

49 pass 

50 

51 

52def codes_patterns_for_kind(kind_id, codes): 

53 if isinstance(codes, codes_patterns_list): 

54 return codes 

55 

56 if isinstance(codes, list): 

57 lcodes = codes_patterns_list() 

58 for sc in codes: 

59 lcodes.extend(codes_patterns_for_kind(kind_id, sc)) 

60 

61 return lcodes 

62 

63 codes = to_codes(kind_id, codes) 

64 

65 lcodes = codes_patterns_list() 

66 lcodes.append(codes) 

67 if kind_id == model.STATION: 

68 lcodes.append(codes.replace(location='[*]'))

69 

70 return lcodes 
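
# Illustrative note: for kind_id == model.STATION each given pattern is
# duplicated with location '[*]', so that station queries also match entries
# carrying a location code.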

71 

72 

73def blocks(tmin, tmax, deltat, nsamples_block=100000): 

74 tblock = util.to_time_float(deltat * nsamples_block) 

75 iblock_min = int(math.floor(tmin / tblock)) 

76 iblock_max = int(math.ceil(tmax / tblock)) 

77 for iblock in range(iblock_min, iblock_max): 

78 yield iblock * tblock, (iblock+1) * tblock 
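
# Illustrative behaviour (sketch): with deltat=0.01 and the default block size
# the block length is 1000 s and the yielded windows are aligned to multiples
# of it while covering the requested span, e.g.
#
#   >>> list(blocks(2500., 4500., 0.01))
#   [(2000.0, 3000.0), (3000.0, 4000.0), (4000.0, 5000.0)]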

79 

80 

81def gaps(avail, tmin, tmax): 

82 assert tmin < tmax 

83 

84 data = [(tmax, 1), (tmin, -1)] 

85 for (tmin_a, tmax_a) in avail: 

86 assert tmin_a < tmax_a 

87 data.append((tmin_a, 1)) 

88 data.append((tmax_a, -1)) 

89 

90 data.sort() 

91 s = 1 

92 gaps = [] 

93 tmin_g = None 

94 for t, x in data: 

95 if s == 1 and x == -1: 

96 tmin_g = t 

97 elif s == 0 and x == 1 and tmin_g is not None: 

98 tmax_g = t 

99 if tmin_g != tmax_g: 

100 gaps.append((tmin_g, tmax_g)) 

101 

102 s += x 

103 

104 return gaps 
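
# Illustrative behaviour (sketch): returns the parts of [tmin, tmax] not
# covered by the given intervals, e.g.
#
#   >>> gaps([(10., 20.), (30., 40.)], 0., 50.)
#   [(0.0, 10.0), (20.0, 30.0), (40.0, 50.0)]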

105 

106 

107def order_key(order): 

108 return (order.codes, order.tmin, order.tmax) 

109 

110 

111class Batch(object): 

112 ''' 

113 Batch of waveforms from window-wise data extraction. 

114 

115 Encapsulates state and results yielded for each window in window-wise 

116 waveform extraction with the :py:meth:`Squirrel.chopper_waveforms` method. 

117 

118 *Attributes:* 

119 

120 .. py:attribute:: tmin 

121 

122 Start of this time window. 

123 

124 .. py:attribute:: tmax 

125 

126 End of this time window. 

127 

128 .. py:attribute:: i 

129 

130 Index of this time window in sequence. 

131 

132 .. py:attribute:: n 

133 

134 Total number of time windows in sequence. 

135 

136 .. py:attribute:: traces 

137 

138 Extracted waveforms for this time window. 

139 ''' 

140 

141 def __init__(self, tmin, tmax, i, n, traces): 

142 self.tmin = tmin 

143 self.tmax = tmax 

144 self.i = i 

145 self.n = n 

146 self.traces = traces 
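
# Illustrative sketch: Batch objects are yielded by
# Squirrel.chopper_waveforms(), e.g. (``tinc`` sets the window length):
#
#   for batch in sq.chopper_waveforms(tmin=tmin, tmax=tmax, tinc=3600.):
#       print(batch.i, batch.n, len(batch.traces))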

147 

148 

149class Squirrel(Selection): 

150 ''' 

151 Prompt, lazy, indexing, caching, dynamic seismological dataset access. 

152 

153 :param env: 

154 Squirrel environment instance or directory path to use as starting 

155 point for its detection. By default, the current directory is used as 

156 starting point. When searching for a usable environment the directory 

157 ``'.squirrel'`` or ``'squirrel'`` in the current (or starting point) 

158 directory is used if it exists, otherwise the parent directories are 

159 searched upwards for the existence of such a directory. If no such

160 directory is found, the user's global Squirrel environment 

161 ``'$HOME/.pyrocko/squirrel'`` is used. 

162 :type env: 

163 :py:class:`~pyrocko.squirrel.environment.Environment` or 

164 :py:class:`str` 

165 

166 :param database: 

167 Database instance or path to database. By default the 

168 database found in the detected Squirrel environment is used. 

169 :type database: 

170 :py:class:`~pyrocko.squirrel.database.Database` or :py:class:`str` 

171 

172 :param cache_path: 

173 Directory path to use for data caching. By default, the ``'cache'`` 

174 directory in the detected Squirrel environment is used. 

175 :type cache_path: 

176 :py:class:`str` 

177 

178 :param persistent: 

179 If given a name, create a persistent selection. 

180 :type persistent: 

181 :py:class:`str` 

182 

183 This is the central class of the Squirrel framework. It provides a unified 

184 interface to query and access seismic waveforms, station meta-data and 

185 event information from local file collections and remote data sources. For 

186 prompt responses, a sophisticated database setup is used under the hood. To

187 speed up assemblage of ad-hoc data selections, files are indexed on first 

188 use and the extracted meta-data is remembered in the database for 

189 subsequent accesses. Bulk data is lazily loaded from disk and remote 

190 sources, just when requested. Once loaded, data is cached in memory to 

191 expedite typical access patterns. Files and data sources can be dynamically 

192 added to and removed from the Squirrel selection at runtime. 

193 

194 Queries are restricted to the contents of the files currently added to the 

195 Squirrel selection (usually a subset of the file meta-information 

196 collection in the database). This list of files is referred to here as the 

197 "selection". By default, temporary tables are created in the attached 

198 database to hold the names of the files in the selection as well as various 

199 indices and counters. These tables are only visible inside the application 

200 which created them and are deleted when the database connection is closed 

201 or the application exits. To create a selection which is not deleted at 

202 exit, supply a name to the ``persistent`` argument of the Squirrel 

203 constructor. Persistent selections are shared among applications using the 

204 same database. 
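
    Basic usage sketch (the paths below are placeholders; adapt them to an
    actual data collection)::

        from pyrocko.squirrel import Squirrel

        sq = Squirrel()
        sq.add(['data/waveforms', 'data/stations.xml'])

        print(sq.get_stats())
        for codes in sq.get_codes('station'):
            print(codes)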

205 

206 **Method summary** 

207 

208 Some of the methods are implemented in :py:class:`Squirrel`'s base class 

209 :py:class:`~pyrocko.squirrel.selection.Selection`. 

210 

211 .. autosummary:: 

212 

213 ~Squirrel.add 

214 ~Squirrel.add_source 

215 ~Squirrel.add_fdsn 

216 ~Squirrel.add_catalog 

217 ~Squirrel.add_dataset 

218 ~Squirrel.add_virtual 

219 ~Squirrel.update 

220 ~Squirrel.update_waveform_promises 

221 ~Squirrel.advance_accessor 

222 ~Squirrel.clear_accessor 

223 ~Squirrel.reload 

224 ~pyrocko.squirrel.selection.Selection.iter_paths 

225 ~Squirrel.iter_nuts 

226 ~Squirrel.iter_kinds 

227 ~Squirrel.iter_deltats 

228 ~Squirrel.iter_codes 

229 ~pyrocko.squirrel.selection.Selection.get_paths 

230 ~Squirrel.get_nuts 

231 ~Squirrel.get_kinds 

232 ~Squirrel.get_deltats 

233 ~Squirrel.get_codes 

234 ~Squirrel.get_counts 

235 ~Squirrel.get_time_span 

236 ~Squirrel.get_deltat_span 

237 ~Squirrel.get_nfiles 

238 ~Squirrel.get_nnuts 

239 ~Squirrel.get_total_size 

240 ~Squirrel.get_stats 

241 ~Squirrel.get_content 

242 ~Squirrel.get_stations 

243 ~Squirrel.get_channels 

244 ~Squirrel.get_responses 

245 ~Squirrel.get_events 

246 ~Squirrel.get_waveform_nuts 

247 ~Squirrel.get_waveforms 

248 ~Squirrel.chopper_waveforms 

249 ~Squirrel.get_coverage 

250 ~Squirrel.pile 

251 ~Squirrel.snuffle 

252 ~Squirrel.glob_codes 

253 ~pyrocko.squirrel.selection.Selection.get_database 

254 ~Squirrel.print_tables 

255 ''' 

256 

257 def __init__( 

258 self, env=None, database=None, cache_path=None, persistent=None): 

259 

260 if not isinstance(env, environment.Environment): 

261 env = environment.get_environment(env) 

262 

263 if database is None: 

264 database = env.expand_path(env.database_path) 

265 

266 if cache_path is None: 

267 cache_path = env.expand_path(env.cache_path) 

268 

269 if persistent is None: 

270 persistent = env.persistent 

271 

272 Selection.__init__( 

273 self, database=database, persistent=persistent) 

274 

275 self.get_database().set_basepath(os.path.dirname(env.get_basepath())) 

276 

277 self._content_caches = { 

278 'waveform': cache.ContentCache(), 

279 'default': cache.ContentCache()} 

280 

281 self._cache_path = cache_path 

282 

283 self._sources = [] 

284 self._operators = [] 

285 self._operator_registry = {} 

286 

287 self._pile = None 

288 self._n_choppers_active = 0 

289 

290 self._names.update({ 

291 'nuts': self.name + '_nuts', 

292 'kind_codes_count': self.name + '_kind_codes_count', 

293 'coverage': self.name + '_coverage'}) 

294 

295 with self.transaction('create tables') as cursor: 

296 self._create_tables_squirrel(cursor) 

297 

298 def _create_tables_squirrel(self, cursor): 

299 

300 cursor.execute(self._register_table(self._sql( 

301 ''' 

302 CREATE TABLE IF NOT EXISTS %(db)s.%(nuts)s ( 

303 nut_id integer PRIMARY KEY, 

304 file_id integer, 

305 file_segment integer, 

306 file_element integer, 

307 kind_id integer, 

308 kind_codes_id integer, 

309 tmin_seconds integer, 

310 tmin_offset integer, 

311 tmax_seconds integer, 

312 tmax_offset integer, 

313 kscale integer) 

314 '''))) 

315 

316 cursor.execute(self._register_table(self._sql( 

317 ''' 

318 CREATE TABLE IF NOT EXISTS %(db)s.%(kind_codes_count)s ( 

319 kind_codes_id integer PRIMARY KEY, 

320 count integer) 

321 '''))) 

322 

323 cursor.execute(self._sql( 

324 ''' 

325 CREATE UNIQUE INDEX IF NOT EXISTS %(db)s.%(nuts)s_file_element 

326 ON %(nuts)s (file_id, file_segment, file_element) 

327 ''')) 

328 

329 cursor.execute(self._sql( 

330 ''' 

331 CREATE INDEX IF NOT EXISTS %(db)s.%(nuts)s_index_file_id 

332 ON %(nuts)s (file_id) 

333 ''')) 

334 

335 cursor.execute(self._sql( 

336 ''' 

337 CREATE INDEX IF NOT EXISTS %(db)s.%(nuts)s_index_tmin_seconds 

338 ON %(nuts)s (kind_id, tmin_seconds) 

339 ''')) 

340 

341 cursor.execute(self._sql( 

342 ''' 

343 CREATE INDEX IF NOT EXISTS %(db)s.%(nuts)s_index_tmax_seconds 

344 ON %(nuts)s (kind_id, tmax_seconds) 

345 ''')) 

346 

347 cursor.execute(self._sql( 

348 ''' 

349 CREATE INDEX IF NOT EXISTS %(db)s.%(nuts)s_index_kscale 

350 ON %(nuts)s (kind_id, kscale, tmin_seconds) 

351 ''')) 

352 

353 cursor.execute(self._sql( 

354 ''' 

355 CREATE TRIGGER IF NOT EXISTS %(db)s.%(nuts)s_delete_nuts 

356 BEFORE DELETE ON main.files FOR EACH ROW 

357 BEGIN 

358 DELETE FROM %(nuts)s WHERE file_id == old.file_id; 

359 END 

360 ''')) 

361 

362 # trigger only on size to make silent update of mtime possible 

363 cursor.execute(self._sql( 

364 ''' 

365 CREATE TRIGGER IF NOT EXISTS %(db)s.%(nuts)s_delete_nuts2 

366 BEFORE UPDATE OF size ON main.files FOR EACH ROW 

367 BEGIN 

368 DELETE FROM %(nuts)s WHERE file_id == old.file_id; 

369 END 

370 ''')) 

371 

372 cursor.execute(self._sql( 

373 ''' 

374 CREATE TRIGGER IF NOT EXISTS 

375 %(db)s.%(file_states)s_delete_files 

376 BEFORE DELETE ON %(db)s.%(file_states)s FOR EACH ROW 

377 BEGIN 

378 DELETE FROM %(nuts)s WHERE file_id == old.file_id; 

379 END 

380 ''')) 

381 

382 cursor.execute(self._sql( 

383 ''' 

384 CREATE TRIGGER IF NOT EXISTS %(db)s.%(nuts)s_inc_kind_codes 

385 BEFORE INSERT ON %(nuts)s FOR EACH ROW 

386 BEGIN 

387 INSERT OR IGNORE INTO %(kind_codes_count)s VALUES 

388 (new.kind_codes_id, 0); 

389 UPDATE %(kind_codes_count)s 

390 SET count = count + 1 

391 WHERE new.kind_codes_id 

392 == %(kind_codes_count)s.kind_codes_id; 

393 END 

394 ''')) 

395 

396 cursor.execute(self._sql( 

397 ''' 

398 CREATE TRIGGER IF NOT EXISTS %(db)s.%(nuts)s_dec_kind_codes 

399 BEFORE DELETE ON %(nuts)s FOR EACH ROW 

400 BEGIN 

401 UPDATE %(kind_codes_count)s 

402 SET count = count - 1 

403 WHERE old.kind_codes_id 

404 == %(kind_codes_count)s.kind_codes_id; 

405 END 

406 ''')) 

407 

408 cursor.execute(self._register_table(self._sql( 

409 ''' 

410 CREATE TABLE IF NOT EXISTS %(db)s.%(coverage)s ( 

411 kind_codes_id integer, 

412 time_seconds integer, 

413 time_offset integer, 

414 step integer) 

415 '''))) 
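
        # Note on the %(coverage)s table: its rows record net changes of data
        # coverage per kind_codes_id over time. The triggers below add +1 at
        # segment start times and -1 at segment end times and drop rows whose
        # net step becomes zero, so a running sum of step, ordered by time,
        # yields the number of covering segments at any instant.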

416 

417 cursor.execute(self._sql( 

418 ''' 

419 CREATE UNIQUE INDEX IF NOT EXISTS %(db)s.%(coverage)s_time 

420 ON %(coverage)s (kind_codes_id, time_seconds, time_offset) 

421 ''')) 

422 

423 cursor.execute(self._sql( 

424 ''' 

425 CREATE TRIGGER IF NOT EXISTS %(db)s.%(nuts)s_add_coverage 

426 AFTER INSERT ON %(nuts)s FOR EACH ROW 

427 BEGIN 

428 INSERT OR IGNORE INTO %(coverage)s VALUES 

429 (new.kind_codes_id, new.tmin_seconds, new.tmin_offset, 0) 

430 ; 

431 UPDATE %(coverage)s 

432 SET step = step + 1 

433 WHERE new.kind_codes_id == %(coverage)s.kind_codes_id 

434 AND new.tmin_seconds == %(coverage)s.time_seconds 

435 AND new.tmin_offset == %(coverage)s.time_offset 

436 ; 

437 INSERT OR IGNORE INTO %(coverage)s VALUES 

438 (new.kind_codes_id, new.tmax_seconds, new.tmax_offset, 0) 

439 ; 

440 UPDATE %(coverage)s 

441 SET step = step - 1 

442 WHERE new.kind_codes_id == %(coverage)s.kind_codes_id 

443 AND new.tmax_seconds == %(coverage)s.time_seconds 

444 AND new.tmax_offset == %(coverage)s.time_offset 

445 ; 

446 DELETE FROM %(coverage)s 

447 WHERE new.kind_codes_id == %(coverage)s.kind_codes_id 

448 AND new.tmin_seconds == %(coverage)s.time_seconds 

449 AND new.tmin_offset == %(coverage)s.time_offset 

450 AND step == 0 

451 ; 

452 DELETE FROM %(coverage)s 

453 WHERE new.kind_codes_id == %(coverage)s.kind_codes_id 

454 AND new.tmax_seconds == %(coverage)s.time_seconds 

455 AND new.tmax_offset == %(coverage)s.time_offset 

456 AND step == 0 

457 ; 

458 END 

459 ''')) 

460 

461 cursor.execute(self._sql( 

462 ''' 

463 CREATE TRIGGER IF NOT EXISTS %(db)s.%(nuts)s_remove_coverage 

464 BEFORE DELETE ON %(nuts)s FOR EACH ROW 

465 BEGIN 

466 INSERT OR IGNORE INTO %(coverage)s VALUES 

467 (old.kind_codes_id, old.tmin_seconds, old.tmin_offset, 0) 

468 ; 

469 UPDATE %(coverage)s 

470 SET step = step - 1 

471 WHERE old.kind_codes_id == %(coverage)s.kind_codes_id 

472 AND old.tmin_seconds == %(coverage)s.time_seconds 

473 AND old.tmin_offset == %(coverage)s.time_offset 

474 ; 

475 INSERT OR IGNORE INTO %(coverage)s VALUES 

476 (old.kind_codes_id, old.tmax_seconds, old.tmax_offset, 0) 

477 ; 

478 UPDATE %(coverage)s 

479 SET step = step + 1 

480 WHERE old.kind_codes_id == %(coverage)s.kind_codes_id 

481 AND old.tmax_seconds == %(coverage)s.time_seconds 

482 AND old.tmax_offset == %(coverage)s.time_offset 

483 ; 

484 DELETE FROM %(coverage)s 

485 WHERE old.kind_codes_id == %(coverage)s.kind_codes_id 

486 AND old.tmin_seconds == %(coverage)s.time_seconds 

487 AND old.tmin_offset == %(coverage)s.time_offset 

488 AND step == 0 

489 ; 

490 DELETE FROM %(coverage)s 

491 WHERE old.kind_codes_id == %(coverage)s.kind_codes_id 

492 AND old.tmax_seconds == %(coverage)s.time_seconds 

493 AND old.tmax_offset == %(coverage)s.time_offset 

494 AND step == 0 

495 ; 

496 END 

497 ''')) 

498 

499 def _delete(self): 

500 '''Delete database tables associated with this Squirrel.''' 

501 

502 with self.transaction('delete tables') as cursor: 

503 for s in ''' 

504 DROP TRIGGER %(db)s.%(nuts)s_delete_nuts; 

505 DROP TRIGGER %(db)s.%(nuts)s_delete_nuts2; 

506 DROP TRIGGER %(db)s.%(file_states)s_delete_files; 

507 DROP TRIGGER %(db)s.%(nuts)s_inc_kind_codes; 

508 DROP TRIGGER %(db)s.%(nuts)s_dec_kind_codes; 

509 DROP TABLE %(db)s.%(nuts)s; 

510 DROP TABLE %(db)s.%(kind_codes_count)s; 

511 DROP TRIGGER IF EXISTS %(db)s.%(nuts)s_add_coverage; 

512 DROP TRIGGER IF EXISTS %(db)s.%(nuts)s_remove_coverage; 

513 DROP TABLE IF EXISTS %(db)s.%(coverage)s; 

514 '''.strip().splitlines(): 

515 

516 cursor.execute(self._sql(s)) 

517 

518 Selection._delete(self) 

519 

520 @filldocs 

521 def add(self, 

522 paths, 

523 kinds=None, 

524 format='detect', 

525 include=None, 

526 exclude=None, 

527 check=True): 

528 

529 ''' 

530 Add files to the selection. 

531 

532 :param paths: 

533 Iterator yielding paths to files or directories to be added to the 

534 selection. Recurses into directories. If given a ``str``, it 

535 is treated as a single path to be added. 

536 :type paths: 

537 :py:class:`list` of :py:class:`str` 

538 

539 :param kinds: 

540 Content types to be made available through the Squirrel selection. 

541 By default, all known content types are accepted. 

542 :type kinds: 

543 :py:class:`list` of :py:class:`str` 

544 

545 :param format: 

546 File format identifier or ``'detect'`` to enable auto-detection 

547 (available: %(file_formats)s). 

548 :type format: 

549 str 

550 

551 :param include: 

552 If not ``None``, files are only included if their paths match the 

553 given regular expression pattern. 

554 :type include:

555 str 

556 

557 :param exclude: 

558 If not ``None``, files are only included if their paths do not 

559 match the given regular expression pattern. 

560 :type exclude:

561 str 

562 

563 :param check: 

564 If ``True``, all file modification times are checked to see if 

565 cached information has to be updated (slow). If ``False``, only 

566 previously unknown files are indexed and cached information is used 

567 for known files, regardless of file state (fast, corresponds to

568 Squirrel's ``--optimistic`` mode). File deletions will go 

569 undetected in the latter case. 

570 :type check: 

571 bool 

572 

573 :Complexity: 

574 O(log N) 
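
        Example (sketch; the directory name is a placeholder)::

            sq.add('data/', kinds=['waveform', 'station'])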

575 ''' 

576 

577 if isinstance(kinds, str): 

578 kinds = (kinds,) 

579 

580 if isinstance(paths, str): 

581 paths = [paths] 

582 

583 kind_mask = model.to_kind_mask(kinds) 

584 

585 with progress.view(): 

586 Selection.add( 

587 self, util.iter_select_files( 

588 paths, 

589 show_progress=False, 

590 include=include, 

591 exclude=exclude, 

592 pass_through=lambda path: path.startswith('virtual:') 

593 ), kind_mask, format) 

594 

595 self._load(check) 

596 self._update_nuts() 

597 

598 def reload(self): 

599 ''' 

600 Check for modifications and reindex modified files. 

601 

602 Based on file modification times. 

603 ''' 

604 

605 self._set_file_states_force_check() 

606 self._load(check=True) 

607 self._update_nuts() 

608 

609 def add_virtual(self, nuts, virtual_paths=None): 

610 ''' 

611 Add content which is not backed by files. 

612 

613 :param nuts: 

614 Content pieces to be added. 

615 :type nuts: 

616 iterator yielding :py:class:`~pyrocko.squirrel.model.Nut` objects 

617 

618 :param virtual_paths: 

619 List of virtual paths. Supplying these avoids building a temporary

620 list of the nuts just to collect the file paths for the selection.

621 :type virtual_paths: 

622 :py:class:`list` of :py:class:`str` 

623 

624 Stores to the main database and the selection. 

625 ''' 

626 

627 if isinstance(virtual_paths, str): 

628 virtual_paths = [virtual_paths] 

629 

630 if virtual_paths is None: 

631 if not isinstance(nuts, list): 

632 nuts = list(nuts) 

633 virtual_paths = set(nut.file_path for nut in nuts) 

634 

635 Selection.add(self, virtual_paths) 

636 self.get_database().dig(nuts) 

637 self._update_nuts() 

638 

639 def add_volatile(self, nuts): 

640 if not isinstance(nuts, list): 

641 nuts = list(nuts) 

642 

643 paths = list(set(nut.file_path for nut in nuts)) 

644 io.backends.virtual.add_nuts(nuts) 

645 self.add_virtual(nuts, paths) 

646 self._volatile_paths.extend(paths) 

647 

648 def add_volatile_waveforms(self, traces): 

649 ''' 

650 Add in-memory waveforms which will be removed when the app closes. 
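
        Example (sketch; assumes ``tr`` is a :py:class:`pyrocko.trace.Trace`
        object created elsewhere)::

            path = sq.add_volatile_waveforms([tr])
            # The traces are now queryable like file-backed waveforms, e.g.
            # through get_waveforms(), until this Squirrel instance goes away.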

651 ''' 

652 

653 name = model.random_name() 

654 

655 path = 'virtual:volatile:%s' % name 

656 

657 nuts = [] 

658 for itr, tr in enumerate(traces): 

659 assert tr.tmin <= tr.tmax 

660 tmin_seconds, tmin_offset = model.tsplit(tr.tmin) 

661 tmax_seconds, tmax_offset = model.tsplit( 

662 tr.tmin + tr.data_len()*tr.deltat) 

663 

664 nuts.append(model.Nut( 

665 file_path=path, 

666 file_format='virtual', 

667 file_segment=itr, 

668 file_element=0, 

669 file_mtime=0, 

670 codes=tr.codes, 

671 tmin_seconds=tmin_seconds, 

672 tmin_offset=tmin_offset, 

673 tmax_seconds=tmax_seconds, 

674 tmax_offset=tmax_offset, 

675 deltat=tr.deltat, 

676 kind_id=to_kind_id('waveform'), 

677 content=tr)) 

678 

679 self.add_volatile(nuts) 

680 return path 

681 

682 def _load(self, check): 

683 for _ in io.iload( 

684 self, 

685 content=[], 

686 skip_unchanged=True, 

687 check=check): 

688 pass 

689 

690 def _update_nuts(self, transaction=None): 

691 transaction = transaction or self.transaction('update nuts') 

692 with make_task('Aggregating selection') as task, \ 

693 transaction as cursor: 

694 

695 self._conn.set_progress_handler(task.update, 100000) 

696 nrows = cursor.execute(self._sql( 

697 ''' 

698 INSERT INTO %(db)s.%(nuts)s 

699 SELECT NULL, 

700 nuts.file_id, nuts.file_segment, nuts.file_element, 

701 nuts.kind_id, nuts.kind_codes_id, 

702 nuts.tmin_seconds, nuts.tmin_offset, 

703 nuts.tmax_seconds, nuts.tmax_offset, 

704 nuts.kscale 

705 FROM %(db)s.%(file_states)s 

706 INNER JOIN nuts 

707 ON %(db)s.%(file_states)s.file_id == nuts.file_id 

708 INNER JOIN kind_codes 

709 ON nuts.kind_codes_id == 

710 kind_codes.kind_codes_id 

711 WHERE %(db)s.%(file_states)s.file_state != 2 

712 AND (((1 << kind_codes.kind_id) 

713 & %(db)s.%(file_states)s.kind_mask) != 0) 

714 ''')).rowcount 

715 

716 task.update(nrows) 

717 self._set_file_states_known(transaction) 

718 self._conn.set_progress_handler(None, 0) 

719 

720 def add_source(self, source, check=True): 

721 ''' 

722 Add remote resource. 

723 

724 :param source: 

725 Remote data access client instance. 

726 :type source: 

727 subclass of :py:class:`~pyrocko.squirrel.client.base.Source` 

728 ''' 

729 

730 self._sources.append(source) 

731 source.setup(self, check=check) 

732 

733 def add_fdsn(self, *args, **kwargs): 

734 ''' 

735 Add FDSN site for transparent remote data access. 

736 

737 Arguments are passed to 

738 :py:class:`~pyrocko.squirrel.client.fdsn.FDSNSource`. 

739 ''' 

740 

741 self.add_source(fdsn.FDSNSource(*args, **kwargs)) 

742 

743 def add_catalog(self, *args, **kwargs): 

744 ''' 

745 Add online catalog for transparent event data access. 

746 

747 Arguments are passed to 

748 :py:class:`~pyrocko.squirrel.client.catalog.CatalogSource`. 

749 ''' 

750 

751 self.add_source(catalog.CatalogSource(*args, **kwargs)) 

752 

753 def add_dataset(self, ds, check=True, warn_persistent=True): 

754 ''' 

755 Read dataset description from file and add its contents. 

756 

757 :param ds: 

758 Path to dataset description file or dataset description object.

759 See :py:mod:`~pyrocko.squirrel.dataset`.

760 :type ds: 

761 :py:class:`str` or :py:class:`~pyrocko.squirrel.dataset.Dataset` 

762 

763 :param check: 

764 If ``True``, all file modification times are checked to see if 

765 cached information has to be updated (slow). If ``False``, only 

766 previously unknown files are indexed and cached information is used 

767 for known files, regardless of file state (fast, corresponds to

768 Squirrel's ``--optimistic`` mode). File deletions will go 

769 undetected in the latter case. 

770 :type check: 

771 bool 

772 ''' 

773 if isinstance(ds, str): 

774 ds = dataset.read_dataset(ds) 

775 path = ds 

776 else: 

777 path = None 

778 

779 if warn_persistent and ds.persistent and ( 

780 not self._persistent or (self._persistent != ds.persistent)): 

781 

782 logger.warning( 

783 'Dataset `persistent` flag ignored. Can not be set on already ' 

784 'existing Squirrel instance.%s' % ( 

785 ' Dataset: %s' % path if path else '')) 

786 

787 ds.setup(self, check=check) 

788 

789 def _get_selection_args( 

790 self, kind_id, 

791 obj=None, tmin=None, tmax=None, time=None, codes=None): 

792 

793 if codes is not None: 

794 codes = codes_patterns_for_kind(kind_id, codes) 

795 

796 if time is not None: 

797 tmin = time 

798 tmax = time 

799 

800 if obj is not None: 

801 tmin = tmin if tmin is not None else obj.tmin 

802 tmax = tmax if tmax is not None else obj.tmax 

803 codes = codes if codes is not None else codes_patterns_for_kind( 

804 kind_id, obj.codes) 

805 

806 return tmin, tmax, codes 

807 

808 def _get_selection_args_str(self, *args, **kwargs): 

809 

810 tmin, tmax, codes = self._get_selection_args(*args, **kwargs) 

811 return 'tmin: %s, tmax: %s, codes: %s' % ( 

812 util.time_to_str(tmin) if tmin is not None else 'none', 

813 util.time_to_str(tmax) if tmax is not None else 'none',

814 ','.join(str(entry) for entry in codes) if codes is not None else 'none')

815 

816 def _selection_args_to_kwargs( 

817 self, obj=None, tmin=None, tmax=None, time=None, codes=None): 

818 

819 return dict(obj=obj, tmin=tmin, tmax=tmax, time=time, codes=codes) 

820 

821 def _timerange_sql(self, tmin, tmax, kind, cond, args, naiv): 

822 

823 tmin_seconds, tmin_offset = model.tsplit(tmin) 

824 tmax_seconds, tmax_offset = model.tsplit(tmax) 

825 if naiv: 

826 cond.append('%(db)s.%(nuts)s.tmin_seconds <= ?') 

827 args.append(tmax_seconds) 

828 else: 

829 tscale_edges = model.tscale_edges 

830 tmin_cond = [] 

831 for kscale in range(tscale_edges.size + 1): 

832 if kscale != tscale_edges.size: 

833 tscale = int(tscale_edges[kscale]) 

834 tmin_cond.append(''' 

835 (%(db)s.%(nuts)s.kind_id = ? 

836 AND %(db)s.%(nuts)s.kscale == ? 

837 AND %(db)s.%(nuts)s.tmin_seconds BETWEEN ? AND ?) 

838 ''') 

839 args.extend( 

840 (to_kind_id(kind), kscale, 

841 tmin_seconds - tscale - 1, tmax_seconds + 1)) 

842 

843 else: 

844 tmin_cond.append(''' 

845 (%(db)s.%(nuts)s.kind_id == ? 

846 AND %(db)s.%(nuts)s.kscale == ? 

847 AND %(db)s.%(nuts)s.tmin_seconds <= ?) 

848 ''') 

849 

850 args.extend( 

851 (to_kind_id(kind), kscale, tmax_seconds + 1)) 

852 if tmin_cond: 

853 cond.append(' ( ' + ' OR '.join(tmin_cond) + ' ) ') 

854 

855 cond.append('%(db)s.%(nuts)s.tmax_seconds >= ?') 

856 args.append(tmin_seconds) 

857 

858 def iter_nuts( 

859 self, kind=None, tmin=None, tmax=None, codes=None, naiv=False, 

860 kind_codes_ids=None, path=None): 

861 

862 ''' 

863 Iterate over content entities matching given constraints. 

864 

865 :param kind: 

866 Content kind (or kinds) to extract. 

867 :type kind: 

868 :py:class:`str`, :py:class:`list` of :py:class:`str` 

869 

870 :param tmin: 

871 Start time of query interval. 

872 :type tmin: 

873 timestamp 

874 

875 :param tmax: 

876 End time of query interval. 

877 :type tmax: 

878 timestamp 

879 

880 :param codes: 

881 List of code patterns to query. 

882 :type codes: 

883 :py:class:`list` of :py:class:`~pyrocko.squirrel.model.Codes` 

884 objects appropriate for the queried content type, or anything which 

885 can be converted to such objects. 

886 

887 :param naiv: 

888 Bypass time span lookup through indices (slow, for testing). 

889 :type naiv: 

890 :py:class:`bool` 

891 

892 :param kind_codes_ids: 

893 Kind-codes IDs of contents to be retrieved (internal use). 

894 :type kind_codes_ids: 

895 :py:class:`list` of :py:class:`int` 

896 

897 :yields: 

898 :py:class:`~pyrocko.squirrel.model.Nut` objects representing the 

899 intersecting content. 

900 

901 :complexity: 

902 O(log N) for the time selection part due to heavy use of database 

903 indices. 

904 

905 Query time span is treated as a half-open interval ``[tmin, tmax)``. 

906 However, if ``tmin`` equals ``tmax``, the edge logic is modified to

907 closed-interval so that content intersecting with the time instant ``t 

908 = tmin = tmax`` is returned (otherwise nothing would be returned as 

909 ``[t, t)`` never matches anything). 

910 

911 Time spans of content entities to be matched are also treated as half 

912 open intervals, e.g. content span ``[0, 1)`` is matched by query span 

913 ``[0, 1)`` but not by ``[-1, 0)`` or ``[1, 2)``. Also here, the logic is

914 modified to closed-interval when the content time span is an empty 

915 interval, i.e. to indicate a time instant. E.g. time instant 0 is 

916 matched by ``[0, 1)`` but not by ``[-1, 0)`` or ``[1, 2)``. 
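
        Example (sketch; ``tmin`` and ``tmax`` are given timestamps)::

            for nut in sq.iter_nuts('waveform', tmin=tmin, tmax=tmax):
                print(nut.codes, nut.tmin, nut.tmax)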

917 ''' 

918 

919 if not isinstance(kind, str): 

920 if kind is None: 

921 kind = model.g_content_kinds 

922 for kind_ in kind: 

923 for nut in self.iter_nuts(kind_, tmin, tmax, codes): 

924 yield nut 

925 

926 return 

927 

928 kind_id = to_kind_id(kind) 

929 

930 cond = [] 

931 args = [] 

932 if tmin is not None or tmax is not None: 

933 assert kind is not None 

934 if tmin is None: 

935 tmin = self.get_time_span()[0] 

936 if tmax is None: 

937 tmax = self.get_time_span()[1] + 1.0 

938 

939 self._timerange_sql(tmin, tmax, kind, cond, args, naiv) 

940 

941 cond.append('kind_codes.kind_id == ?') 

942 args.append(kind_id) 

943 

944 if codes is not None: 

945 pats = codes_patterns_for_kind(kind_id, codes) 

946 

947 if pats: 

948 # could optimize this by using IN for non-patterns 

949 cond.append( 

950 ' ( %s ) ' % ' OR '.join( 

951 ('kind_codes.codes GLOB ?',) * len(pats))) 

952 args.extend(pat.safe_str for pat in pats) 

953 

954 if kind_codes_ids is not None: 

955 cond.append( 

956 ' ( kind_codes.kind_codes_id IN ( %s ) ) ' % ', '.join( 

957 '?'*len(kind_codes_ids))) 

958 

959 args.extend(kind_codes_ids) 

960 

961 db = self.get_database() 

962 if path is not None: 

963 cond.append('files.path == ?') 

964 args.append(db.relpath(abspath(path))) 

965 

966 sql = (''' 

967 SELECT 

968 files.path, 

969 files.format, 

970 files.mtime, 

971 files.size, 

972 %(db)s.%(nuts)s.file_segment, 

973 %(db)s.%(nuts)s.file_element, 

974 kind_codes.kind_id, 

975 kind_codes.codes, 

976 %(db)s.%(nuts)s.tmin_seconds, 

977 %(db)s.%(nuts)s.tmin_offset, 

978 %(db)s.%(nuts)s.tmax_seconds, 

979 %(db)s.%(nuts)s.tmax_offset, 

980 kind_codes.deltat 

981 FROM files 

982 INNER JOIN %(db)s.%(nuts)s 

983 ON files.file_id == %(db)s.%(nuts)s.file_id 

984 INNER JOIN kind_codes 

985 ON %(db)s.%(nuts)s.kind_codes_id == kind_codes.kind_codes_id 

986 ''') 

987 

988 if cond: 

989 sql += ''' WHERE ''' + ' AND '.join(cond) 

990 

991 sql = self._sql(sql) 

992 if tmin is None and tmax is None: 

993 for row in self._conn.execute(sql, args): 

994 row = (db.abspath(row[0]),) + row[1:] 

995 nut = model.Nut(values_nocheck=row) 

996 yield nut 

997 else: 

998 assert tmin is not None and tmax is not None 

999 if tmin == tmax: 

1000 for row in self._conn.execute(sql, args): 

1001 row = (db.abspath(row[0]),) + row[1:] 

1002 nut = model.Nut(values_nocheck=row) 

1003 if (nut.tmin <= tmin < nut.tmax) \ 

1004 or (nut.tmin == nut.tmax and tmin == nut.tmin): 

1005 

1006 yield nut 

1007 else: 

1008 for row in self._conn.execute(sql, args): 

1009 row = (db.abspath(row[0]),) + row[1:] 

1010 nut = model.Nut(values_nocheck=row) 

1011 if (tmin < nut.tmax and nut.tmin < tmax) \ 

1012 or (nut.tmin == nut.tmax 

1013 and tmin <= nut.tmin < tmax): 

1014 

1015 yield nut 

1016 

1017 def get_nuts(self, *args, **kwargs): 

1018 ''' 

1019 Get content entities matching given constraints. 

1020 

1021 Like :py:meth:`iter_nuts` but returns results as a list. 

1022 ''' 

1023 

1024 return list(self.iter_nuts(*args, **kwargs)) 

1025 

1026 def _split_nuts( 

1027 self, kind, tmin=None, tmax=None, codes=None, path=None): 

1028 

1029 kind_id = to_kind_id(kind) 

1030 tmin_seconds, tmin_offset = model.tsplit(tmin) 

1031 tmax_seconds, tmax_offset = model.tsplit(tmax) 

1032 

1033 names_main_nuts = dict(self._names) 

1034 names_main_nuts.update(db='main', nuts='nuts') 

1035 

1036 db = self.get_database() 

1037 

1038 def main_nuts(s): 

1039 return s % names_main_nuts 

1040 

1041 with self.transaction('split nuts') as cursor: 

1042 # modify selection and main 

1043 for sql_subst in [ 

1044 self._sql, main_nuts]: 

1045 

1046 cond = [] 

1047 args = [] 

1048 

1049 self._timerange_sql(tmin, tmax, kind, cond, args, False) 

1050 

1051 if codes is not None: 

1052 pats = codes_patterns_for_kind(kind_id, codes) 

1053 if pats: 

1054 cond.append( 

1055 ' ( %s ) ' % ' OR '.join( 

1056 ('kind_codes.codes GLOB ?',) * len(pats))) 

1057 args.extend(pat.safe_str for pat in pats) 

1058 

1059 if path is not None: 

1060 cond.append('files.path == ?') 

1061 args.append(db.relpath(abspath(path))) 

1062 

1063 sql = sql_subst(''' 

1064 SELECT 

1065 %(db)s.%(nuts)s.nut_id, 

1066 %(db)s.%(nuts)s.tmin_seconds, 

1067 %(db)s.%(nuts)s.tmin_offset, 

1068 %(db)s.%(nuts)s.tmax_seconds, 

1069 %(db)s.%(nuts)s.tmax_offset, 

1070 kind_codes.deltat 

1071 FROM files 

1072 INNER JOIN %(db)s.%(nuts)s 

1073 ON files.file_id == %(db)s.%(nuts)s.file_id 

1074 INNER JOIN kind_codes 

1075 ON %(db)s.%(nuts)s.kind_codes_id == kind_codes.kind_codes_id 

1076 WHERE ''' + ' AND '.join(cond)) # noqa 

1077 

1078 insert = [] 

1079 delete = [] 

1080 for row in cursor.execute(sql, args): 

1081 nut_id, nut_tmin_seconds, nut_tmin_offset, \ 

1082 nut_tmax_seconds, nut_tmax_offset, nut_deltat = row 

1083 

1084 nut_tmin = model.tjoin( 

1085 nut_tmin_seconds, nut_tmin_offset) 

1086 nut_tmax = model.tjoin( 

1087 nut_tmax_seconds, nut_tmax_offset) 

1088 

1089 if nut_tmin < tmax and tmin < nut_tmax: 

1090 if nut_tmin < tmin: 

1091 insert.append(( 

1092 nut_tmin_seconds, nut_tmin_offset, 

1093 tmin_seconds, tmin_offset, 

1094 model.tscale_to_kscale( 

1095 tmin_seconds - nut_tmin_seconds), 

1096 nut_id)) 

1097 

1098 if tmax < nut_tmax: 

1099 insert.append(( 

1100 tmax_seconds, tmax_offset, 

1101 nut_tmax_seconds, nut_tmax_offset, 

1102 model.tscale_to_kscale( 

1103 nut_tmax_seconds - tmax_seconds), 

1104 nut_id)) 

1105 

1106 delete.append((nut_id,)) 

1107 

1108 sql_add = ''' 

1109 INSERT INTO %(db)s.%(nuts)s ( 

1110 file_id, file_segment, file_element, kind_id, 

1111 kind_codes_id, tmin_seconds, tmin_offset, 

1112 tmax_seconds, tmax_offset, kscale ) 

1113 SELECT 

1114 file_id, file_segment, file_element, 

1115 kind_id, kind_codes_id, ?, ?, ?, ?, ? 

1116 FROM %(db)s.%(nuts)s 

1117 WHERE nut_id == ? 

1118 ''' 

1119 cursor.executemany(sql_subst(sql_add), insert) 

1120 

1121 sql_delete = ''' 

1122 DELETE FROM %(db)s.%(nuts)s WHERE nut_id == ? 

1123 ''' 

1124 cursor.executemany(sql_subst(sql_delete), delete) 

1125 

1126 def get_time_span(self, kinds=None): 

1127 ''' 

1128 Get time interval over all content in selection. 

1129 

1130 :param kinds: 

1131 If not ``None``, restrict query to given content kinds. 

1132 :type kinds:

1133 list of str 

1134 

1135 :complexity: 

1136 O(1), independent of the number of nuts. 

1137 

1138 :returns: 

1139 ``(tmin, tmax)``, combined time interval of queried content kinds. 
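
        Example::

            tmin, tmax = sq.get_time_span(['waveform'])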

1140 ''' 

1141 

1142 sql_min = self._sql(''' 

1143 SELECT MIN(tmin_seconds), MIN(tmin_offset) 

1144 FROM %(db)s.%(nuts)s 

1145 WHERE kind_id == ? 

1146 AND tmin_seconds == ( 

1147 SELECT MIN(tmin_seconds) 

1148 FROM %(db)s.%(nuts)s 

1149 WHERE kind_id == ?) 

1150 ''') 

1151 

1152 sql_max = self._sql(''' 

1153 SELECT MAX(tmax_seconds), MAX(tmax_offset) 

1154 FROM %(db)s.%(nuts)s 

1155 WHERE kind_id == ? 

1156 AND tmax_seconds == ( 

1157 SELECT MAX(tmax_seconds) 

1158 FROM %(db)s.%(nuts)s 

1159 WHERE kind_id == ?) 

1160 ''') 

1161 

1162 gtmin = None 

1163 gtmax = None 

1164 

1165 if isinstance(kinds, str): 

1166 kinds = [kinds] 

1167 

1168 if kinds is None: 

1169 kind_ids = model.g_content_kind_ids 

1170 else: 

1171 kind_ids = model.to_kind_ids(kinds) 

1172 

1173 for kind_id in kind_ids: 

1174 for tmin_seconds, tmin_offset in self._conn.execute( 

1175 sql_min, (kind_id, kind_id)): 

1176 tmin = model.tjoin(tmin_seconds, tmin_offset) 

1177 if tmin is not None and (gtmin is None or tmin < gtmin): 

1178 gtmin = tmin 

1179 

1180 for (tmax_seconds, tmax_offset) in self._conn.execute( 

1181 sql_max, (kind_id, kind_id)): 

1182 tmax = model.tjoin(tmax_seconds, tmax_offset) 

1183 if tmax is not None and (gtmax is None or tmax > gtmax): 

1184 gtmax = tmax 

1185 

1186 return gtmin, gtmax 

1187 

1188 def has(self, kinds): 

1189 ''' 

1190 Check availability of given content kinds. 

1191 

1192 :param kinds: 

1193 Content kinds to query. 

1194 :type kinds:

1195 list of str 

1196 

1197 :returns: 

1198 ``True`` if any of the queried content kinds is available 

1199 in the selection. 

1200 ''' 

1201 self_tmin, self_tmax = self.get_time_span(kinds) 

1202 

1203 return None not in (self_tmin, self_tmax) 

1204 

1205 def get_deltat_span(self, kind): 

1206 ''' 

1207 Get min and max sampling interval of all content of given kind. 

1208 

1209 :param kind: 

1210 Content kind 

1211 :type kind: 

1212 str 

1213 

1214 :returns: ``(deltat_min, deltat_max)`` 

1215 ''' 

1216 

1217 deltats = [ 

1218 deltat for deltat in self.get_deltats(kind) 

1219 if deltat is not None] 

1220 

1221 if deltats: 

1222 return min(deltats), max(deltats) 

1223 else: 

1224 return None, None 

1225 

1226 def iter_kinds(self, codes=None): 

1227 ''' 

1228 Iterate over content types available in selection. 

1229 

1230 :param codes: 

1231 If given, get kinds only for selected codes identifier. 

1232 Only a single identifier may be given here and no pattern matching 

1233 is done, currently. 

1234 :type codes: 

1235 :py:class:`~pyrocko.squirrel.model.Codes` 

1236 

1237 :yields: 

1238 Available content kinds as :py:class:`str`. 

1239 

1240 :complexity: 

1241 O(1), independent of number of nuts. 

1242 ''' 

1243 

1244 return self._database._iter_kinds( 

1245 codes=codes, 

1246 kind_codes_count='%(db)s.%(kind_codes_count)s' % self._names) 

1247 

1248 def iter_deltats(self, kind=None): 

1249 ''' 

1250 Iterate over sampling intervals available in selection. 

1251 

1252 :param kind: 

1253 If given, get sampling intervals only for a given content type. 

1254 :type kind: 

1255 str 

1256 

1257 :yields: 

1258 :py:class:`float` values. 

1259 

1260 :complexity: 

1261 O(1), independent of number of nuts. 

1262 ''' 

1263 return self._database._iter_deltats( 

1264 kind=kind, 

1265 kind_codes_count='%(db)s.%(kind_codes_count)s' % self._names) 

1266 

1267 def iter_codes(self, kind=None): 

1268 ''' 

1269 Iterate over content identifier code sequences available in selection. 

1270 

1271 :param kind: 

1272 If given, get codes only for a given content type. 

1273 :type kind: 

1274 str 

1275 

1276 :yields: 

1277 :py:class:`tuple` of :py:class:`str` 

1278 

1279 :complexity: 

1280 O(1), independent of number of nuts. 

1281 ''' 

1282 return self._database._iter_codes( 

1283 kind=kind, 

1284 kind_codes_count='%(db)s.%(kind_codes_count)s' % self._names) 

1285 

1286 def _iter_codes_info(self, kind=None): 

1287 ''' 

1288 Iterate over number of occurrences of any (kind, codes) combination. 

1289 

1290 :param kind: 

1291 If given, get counts only for selected content type. 

1292 :type kind: 

1293 str 

1294 

1295 :yields: 

1296 Tuples of the form ``(kind_id, codes, deltat, kind_codes_id, count)``.

1297 

1298 :complexity: 

1299 O(1), independent of number of nuts. 

1300 ''' 

1301 return self._database._iter_codes_info( 

1302 kind=kind, 

1303 kind_codes_count='%(db)s.%(kind_codes_count)s' % self._names) 

1304 

1305 def get_kinds(self, codes=None): 

1306 ''' 

1307 Get content types available in selection. 

1308 

1309 :param codes: 

1310 If given, get kinds only for selected codes identifier. 

1311 Only a single identifier may be given here and no pattern matching 

1312 is done, currently. 

1313 :type codes: 

1314 :py:class:`~pyrocko.squirrel.model.Codes` 

1315 

1316 :returns: 

1317 Sorted list of available content types. 

1318 :rtype: 

1319 :py:class:`list` of :py:class:`str`

1320 

1321 :complexity: 

1322 O(1), independent of number of nuts. 

1323 

1324 ''' 

1325 return sorted(list(self.iter_kinds(codes=codes))) 

1326 

1327 def get_deltats(self, kind=None): 

1328 ''' 

1329 Get sampling intervals available in selection. 

1330 

1331 :param kind: 

1332 If given, get sampling intervals only for selected content type. 

1333 :type kind: 

1334 str 

1335 

1336 :complexity: 

1337 O(1), independent of number of nuts. 

1338 

1339 :returns: Sorted list of available sampling intervals. 

1340 ''' 

1341 return sorted(list(self.iter_deltats(kind=kind))) 

1342 

1343 def get_codes(self, kind=None): 

1344 ''' 

1345 Get identifier code sequences available in selection. 

1346 

1347 :param kind: 

1348 If given, get codes only for selected content type. 

1349 :type kind: 

1350 str 

1351 

1352 :complexity: 

1353 O(1), independent of number of nuts. 

1354 

1355 :returns: Sorted list of available codes as tuples of strings. 

1356 ''' 

1357 return sorted(list(self.iter_codes(kind=kind))) 

1358 

1359 def get_counts(self, kind=None): 

1360 ''' 

1361 Get number of occurrences of any (kind, codes) combination. 

1362 

1363 :param kind: 

1364 If given, get codes only for selected content type. 

1365 :type kind: 

1366 str 

1367 

1368 :complexity: 

1369 O(1), independent of number of nuts. 

1370 

1371 :returns: ``dict`` with ``counts[kind][codes]`` or ``counts[codes]`` 

1372 if kind is not ``None`` 
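
        Example (sketch)::

            for codes, count in sq.get_counts('channel').items():
                print(codes, count)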

1373 ''' 

1374 d = {} 

1375 for kind_id, codes, _, _, count in self._iter_codes_info(kind=kind): 

1376 if kind_id not in d: 

1377 v = d[kind_id] = {} 

1378 else: 

1379 v = d[kind_id] 

1380 

1381 if codes not in v: 

1382 v[codes] = 0 

1383 

1384 v[codes] += count 

1385 

1386 if kind is not None: 

1387 return d[to_kind_id(kind)] 

1388 else: 

1389 return dict((to_kind(kind_id), v) for (kind_id, v) in d.items()) 

1390 

1391 def glob_codes(self, kind, codes): 

1392 ''' 

1393 Find codes matching given patterns. 

1394 

1395 :param kind: 

1396 Content kind to be queried. 

1397 :type kind: 

1398 str 

1399 

1400 :param codes: 

1401 List of code patterns to query. 

1402 :type codes: 

1403 :py:class:`list` of :py:class:`~pyrocko.squirrel.model.Codes` 

1404 objects appropriate for the queried content type, or anything which 

1405 can be converted to such objects. 

1406 

1407 :returns: 

1408 List of matches of the form ``[kind_codes_id, codes, deltat]``. 

1409 ''' 

1410 

1411 kind_id = to_kind_id(kind) 

1412 args = [kind_id] 

1413 pats = codes_patterns_for_kind(kind_id, codes) 

1414 

1415 if pats: 

1416 codes_cond = 'AND ( %s ) ' % ' OR '.join( 

1417 ('kind_codes.codes GLOB ?',) * len(pats)) 

1418 

1419 args.extend(pat.safe_str for pat in pats) 

1420 else: 

1421 codes_cond = '' 

1422 

1423 sql = self._sql(''' 

1424 SELECT kind_codes_id, codes, deltat FROM kind_codes 

1425 WHERE 

1426 kind_id == ? ''' + codes_cond) 

1427 

1428 return list(map(list, self._conn.execute(sql, args))) 

1429 

1430 def update(self, constraint=None, **kwargs): 

1431 ''' 

1432 Update or partially update channel and event inventories. 

1433 

1434 :param constraint: 

1435 Selection of times or areas to be brought up to date. 

1436 :type constraint: 

1437 :py:class:`~pyrocko.squirrel.client.base.Constraint` 

1438 

1439 :param \\*\\*kwargs: 

1440 Shortcut for setting ``constraint=Constraint(**kwargs)``. 

1441 

1442 This function triggers all attached remote sources, to check for 

1443 updates in the meta-data. The sources will only submit queries when 

1444 their expiration date has passed, or if the selection spans into 

1445 previously unseen times or areas. 

1446 ''' 

1447 

1448 if constraint is None: 

1449 constraint = client.Constraint(**kwargs) 

1450 

1451 for source in self._sources: 

1452 source.update_channel_inventory(self, constraint) 

1453 source.update_event_inventory(self, constraint) 

1454 

1455 def update_waveform_promises(self, constraint=None, **kwargs): 

1456 ''' 

1457 Permit downloading of remote waveforms. 

1458 

1459 :param constraint: 

1460 Remote waveforms compatible with the given constraint are enabled 

1461 for download. 

1462 :type constraint: 

1463 :py:class:`~pyrocko.squirrel.client.base.Constraint` 

1464 

1465 :param \\*\\*kwargs: 

1466 Shortcut for setting ``constraint=Constraint(**kwargs)``. 

1467 

1468 Calling this method permits Squirrel to download waveforms from remote 

1469 sources when processing subsequent waveform requests. This works by 

1470 inserting so-called waveform promises into the database. It will look

1471 into the available channels for each remote source and create a promise 

1472 for each channel compatible with the given constraint. If the promise 

1473 then matches in a waveform request, Squirrel tries to download the 

1474 waveform. If the download is successful, the downloaded waveform is 

1475 added to the Squirrel and the promise is deleted. If the download 

1476 fails, the promise is kept if the failure appears to be

1477 temporary, e.g. caused by a network outage. If the cause of failure

1478 however seems to be permanent, the promise is deleted so that no 

1479 further attempts are made to download a waveform which might not be 

1480 available from that server at all. To force re-scheduling after a 

1481 permanent failure, call :py:meth:`update_waveform_promises` 

1482 yet another time. 
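
        Typical workflow sketch (the FDSN site name is an example; ``tmin``
        and ``tmax`` are given timestamps)::

            sq.add_fdsn('geofon')
            sq.update(tmin=tmin, tmax=tmax)
            sq.update_waveform_promises(tmin=tmin, tmax=tmax)
            traces = sq.get_waveforms(tmin=tmin, tmax=tmax)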

1483 ''' 

1484 

1485 if constraint is None: 

1486 constraint = client.Constraint(**kwargs) 

1487 

1488 for source in self._sources: 

1489 source.update_waveform_promises(self, constraint) 

1490 

1491 def remove_waveform_promises(self, from_database='selection'): 

1492 ''' 

1493 Remove waveform promises from live selection or global database. 

1494 

1495 Calling this function removes all waveform promises provided by the 

1496 attached sources. 

1497 

1498 :param from_database: 

1499 Remove from live selection ``'selection'`` or global database 

1500 ``'global'``. 

1501 ''' 

1502 for source in self._sources: 

1503 source.remove_waveform_promises(self, from_database=from_database) 

1504 

1505 def update_responses(self, constraint=None, **kwargs): 

1506 if constraint is None: 

1507 constraint = client.Constraint(**kwargs) 

1508 

1509 for source in self._sources: 

1510 source.update_response_inventory(self, constraint) 

1511 

1512 def get_nfiles(self): 

1513 ''' 

1514 Get number of files in selection. 

1515 ''' 

1516 

1517 sql = self._sql('''SELECT COUNT(*) FROM %(db)s.%(file_states)s''') 

1518 for row in self._conn.execute(sql): 

1519 return row[0] 

1520 

1521 def get_nnuts(self): 

1522 ''' 

1523 Get number of nuts in selection. 

1524 ''' 

1525 

1526 sql = self._sql('''SELECT COUNT(*) FROM %(db)s.%(nuts)s''') 

1527 for row in self._conn.execute(sql): 

1528 return row[0] 

1529 

1530 def get_total_size(self): 

1531 ''' 

1532 Get aggregated file size available in selection. 

1533 ''' 

1534 

1535 sql = self._sql(''' 

1536 SELECT SUM(files.size) FROM %(db)s.%(file_states)s 

1537 INNER JOIN files 

1538 ON %(db)s.%(file_states)s.file_id = files.file_id 

1539 ''') 

1540 

1541 for row in self._conn.execute(sql): 

1542 return row[0] or 0 

1543 

1544 def get_stats(self): 

1545 ''' 

1546 Get statistics on contents available through this selection. 

1547 ''' 

1548 

1549 kinds = self.get_kinds() 

1550 time_spans = {} 

1551 for kind in kinds: 

1552 time_spans[kind] = self.get_time_span([kind]) 

1553 

1554 return SquirrelStats( 

1555 nfiles=self.get_nfiles(), 

1556 nnuts=self.get_nnuts(), 

1557 kinds=kinds, 

1558 codes=self.get_codes(), 

1559 total_size=self.get_total_size(), 

1560 counts=self.get_counts(), 

1561 time_spans=time_spans, 

1562 sources=[s.describe() for s in self._sources], 

1563 operators=[op.describe() for op in self._operators]) 

1564 

1565 def get_content( 

1566 self, 

1567 nut, 

1568 cache_id='default', 

1569 accessor_id='default', 

1570 show_progress=False, 

1571 model='squirrel'): 

1572 

1573 ''' 

1574 Get and possibly load full content for a given index entry from file. 

1575 

1576 Loads the actual content objects (channel, station, waveform, ...) from 

1577 file. For efficiency, sibling content (everything in the same file

1578 segment) will also be loaded as a side effect. The loaded contents are 

1579 cached in the Squirrel object. 

1580 ''' 

1581 

1582 content_cache = self._content_caches[cache_id] 

1583 if not content_cache.has(nut): 

1584 

1585 for nut_loaded in io.iload( 

1586 nut.file_path, 

1587 segment=nut.file_segment, 

1588 format=nut.file_format, 

1589 database=self._database, 

1590 update_selection=self, 

1591 show_progress=show_progress): 

1592 

1593 content_cache.put(nut_loaded) 

1594 

1595 try: 

1596 return content_cache.get(nut, accessor_id, model) 

1597 except KeyError: 

1598 raise error.NotAvailable( 

1599 'Unable to retrieve content: %s, %s, %s, %s' % nut.key) 

1600 

1601 def advance_accessor(self, accessor_id='default', cache_id=None): 

1602 ''' 

1603 Notify memory caches about consumer moving to a new data batch. 

1604 

1605 :param accessor_id: 

1606 Name of accessing consumer to be advanced. 

1607 :type accessor_id: 

1608 str 

1609 

1610 :param cache_id: 

1611 Name of cache for which the accessor should be advanced. By

1612 default the named accessor is advanced in all registered caches. 

1613 By default, two caches named ``'default'`` and ``'waveform'`` are 

1614 available. 

1615 :type cache_id: 

1616 str 

1617 

1618 See :py:class:`~pyrocko.squirrel.cache.ContentCache` for details on how 

1619 Squirrel's memory caching works and can be tuned. Default behaviour is 

1620 to release data when it has not been used in the latest data 

1621 window/batch. If the accessor is never advanced, data is cached 

1622 indefinitely - which is often desired e.g. for station meta-data. 

1623 Methods for consecutive data traversal, like 

1624 :py:meth:`chopper_waveforms` automatically advance and clear 

1625 their accessor. 

1626 ''' 

1627 for cache_ in ( 

1628 self._content_caches.keys() 

1629 if cache_id is None 

1630 else [cache_id]): 

1631 

1632 self._content_caches[cache_].advance_accessor(accessor_id) 

1633 

1634 def clear_accessor(self, accessor_id, cache_id=None): 

1635 ''' 

1636 Notify memory caches about a consumer having finished. 

1637 

1638 :param accessor_id: 

1639 Name of accessor to be cleared. 

1640 :type accessor_id: 

1641 str 

1642 

1643 :param cache_id: 

1644 Name of cache for which the accessor should be cleared. By default 

1645 the named accessor is cleared from all registered caches. By 

1646 default, two caches named ``'default'`` and ``'waveform'`` are 

1647 available. 

1648 :type cache_id: 

1649 str 

1650 

1651 Calling this method clears all references to cache entries held by the 

1652 named accessor. Cache entries are then freed if not referenced by any 

1653 other accessor. 

1654 ''' 

1655 

1656 for cache_ in ( 

1657 self._content_caches.keys() 

1658 if cache_id is None 

1659 else [cache_id]): 

1660 

1661 self._content_caches[cache_].clear_accessor(accessor_id) 

1662 

1663 def get_cache_stats(self, cache_id): 

1664 return self._content_caches[cache_id].get_stats() 

1665 

1666 def _check_duplicates(self, nuts): 

1667 d = defaultdict(list) 

1668 for nut in nuts: 

1669 d[nut.codes].append(nut) 

1670 

1671 for codes, group in d.items(): 

1672 if len(group) > 1: 

1673 logger.warning( 

1674 'Multiple entries matching codes: %s' % str(codes)) 

1675 

1676 @filldocs 

1677 def get_stations( 

1678 self, obj=None, tmin=None, tmax=None, time=None, codes=None, 

1679 model='squirrel'): 

1680 

1681 ''' 

1682 Get stations matching given constraints. 

1683 

1684 %(query_args)s 

1685 

1686 :param model: 

1687 Select object model for returned values: ``'squirrel'`` to get 

1688 Squirrel station objects or ``'pyrocko'`` to get Pyrocko station 

1689 objects with channel information attached. 

1690 :type model: 

1691 str 

1692 

1693 :returns: 

1694 List of :py:class:`pyrocko.squirrel.Station 

1695 <pyrocko.squirrel.model.Station>` objects by default or list of 

1696 :py:class:`pyrocko.model.Station <pyrocko.model.station.Station>` 

1697 objects if ``model='pyrocko'`` is requested. 

1698 

1699 See :py:meth:`iter_nuts` for details on time span matching. 
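
        Example (sketch)::

            for station in sq.get_stations(model='pyrocko'):
                print(station)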

1700 ''' 

1701 

1702 if model == 'pyrocko': 

1703 return self._get_pyrocko_stations(obj, tmin, tmax, time, codes) 

1704 elif model in ('squirrel', 'stationxml'): 

1705 args = self._get_selection_args( 

1706 STATION, obj, tmin, tmax, time, codes) 

1707 

1708 nuts = sorted( 

1709 self.iter_nuts('station', *args), key=lambda nut: nut.dkey) 

1710 self._check_duplicates(nuts) 

1711 return [self.get_content(nut, model=model) for nut in nuts] 

1712 else: 

1713 raise ValueError('Invalid station model: %s' % model) 

1714 

1715 @filldocs 

1716 def get_channels( 

1717 self, obj=None, tmin=None, tmax=None, time=None, codes=None, 

1718 model='squirrel'): 

1719 

1720 ''' 

1721 Get channels matching given constraints. 

1722 

1723 %(query_args)s 

1724 

1725 :returns: 

1726 List of :py:class:`~pyrocko.squirrel.model.Channel` objects. 

1727 

1728 See :py:meth:`iter_nuts` for details on time span matching. 

1729 ''' 

1730 

1731 args = self._get_selection_args( 

1732 CHANNEL, obj, tmin, tmax, time, codes) 

1733 

1734 nuts = sorted( 

1735 self.iter_nuts('channel', *args), key=lambda nut: nut.dkey) 

1736 self._check_duplicates(nuts) 

1737 return [self.get_content(nut, model=model) for nut in nuts] 

1738 

1739 @filldocs 

1740 def get_sensors( 

1741 self, obj=None, tmin=None, tmax=None, time=None, codes=None): 

1742 

1743 ''' 

1744 Get sensors matching given constraints. 

1745 

1746 %(query_args)s 

1747 

1748 :returns: 

1749 List of :py:class:`~pyrocko.squirrel.model.Sensor` objects. 

1750 

1751 See :py:meth:`iter_nuts` for details on time span matching. 

1752 ''' 

1753 

1754 tmin, tmax, codes = self._get_selection_args( 

1755 CHANNEL, obj, tmin, tmax, time, codes) 

1756 

1757 if codes is not None: 

1758 codes = codes_patterns_list( 

1759 (entry.replace(channel=entry.channel[:-1] + '?') 

1760 if entry != '*' else entry) 

1761 for entry in codes) 

1762 

1763 nuts = sorted( 

1764 self.iter_nuts( 

1765 'channel', tmin, tmax, codes), key=lambda nut: nut.dkey) 

1766 self._check_duplicates(nuts) 

1767 return model.Sensor.from_channels( 

1768 self.get_content(nut) for nut in nuts) 

1769 

1770 @filldocs 

1771 def get_responses( 

1772 self, obj=None, tmin=None, tmax=None, time=None, codes=None, 

1773 model='squirrel'): 

1774 

1775 ''' 

1776 Get instrument responses matching given constraints. 

1777 

1778 %(query_args)s 

1779 

1780 :returns: 

1781 List of :py:class:`~pyrocko.squirrel.model.Response` objects. 

1782 

1783 See :py:meth:`iter_nuts` for details on time span matching. 

1784 ''' 

1785 

1786 args = self._get_selection_args( 

1787 RESPONSE, obj, tmin, tmax, time, codes) 

1788 

1789 nuts = sorted( 

1790 self.iter_nuts('response', *args), key=lambda nut: nut.dkey) 

1791 self._check_duplicates(nuts) 

1792 return [self.get_content(nut, model=model) for nut in nuts] 

1793 

1794 @filldocs 

1795 def get_response( 

1796 self, obj=None, tmin=None, tmax=None, time=None, codes=None, 

1797 model='squirrel'): 

1798 

1799 ''' 

1800 Get instrument response matching given constraints. 

1801 

1802 %(query_args)s 

1803 

1804 :returns: 

1805 :py:class:`~pyrocko.squirrel.model.Response` object. 

1806 

1807 Same as :py:meth:`get_responses` but returning exactly one response. 

1808 Raises :py:exc:`~pyrocko.squirrel.error.NotAvailable` if zero or more 

1809 than one is available. 

1810 

1811 See :py:meth:`iter_nuts` for details on time span matching. 

1812 ''' 

1813 

1814 responses = self.get_responses( 

1815 obj, tmin, tmax, time, codes, model=model) 

1816 if len(responses) == 0: 

1817 raise error.NotAvailable( 

1818 'No instrument response available (%s).' 

1819 % self._get_selection_args_str( 

1820 RESPONSE, obj, tmin, tmax, time, codes)) 

1821 

1822 elif len(responses) > 1: 

1823 if model == 'squirrel': 

1824 rinfo = ':\n' + '\n'.join( 

1825 ' ' + resp.summary for resp in responses) 

1826 else: 

1827 rinfo = '.' 

1828 

1829 raise error.NotAvailable( 

1830 'Multiple instrument responses matching given constraints ' 

1831 '(%s)%s' % ( 

1832 self._get_selection_args_str( 

1833 RESPONSE, obj, tmin, tmax, time, codes), rinfo)) 

1834 

1835 return responses[0] 

1836 

1837 @filldocs 

1838 def get_events( 

1839 self, obj=None, tmin=None, tmax=None, time=None, codes=None): 

1840 

1841 ''' 

1842 Get events matching given constraints. 

1843 

1844 %(query_args)s 

1845 

1846 :returns: 

1847 List of :py:class:`~pyrocko.model.event.Event` objects. 

1848 

1849 See :py:meth:`iter_nuts` for details on time span matching. 
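
Example (illustrative sketch; ``sq`` is assumed to be a
:py:class:`Squirrel` instance with event catalog data added)::

    from pyrocko import util

    events = sq.get_events(
        tmin=util.str_to_time('2021-01-01 00:00:00'),
        tmax=util.str_to_time('2021-02-01 00:00:00'))

    for event in events:
        print(event.name, util.time_to_str(event.time))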

1850 ''' 

1851 

1852 args = self._get_selection_args(EVENT, obj, tmin, tmax, time, codes) 

1853 nuts = sorted( 

1854 self.iter_nuts('event', *args), key=lambda nut: nut.dkey) 

1855 self._check_duplicates(nuts) 

1856 return [self.get_content(nut) for nut in nuts] 

1857 

1858 def _redeem_promises(self, *args): 

1859 

1860 tmin, tmax, _ = args 

1861 

1862 waveforms = list(self.iter_nuts('waveform', *args)) 

1863 promises = list(self.iter_nuts('waveform_promise', *args)) 

1864 

1865 codes_to_avail = defaultdict(list) 

1866 for nut in waveforms: 

1867 codes_to_avail[nut.codes].append((nut.tmin, nut.tmax)) 

1868 

1869 def tts(x): 

1870 if isinstance(x, tuple): 

1871 return tuple(tts(e) for e in x) 

1872 elif isinstance(x, list): 

1873 return list(tts(e) for e in x) 

1874 else: 

1875 return util.time_to_str(x) 

1876 

1877 orders = [] 

1878 for promise in promises: 

1879 waveforms_avail = codes_to_avail[promise.codes] 

1880 for block_tmin, block_tmax in blocks( 

1881 max(tmin, promise.tmin), 

1882 min(tmax, promise.tmax), 

1883 promise.deltat): 

1884 

1885 orders.append( 

1886 WaveformOrder( 

1887 source_id=promise.file_path, 

1888 codes=promise.codes, 

1889 tmin=block_tmin, 

1890 tmax=block_tmax, 

1891 deltat=promise.deltat, 

1892 gaps=gaps(waveforms_avail, block_tmin, block_tmax))) 

1893 

1894 orders_noop, orders = lpick(lambda order: order.gaps, orders) 

1895 

1896 order_keys_noop = set(order_key(order) for order in orders_noop) 

1897 if len(order_keys_noop) != 0 or len(orders_noop) != 0: 

1898 logger.info( 

1899 'Waveform orders already satisfied with cached/local data: ' 

1900 '%i (%i)' % (len(order_keys_noop), len(orders_noop))) 

1901 

1902 source_ids = [] 

1903 sources = {} 

1904 for source in self._sources: 

1905 if isinstance(source, fdsn.FDSNSource): 

1906 source_ids.append(source._source_id) 

1907 sources[source._source_id] = source 

1908 

1909 source_priority = dict( 

1910 (source_id, i) for (i, source_id) in enumerate(source_ids)) 

1911 

1912 order_groups = defaultdict(list) 

1913 for order in orders: 

1914 order_groups[order_key(order)].append(order) 

1915 

1916 for k, order_group in order_groups.items(): 

1917 order_group.sort( 

1918 key=lambda order: source_priority[order.source_id]) 

1919 

1920 n_order_groups = len(order_groups) 

1921 

1922 if len(order_groups) != 0 or len(orders) != 0: 

1923 logger.info( 

1924 'Waveform orders outstanding for download: %i (%i)' 

1925 % (len(order_groups), len(orders))) 

1926 

1927 task = make_task('Waveform orders processed', n_order_groups) 

1928 else: 

1929 task = None 

1930 

1931 def split_promise(order): 

1932 self._split_nuts( 

1933 'waveform_promise', 

1934 order.tmin, order.tmax, 

1935 codes=order.codes, 

1936 path=order.source_id) 

1937 

1938 def release_order_group(order): 

1939 okey = order_key(order) 

1940 for followup in order_groups[okey]: 

1941 split_promise(followup) 

1942 

1943 del order_groups[okey] 

1944 

1945 if task: 

1946 task.update(n_order_groups - len(order_groups)) 

1947 

1948 def noop(order): 

1949 pass 

1950 

1951 def success(order): 

1952 release_order_group(order) 

1953 split_promise(order) 

1954 

1955 def batch_add(paths): 

1956 self.add(paths) 

1957 
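# The download threads below never touch the database directly: their
# success / permanent-error / batch-add callbacks are wrapped with
# ``enqueue`` and pushed onto this queue, and the main thread pops and
# executes them, so all index updates happen in a single thread.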

1958 calls = queue.Queue() 

1959 

1960 def enqueue(f): 

1961 def wrapper(*args): 

1962 calls.put((f, args)) 

1963 

1964 return wrapper 

1965 

1966 for order in orders_noop: 

1967 split_promise(order) 

1968 
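# Round-robin over the order groups: in every pass, take the
# highest-priority remaining order of each group and hand it to its
# source in a separate download thread. A successful order releases its
# whole group; a permanently failed order only removes its own promise,
# so the next pass retries the group with the next source.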

1969 while order_groups: 

1970 

1971 orders_now = [] 

1972 empty = [] 

1973 for k, order_group in order_groups.items(): 

1974 try: 

1975 orders_now.append(order_group.pop(0)) 

1976 except IndexError: 

1977 empty.append(k) 

1978 

1979 for k in empty: 

1980 del order_groups[k] 

1981 

1982 by_source_id = defaultdict(list) 

1983 for order in orders_now: 

1984 by_source_id[order.source_id].append(order) 

1985 

1986 threads = [] 

1987 for source_id in by_source_id: 

1988 def download(source_id=source_id):  # bind loop variable for this thread 

1989 try: 

1990 sources[source_id].download_waveforms( 

1991 by_source_id[source_id], 

1992 success=enqueue(success), 

1993 error_permanent=enqueue(split_promise), 

1994 error_temporary=noop, 

1995 batch_add=enqueue(batch_add)) 

1996 

1997 finally: 

1998 calls.put(None) 

1999 

2000 thread = threading.Thread(target=download) 

2001 thread.start() 

2002 threads.append(thread) 

2003 

2004 ndone = 0 

2005 while ndone < len(threads): 

2006 ret = calls.get() 

2007 if ret is None: 

2008 ndone += 1 

2009 else: 

2010 ret[0](*ret[1]) 

2011 

2012 for thread in threads: 

2013 thread.join() 

2014 

2015 if task: 

2016 task.update(n_order_groups - len(order_groups)) 

2017 

2018 if task: 

2019 task.done() 

2020 

2021 @filldocs 

2022 def get_waveform_nuts( 

2023 self, obj=None, tmin=None, tmax=None, time=None, codes=None): 

2024 

2025 ''' 

2026 Get waveform content entities matching given constraints. 

2027 

2028 %(query_args)s 

2029 

2030 Like :py:meth:`get_nuts` with ``kind='waveform'`` but additionally 

2031 resolves matching waveform promises (downloads waveforms from remote 

2032 sources). 

2033 

2034 See :py:meth:`iter_nuts` for details on time span matching. 

2035 ''' 

2036 

2037 args = self._get_selection_args(WAVEFORM, obj, tmin, tmax, time, codes) 

2038 self._redeem_promises(*args) 

2039 return sorted( 

2040 self.iter_nuts('waveform', *args), key=lambda nut: nut.dkey) 

2041 

2042 @filldocs 

2043 def get_waveforms( 

2044 self, obj=None, tmin=None, tmax=None, time=None, codes=None, 

2045 uncut=False, want_incomplete=True, degap=True, maxgap=5, 

2046 maxlap=None, snap=None, include_last=False, load_data=True, 

2047 accessor_id='default', operator_params=None): 

2048 

2049 ''' 

2050 Get waveforms matching given constraints. 

2051 

2052 %(query_args)s 

2053 

2054 :param uncut: 

2055 Set to ``True`` to disable cutting traces to [``tmin``, ``tmax``] 

2056 and to disable degapping/deoverlapping. Returns untouched traces 

2057 as they are read from the file segments. File segments are always 

2058 read in their entirety. 

2059 :type uncut: 

2060 bool 

2061 

2062 :param want_incomplete: 

2063 If ``True``, gappy/incomplete traces are included in the result. 

2064 :type want_incomplete: 

2065 bool 

2066 

2067 :param degap: 

2068 If ``True``, connect traces and remove gaps and overlaps. 

2069 :type degap: 

2070 bool 

2071 

2072 :param maxgap: 

2073 Maximum gap size in samples which is filled with interpolated 

2074 samples when ``degap`` is ``True``. 

2075 :type maxgap: 

2076 int 

2077 

2078 :param maxlap: 

2079 Maximum overlap size in samples which is removed when ``degap`` is 

2080 ``True``. 

2081 :type maxlap: 

2082 int 

2083 

2084 :param snap: 

2085 Rounding functions used when computing sample index from time 

2086 instant, for trace start and trace end, respectively. By default, 

2087 ``(round, round)`` is used. 

2088 :type snap: 

2089 tuple of 2 callables 

2090 

2091 :param include_last: 

2092 If ``True``, add one more sample to the returned traces (the sample 

2093 which would be the first sample of a query with ``tmin`` set to the 

2094 current value of ``tmax``). 

2095 :type include_last: 

2096 bool 

2097 

2098 :param load_data: 

2099 If ``True``, waveform data samples are read from files (or cache). 

2100 If ``False``, meta-information-only traces are returned (dummy 

2101 traces with no data samples). 

2102 :type load_data: 

2103 bool 

2104 

2105 :param accessor_id: 

2106 Name of consumer on whose behalf data is accessed. Used in cache 

2107 management (see :py:mod:`~pyrocko.squirrel.cache`). Used as a key 

2108 to distinguish different points of extraction for the decision of 

2109 when to release cached waveform data. Should be used when data is 

2110 alternately extracted from more than one region / selection. 

2111 :type accessor_id: 

2112 str 

2113 

2114 See :py:meth:`iter_nuts` for details on time span matching. 

2115 

2116 Loaded data is kept in memory (at least) until 

2117 :py:meth:`clear_accessor` has been called or 

2118 :py:meth:`advance_accessor` has been called two consecutive times 

2119 without data being accessed between the two calls (by this accessor). 

2120 Data may still be further kept in the memory cache if held alive by 

2121 consumers with a different ``accessor_id``. 
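
Example (illustrative sketch; ``sq`` is assumed to be a
:py:class:`Squirrel` instance with waveform data or waveform promises
added, and the codes pattern is hypothetical)::

    from pyrocko import util

    traces = sq.get_waveforms(
        codes='*.STA01.*.BHZ',
        tmin=util.str_to_time('2021-01-01 00:00:00'),
        tmax=util.str_to_time('2021-01-01 00:10:00'),
        want_incomplete=False)

    for tr in traces:
        print(tr)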

2122 ''' 

2123 

2124 tmin, tmax, codes = self._get_selection_args( 

2125 WAVEFORM, obj, tmin, tmax, time, codes) 

2126 

2127 self_tmin, self_tmax = self.get_time_span( 

2128 ['waveform', 'waveform_promise']) 

2129 

2130 if None in (self_tmin, self_tmax): 

2131 logger.warning( 

2132 'No waveforms available.') 

2133 return [] 

2134 

2135 tmin = tmin if tmin is not None else self_tmin 

2136 tmax = tmax if tmax is not None else self_tmax 

2137 

2138 if codes is not None and len(codes) == 1: 

2139 # TODO: fix for multiple / mixed codes 

2140 operator = self.get_operator(codes[0]) 

2141 if operator is not None: 

2142 return operator.get_waveforms( 

2143 self, codes[0], 

2144 tmin=tmin, tmax=tmax, 

2145 uncut=uncut, want_incomplete=want_incomplete, degap=degap, 

2146 maxgap=maxgap, maxlap=maxlap, snap=snap, 

2147 include_last=include_last, load_data=load_data, 

2148 accessor_id=accessor_id, params=operator_params) 

2149 

2150 nuts = self.get_waveform_nuts(obj, tmin, tmax, time, codes) 

2151 

2152 if load_data: 

2153 traces = [ 

2154 self.get_content(nut, 'waveform', accessor_id) for nut in nuts] 

2155 

2156 else: 

2157 traces = [ 

2158 trace.Trace(**nut.trace_kwargs) for nut in nuts] 

2159 

2160 if uncut: 

2161 return traces 

2162 

2163 if snap is None: 

2164 snap = (round, round) 

2165 

2166 chopped = [] 

2167 for tr in traces: 

2168 if not load_data and tr.ydata is not None: 

2169 tr = tr.copy(data=False) 

2170 tr.ydata = None 

2171 

2172 try: 

2173 chopped.append(tr.chop( 

2174 tmin, tmax, 

2175 inplace=False, 

2176 snap=snap, 

2177 include_last=include_last)) 

2178 

2179 except trace.NoData: 

2180 pass 

2181 

2182 processed = self._process_chopped( 

2183 chopped, degap, maxgap, maxlap, want_incomplete, tmin, tmax) 

2184 

2185 return processed 

2186 

2187 @filldocs 

2188 def chopper_waveforms( 

2189 self, obj=None, tmin=None, tmax=None, time=None, codes=None, 

2190 tinc=None, tpad=0., 

2191 want_incomplete=True, snap_window=False, 

2192 degap=True, maxgap=5, maxlap=None, 

2193 snap=None, include_last=False, load_data=True, 

2194 accessor_id=None, clear_accessor=True, operator_params=None): 

2195 

2196 ''' 

2197 Iterate window-wise over waveform archive. 

2198 

2199 %(query_args)s 

2200 

2201 :param tinc: 

2202 Time increment (window shift time). If not given, ``tmax - tmin`` is used. 

2203 :type tinc: 

2204 float (time duration in seconds) 

2205 

2206 :param tpad: 

2207 Padding time appended on either side of the data window (window 

2208 overlap is ``2*tpad``). 

2209 :type tpad: 

2210 float (time duration in seconds) 

2211 

2212 :param want_incomplete: 

2213 If ``True``, gappy/incomplete traces are included in the result. 

2214 :type want_incomplete: 

2215 bool 

2216 

2217 :param snap_window: 

2218 If ``True``, start time windows at multiples of tinc with respect 

2219 to system time zero. 

2220 :type snap_window: 

2221 bool 

2222 

2223 :param degap: 

2224 If ``True``, connect traces and remove gaps and overlaps. 

2225 :type degap: 

2226 bool 

2227 

2228 :param maxgap: 

2229 Maximum gap size in samples which is filled with interpolated 

2230 samples when ``degap`` is ``True``. 

2231 :type maxgap: 

2232 int 

2233 

2234 :param maxlap: 

2235 Maximum overlap size in samples which is removed when ``degap`` is 

2236 ``True``. 

2237 :type maxlap: 

2238 int 

2239 

2240 :param snap: 

2241 Rounding functions used when computing sample index from time 

2242 instant, for trace start and trace end, respectively. By default, 

2243 ``(round, round)`` is used. 

2244 :type snap: 

2245 tuple of 2 callables 

2246 

2247 :param include_last: 

2248 If ``True``, add one more sample to the returned traces (the sample 

2249 which would be the first sample of a query with ``tmin`` set to the 

2250 current value of ``tmax``). 

2251 :type include_last: 

2252 bool 

2253 

2254 :param load_data: 

2255 If ``True``, waveform data samples are read from files (or cache). 

2256 If ``False``, meta-information-only traces are returned (dummy 

2257 traces with no data samples). 

2258 :type load_data: 

2259 bool 

2260 

2261 :param accessor_id: 

2262 Name of consumer on whose behalf data is accessed. Used in cache 

2263 management (see :py:mod:`~pyrocko.squirrel.cache`). Used as a key 

2264 to distinguish different points of extraction for the decision of 

2265 when to release cached waveform data. Should be used when data is 

2266 alternately extracted from more than one region / selection. 

2267 :type accessor_id: 

2268 str 

2269 

2270 :param clear_accessor: 

2271 If ``True`` (default), :py:meth:`clear_accessor` is called when the 

2272 chopper finishes. Set to ``False`` to keep loaded waveforms in 

2273 memory when the generator returns. 

2274 :type clear_accessor: 

2275 bool 

2276 

2277 :yields: 

2278 A list of :py:class:`~pyrocko.trace.Trace` objects for every 

2279 extracted time window. 

2280 

2281 See :py:meth:`iter_nuts` for details on time span matching. 
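
Example (illustrative sketch; ``sq`` and the codes pattern are assumed
as in the example for :py:meth:`get_waveforms`)::

    from pyrocko import util

    tmin = util.str_to_time('2021-01-01 00:00:00')
    tmax = util.str_to_time('2021-01-02 00:00:00')

    for batch in sq.chopper_waveforms(
            tmin=tmin, tmax=tmax, tinc=3600., tpad=10.,
            codes='*.STA01.*.BHZ'):

        for tr in batch.traces:
            print(batch.i, batch.n, tr)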

2282 ''' 

2283 

2284 tmin, tmax, codes = self._get_selection_args( 

2285 WAVEFORM, obj, tmin, tmax, time, codes) 

2286 

2287 self_tmin, self_tmax = self.get_time_span( 

2288 ['waveform', 'waveform_promise']) 

2289 

2290 if None in (self_tmin, self_tmax): 

2291 logger.warning( 

2292 'Content has undefined time span. No waveforms and no ' 

2293 'waveform promises?') 

2294 return 

2295 

2296 if snap_window and tinc is not None: 

2297 tmin = tmin if tmin is not None else self_tmin 

2298 tmax = tmax if tmax is not None else self_tmax 

2299 tmin = math.floor(tmin / tinc) * tinc 

2300 tmax = math.ceil(tmax / tinc) * tinc 

2301 else: 

2302 tmin = tmin if tmin is not None else self_tmin + tpad 

2303 tmax = tmax if tmax is not None else self_tmax - tpad 

2304 

2305 tinc = tinc if tinc is not None else tmax - tmin 

2306 

2307 try: 

2308 if accessor_id is None: 

2309 accessor_id = 'chopper%i' % self._n_choppers_active 

2310 

2311 self._n_choppers_active += 1 

2312 

2313 eps = tinc * 1e-6 

2314 if tinc != 0.0: 

2315 nwin = int(((tmax - eps) - tmin) / tinc) + 1 

2316 else: 

2317 nwin = 1 

2318 

2319 for iwin in range(nwin): 

2320 wmin, wmax = tmin+iwin*tinc, min(tmin+(iwin+1)*tinc, tmax) 

2321 

2322 chopped = self.get_waveforms( 

2323 tmin=wmin-tpad, 

2324 tmax=wmax+tpad, 

2325 codes=codes, 

2326 snap=snap, 

2327 include_last=include_last, 

2328 load_data=load_data, 

2329 want_incomplete=want_incomplete, 

2330 degap=degap, 

2331 maxgap=maxgap, 

2332 maxlap=maxlap, 

2333 accessor_id=accessor_id, 

2334 operator_params=operator_params) 

2335 

2336 self.advance_accessor(accessor_id) 

2337 

2338 yield Batch( 

2339 tmin=wmin, 

2340 tmax=wmax, 

2341 i=iwin, 

2342 n=nwin, 

2343 traces=chopped) 

2344 

2345 iwin += 1 

2346 

2347 finally: 

2348 self._n_choppers_active -= 1 

2349 if clear_accessor: 

2350 self.clear_accessor(accessor_id, 'waveform') 

2351 

2352 def _process_chopped( 

2353 self, chopped, degap, maxgap, maxlap, want_incomplete, tmin, tmax): 

2354 

2355 chopped.sort(key=lambda a: a.full_id) 

2356 if degap: 

2357 chopped = trace.degapper(chopped, maxgap=maxgap, maxlap=maxlap) 

2358 
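# When incomplete traces are unwanted, keep only traces whose start and
# end match the requested span to within half a sample. If degapping is
# enabled, traces missing up to five samples at either edge are padded
# by repeating the edge sample and kept as well.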

2359 if not want_incomplete: 

2360 chopped_weeded = [] 

2361 for tr in chopped: 

2362 emin = tr.tmin - tmin 

2363 emax = tr.tmax + tr.deltat - tmax 

2364 if (abs(emin) <= 0.5*tr.deltat and abs(emax) <= 0.5*tr.deltat): 

2365 chopped_weeded.append(tr) 

2366 

2367 elif degap: 

2368 if (0. < emin <= 5. * tr.deltat 

2369 and -5. * tr.deltat <= emax < 0.): 

2370 

2371 tr.extend(tmin, tmax-tr.deltat, fillmethod='repeat') 

2372 chopped_weeded.append(tr) 

2373 

2374 chopped = chopped_weeded 

2375 

2376 return chopped 

2377 

2378 def _get_pyrocko_stations( 

2379 self, obj=None, tmin=None, tmax=None, time=None, codes=None): 

2380 

2381 from pyrocko import model as pmodel 

2382 

2383 by_nsl = defaultdict(lambda: (list(), list())) 

2384 for station in self.get_stations(obj, tmin, tmax, time, codes): 

2385 sargs = station._get_pyrocko_station_args() 

2386 by_nsl[station.codes.nsl][0].append(sargs) 

2387 

2388 for channel in self.get_channels(obj, tmin, tmax, time, codes): 

2389 sargs = channel._get_pyrocko_station_args() 

2390 sargs_list, channels_list = by_nsl[channel.codes.nsl] 

2391 sargs_list.append(sargs) 

2392 channels_list.append(channel) 

2393 

2394 pstations = [] 

2395 nsls = list(by_nsl.keys()) 

2396 nsls.sort() 

2397 for nsl in nsls: 

2398 sargs_list, channels_list = by_nsl[nsl] 

2399 sargs = util.consistency_merge( 

2400 [('',) + x for x in sargs_list]) 

2401 

2402 by_c = defaultdict(list) 

2403 for ch in channels_list: 

2404 by_c[ch.codes.channel].append(ch._get_pyrocko_channel_args()) 

2405 

2406 chas = list(by_c.keys()) 

2407 chas.sort() 

2408 pchannels = [] 

2409 for cha in chas: 

2410 list_of_cargs = by_c[cha] 

2411 cargs = util.consistency_merge( 

2412 [('',) + x for x in list_of_cargs]) 

2413 pchannels.append(pmodel.Channel(*cargs)) 

2414 

2415 pstations.append( 

2416 pmodel.Station(*sargs, channels=pchannels)) 

2417 

2418 return pstations 

2419 

2420 @property 

2421 def pile(self): 

2422 

2423 ''' 

2424 Emulates the older :py:class:`pyrocko.pile.Pile` interface. 

2425 

2426 This property exposes a :py:class:`pyrocko.squirrel.pile.Pile` object, 

2427 which emulates most of the older :py:class:`pyrocko.pile.Pile` methods 

2428 but uses the fluffy power of the Squirrel under the hood. 

2429 

2430 This interface can be used as a drop-in replacement for piles which are 

2431 used in existing scripts and programs for efficient waveform data 

2432 access. The Squirrel-based pile scales better for large datasets. Newer 

2433 scripts should use Squirrel's native methods to avoid the emulation 

2434 overhead. 
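
Example (illustrative sketch; ``sq`` is assumed to be a
:py:class:`Squirrel` instance with waveform data added)::

    p = sq.pile    # pile-like view on the Squirrel's data
    p.snuffle()    # e.g. pass ``p`` to code expecting a pile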

2435 ''' 

2436 from . import pile 

2437 

2438 if self._pile is None: 

2439 self._pile = pile.Pile(self) 

2440 

2441 return self._pile 

2442 

2443 def snuffle(self): 

2444 ''' 

2445 Look at the dataset in Snuffler. 

2446 ''' 

2447 self.pile.snuffle() 

2448 

2449 def _gather_codes_keys(self, kind, gather, selector): 

2450 return set( 

2451 gather(codes) 

2452 for codes in self.iter_codes(kind) 

2453 if selector is None or selector(codes)) 

2454 

2455 def __str__(self): 

2456 return str(self.get_stats()) 

2457 

2458 def get_coverage( 

2459 self, kind, tmin=None, tmax=None, codes=None, limit=None): 

2460 

2461 ''' 

2462 Get coverage information. 

2463 

2464 Get information about strips of gapless data coverage. 

2465 

2466 :param kind: 

2467 Content kind to be queried. 

2468 :type kind: 

2469 str 

2470 

2471 :param tmin: 

2472 Start time of query interval. 

2473 :type tmin: 

2474 timestamp 

2475 

2476 :param tmax: 

2477 End time of query interval. 

2478 :type tmax: 

2479 timestamp 

2480 

2481 :param codes: 

2482 If given, restrict query to given content codes patterns. 

2483 :type codes: 

2484 :py:class:`list` of :py:class:`~pyrocko.squirrel.model.Codes` 

2485 objects appropriate for the queried content type, or anything which 

2486 can be converted to such objects. 

2487 

2488 :param limit: 

2489 Limit query to return only up to a given maximum number of entries 

2490 per matching time series (without setting this option, very gappy 

2491 data could cause the query to execute for a very long time). 

2492 :type limit: 

2493 int 

2494 

2495 :returns: 

2496 Information about time spans covered by the requested time series 

2497 data. 

2498 :rtype: 

2499 :py:class:`list` of :py:class:`Coverage` objects 
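
Example (illustrative sketch; ``sq`` is assumed to be a
:py:class:`Squirrel` instance with waveform data added)::

    from pyrocko import util

    coverage = sq.get_coverage(
        'waveform',
        tmin=util.str_to_time('2021-01-01 00:00:00'),
        tmax=util.str_to_time('2021-02-01 00:00:00'))

    for cov in coverage:
        print(cov)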

2500 ''' 

2501 

2502 tmin_seconds, tmin_offset = model.tsplit(tmin) 

2503 tmax_seconds, tmax_offset = model.tsplit(tmax) 

2504 kind_id = to_kind_id(kind) 

2505 

2506 codes_info = list(self._iter_codes_info(kind=kind)) 

2507 

2508 kdata_all = [] 

2509 if codes is None: 

2510 for _, codes_entry, deltat, kind_codes_id, _ in codes_info: 

2511 kdata_all.append( 

2512 (codes_entry, kind_codes_id, codes_entry, deltat)) 

2513 

2514 else: 

2515 for codes_entry in codes: 

2516 pattern = to_codes(kind_id, codes_entry) 

2517 for _, codes_entry, deltat, kind_codes_id, _ in codes_info: 

2518 if model.match_codes(pattern, codes_entry): 

2519 kdata_all.append( 

2520 (pattern, kind_codes_id, codes_entry, deltat)) 

2521 

2522 kind_codes_ids = [x[1] for x in kdata_all] 

2523 

2524 counts_at_tmin = {} 

2525 if tmin is not None: 

2526 for nut in self.iter_nuts( 

2527 kind, tmin, tmin, kind_codes_ids=kind_codes_ids): 

2528 

2529 k = nut.codes, nut.deltat 

2530 if k not in counts_at_tmin: 

2531 counts_at_tmin[k] = 0 

2532 

2533 counts_at_tmin[k] += 1 

2534 

2535 coverages = [] 

2536 for pattern, kind_codes_id, codes_entry, deltat in kdata_all: 

2537 entry = [pattern, codes_entry, deltat, None, None, []] 

2538 for i, order in [(0, 'ASC'), (1, 'DESC')]: 

2539 sql = self._sql(''' 

2540 SELECT 

2541 time_seconds, 

2542 time_offset 

2543 FROM %(db)s.%(coverage)s 

2544 WHERE 

2545 kind_codes_id == ? 

2546 ORDER BY 

2547 kind_codes_id ''' + order + ''', 

2548 time_seconds ''' + order + ''', 

2549 time_offset ''' + order + ''' 

2550 LIMIT 1 

2551 ''') 

2552 

2553 for row in self._conn.execute(sql, [kind_codes_id]): 

2554 entry[3+i] = model.tjoin(row[0], row[1]) 

2555 

2556 if None in entry[3:5]: 

2557 continue 

2558 

2559 args = [kind_codes_id] 

2560 

2561 sql_time = '' 

2562 if tmin is not None: 

2563 # intentionally < because (== tmin) is queried from nuts 

2564 sql_time += ' AND ( ? < time_seconds ' \ 

2565 'OR ( ? == time_seconds AND ? < time_offset ) ) ' 

2566 args.extend([tmin_seconds, tmin_seconds, tmin_offset]) 

2567 

2568 if tmax is not None: 

2569 sql_time += ' AND ( time_seconds < ? ' \ 

2570 'OR ( ? == time_seconds AND time_offset <= ? ) ) ' 

2571 args.extend([tmax_seconds, tmax_seconds, tmax_offset]) 

2572 

2573 sql_limit = '' 

2574 if limit is not None: 

2575 sql_limit = ' LIMIT ?' 

2576 args.append(limit) 

2577 

2578 sql = self._sql(''' 

2579 SELECT 

2580 time_seconds, 

2581 time_offset, 

2582 step 

2583 FROM %(db)s.%(coverage)s 

2584 WHERE 

2585 kind_codes_id == ? 

2586 ''' + sql_time + ''' 

2587 ORDER BY 

2588 kind_codes_id, 

2589 time_seconds, 

2590 time_offset 

2591 ''' + sql_limit) 

2592 

2593 rows = list(self._conn.execute(sql, args)) 

2594 

2595 if limit is not None and len(rows) == limit: 

2596 entry[-1] = None 

2597 else: 

2598 counts = counts_at_tmin.get((codes_entry, deltat), 0) 

2599 tlast = None 

2600 if tmin is not None: 

2601 entry[-1].append((tmin, counts)) 

2602 tlast = tmin 

2603 

2604 for row in rows: 

2605 t = model.tjoin(row[0], row[1]) 

2606 counts += row[2] 

2607 entry[-1].append((t, counts)) 

2608 tlast = t 

2609 

2610 if tmax is not None and (tlast is None or tlast != tmax): 

2611 entry[-1].append((tmax, counts)) 

2612 

2613 coverages.append(model.Coverage.from_values(entry + [kind_id])) 

2614 

2615 return coverages 

2616 

2617 def add_operator(self, op): 

2618 self._operators.append(op) 

2619 

2620 def update_operator_mappings(self): 

2621 available = self.get_codes(kind='channel') 

2622 

2623 for operator in self._operators: 

2624 operator.update_mappings(available, self._operator_registry) 

2625 

2626 def iter_operator_mappings(self): 

2627 for operator in self._operators: 

2628 for in_codes, out_codes in operator.iter_mappings(): 

2629 yield operator, in_codes, out_codes 

2630 

2631 def get_operator_mappings(self): 

2632 return list(self.iter_operator_mappings()) 

2633 

2634 def get_operator(self, codes): 

2635 try: 

2636 return self._operator_registry[codes][0] 

2637 except KeyError: 

2638 return None 

2639 

2640 def get_operator_group(self, codes): 

2641 try: 

2642 return self._operator_registry[codes] 

2643 except KeyError: 

2644 return None, (None, None, None) 

2645 

2646 def iter_operator_codes(self): 

2647 for _, _, out_codes in self.iter_operator_mappings(): 

2648 for codes in out_codes: 

2649 yield codes 

2650 

2651 def get_operator_codes(self): 

2652 return list(self.iter_operator_codes()) 

2653 

2654 def print_tables(self, table_names=None, stream=None): 

2655 ''' 

2656 Dump raw database tables in textual form (for debugging purposes). 

2657 

2658 :param table_names: 

2659 Names of tables to be dumped or ``None`` to dump all. 

2660 :type table_names: 

2661 :py:class:`list` of :py:class:`str` 

2662 

2663 :param stream: 

2664 Open file or ``None`` to dump to standard output. 
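
Example (illustrative sketch; ``sq`` is assumed to be a
:py:class:`Squirrel` instance)::

    sq.print_tables(['files', 'nuts'])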

2665 ''' 

2666 

2667 if stream is None: 

2668 stream = sys.stdout 

2669 

2670 if isinstance(table_names, str): 

2671 table_names = [table_names] 

2672 

2673 if table_names is None: 

2674 table_names = [ 

2675 'selection_file_states', 

2676 'selection_nuts', 

2677 'selection_kind_codes_count', 

2678 'files', 'nuts', 'kind_codes', 'kind_codes_count'] 

2679 

2680 m = { 

2681 'selection_file_states': '%(db)s.%(file_states)s', 

2682 'selection_nuts': '%(db)s.%(nuts)s', 

2683 'selection_kind_codes_count': '%(db)s.%(kind_codes_count)s', 

2684 'files': 'files', 

2685 'nuts': 'nuts', 

2686 'kind_codes': 'kind_codes', 

2687 'kind_codes_count': 'kind_codes_count'} 

2688 

2689 for table_name in table_names: 

2690 self._database.print_table( 

2691 m[table_name] % self._names, stream=stream) 

2692 

2693 

2694class SquirrelStats(Object): 

2695 ''' 

2696 Container to hold statistics about contents available from a Squirrel. 

2697 

2698 See also :py:meth:`Squirrel.get_stats`. 
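
Example (illustrative sketch; ``sq`` is assumed to be a
:py:class:`Squirrel` instance; printing the stats object gives a
human-readable summary)::

    stats = sq.get_stats()
    print(stats)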

2699 ''' 

2700 

2701 nfiles = Int.T( 

2702 help='Number of files in selection.') 

2703 nnuts = Int.T( 

2704 help='Number of index nuts in selection.') 

2705 codes = List.T( 

2706 Tuple.T(content_t=String.T()), 

2707 help='Available code sequences in selection, e.g. ' 

2708 '(agency, network, station, location) for station nuts.') 

2709 kinds = List.T( 

2710 String.T(), 

2711 help='Available content types in selection.') 

2712 total_size = Int.T( 

2713 help='Aggregated file size of files in selection.') 

2714 counts = Dict.T( 

2715 String.T(), Dict.T(Tuple.T(content_t=String.T()), Int.T()), 

2716 help='Breakdown of how many nuts of any content type and code ' 

2717 'sequence are available in selection, ``counts[kind][codes]``.') 

2718 time_spans = Dict.T( 

2719 String.T(), Tuple.T(content_t=Timestamp.T()), 

2720 help='Time spans by content type.') 

2721 sources = List.T( 

2722 String.T(), 

2723 help='Descriptions of attached sources.') 

2724 operators = List.T( 

2725 String.T(), 

2726 help='Descriptions of attached operators.') 

2727 

2728 def __str__(self): 

2729 kind_counts = dict( 

2730 (kind, sum(self.counts[kind].values())) for kind in self.kinds) 

2731 

2732 scodes = model.codes_to_str_abbreviated(self.codes) 

2733 

2734 ssources = '<none>' if not self.sources else '\n' + '\n'.join( 

2735 ' ' + s for s in self.sources) 

2736 

2737 soperators = '<none>' if not self.operators else '\n' + '\n'.join( 

2738 ' ' + s for s in self.operators) 

2739 

2740 def stime(t): 

2741 return util.tts(t) if t is not None and t not in ( 

2742 model.g_tmin, model.g_tmax) else '<none>' 

2743 

2744 def stable(rows): 

2745 ns = [max(len(w) for w in col) for col in zip(*rows)] 

2746 return '\n'.join( 

2747 ' '.join(w.ljust(n) for n, w in zip(ns, row)) 

2748 for row in rows) 

2749 

2750 def indent(s): 

2751 return '\n'.join(' '+line for line in s.splitlines()) 

2752 

2753 stspans = '<none>' if not self.kinds else '\n' + indent(stable([( 

2754 kind + ':', 

2755 str(kind_counts[kind]), 

2756 stime(self.time_spans[kind][0]), 

2757 '-', 

2758 stime(self.time_spans[kind][1])) for kind in sorted(self.kinds)])) 

2759 

2760 s = ''' 

2761Number of files: %i 

2762Total size of known files: %s 

2763Number of index nuts: %i 

2764Available content kinds: %s 

2765Available codes: %s 

2766Sources: %s 

2767Operators: %s''' % ( 

2768 self.nfiles, 

2769 util.human_bytesize(self.total_size), 

2770 self.nnuts, 

2771 stspans, scodes, ssources, soperators) 

2772 

2773 return s.lstrip() 

2774 

2775 

2776__all__ = [ 

2777 'Squirrel', 

2778 'SquirrelStats', 

2779]