1# http://pyrocko.org - GPLv3 

2# 

3# The Pyrocko Developers, 21st Century 

4# ---|P------/S----------~Lg---------- 

5 

6from __future__ import absolute_import, print_function 

7 

8import sys 

9import os 

10 

11import math 

12import logging 

13import threading 

14import queue 

15from collections import defaultdict 

16 

17from pyrocko.guts import Object, Int, List, Tuple, String, Timestamp, Dict 

18from pyrocko import util, trace 

19from pyrocko.progress import progress 

20 

21from . import model, io, cache, dataset 

22 

23from .model import to_kind_id, separator, WaveformOrder 

24from .client import fdsn, catalog 

25from .selection import Selection, filldocs 

26from .database import abspath 

27from . import client, environment, error 

28 

29logger = logging.getLogger('psq.base') 

30 

31guts_prefix = 'squirrel' 

32 

33 

def make_task(*args):
    '''
    Create a progress task which reports through this module's logger.
    '''
    return progress.task(*args, logger=logger)

36 

37 

def lpick(condition, seq):
    '''
    Partition ``seq`` into two lists by ``condition``.

    Returns a tuple ``(nomatch, match)`` where ``nomatch`` holds the
    elements for which ``condition`` is falsy and ``match`` those for
    which it is truthy. Order within each list is preserved.
    '''
    nomatch, match = [], []
    for ele in seq:
        (match if condition(ele) else nomatch).append(ele)

    return nomatch, match

44 

45 

def codes_fill(n, codes):
    '''
    Truncate or right-pad a codes tuple to exactly ``n`` entries.

    Missing entries are filled with the wildcard ``'*'``.
    '''
    npad = n - len(codes)
    return codes[:n] + ('*',) * npad

48 

49 

# Number of codes entries relevant for matching each content kind.
c_kind_to_ncodes = {
    'station': 4,
    'channel': 6,
    'response': 6,
    'waveform': 6,
    'event': 1,
    'waveform_promise': 6,
    'undefined': 1}


# Template of fully inflated codes: first slot empty, remaining slots
# wildcarded.
c_inflated = ['', '*', '*', '*', '*', '*']

# Insertion offset into the inflated template, indexed by the number of
# given codes entries (0 through 6).
c_offsets = [0, 2, 1, 1, 1, 1, 0]

62 

63 

def codes_inflate(codes):
    '''
    Expand a codes tuple into the canonical 6-slot layout.

    The given entries are placed at the slot offset appropriate for
    their count (see ``c_offsets``); remaining slots keep the template
    values from ``c_inflated``. Returns a list of 6 entries.
    '''
    codes = codes[:6]
    offset = c_offsets[len(codes)]
    inflated = list(c_inflated)
    for i, code in enumerate(codes):
        inflated[offset + i] = code

    return inflated

71 

72 

def codes_inflate2(codes):
    '''
    Left-align a codes tuple into the 6-slot layout.

    Unlike :py:func:`codes_inflate`, entries are always placed at the
    beginning; trailing slots keep the wildcard template values.
    Returns a 6-tuple.
    '''
    inflated = list(c_inflated)
    inflated[:len(codes)] = codes
    return tuple(inflated)

78 

79 

def codes_patterns_for_kind(kind, codes):
    '''
    Expand codes (or a sequence of codes tuples) into GLOB patterns
    appropriate for querying contents of the given kind.
    '''
    if not codes:
        return []

    if not isinstance(codes[0], str):
        # sequence of codes tuples: expand each and concatenate
        patterns = []
        for subcodes in codes:
            patterns.extend(codes_patterns_for_kind(kind, subcodes))

        return patterns

    if kind in ('event', 'undefined'):
        return [codes]

    cfill = codes_inflate(codes)[:c_kind_to_ncodes[kind]]

    if kind != 'station':
        return [cfill]

    # stations: additionally match entries with a literal '*' in slot 3
    cfill2 = list(cfill)
    cfill2[3] = '[*]'
    return [cfill, cfill2]

101 

102 

def group_channels(channels):
    '''
    Group channels by their codes with the channel code's last character
    stripped (e.g. ``...BHZ``/``...BHN`` fall into the ``...BH`` group).

    Returns a mapping of group codes to lists of channels.
    '''
    groups = defaultdict(list)
    for channel in channels:
        *head, last = channel.codes
        groups[tuple(head) + (last[:-1],)].append(channel)

    return groups

111 

112 

def pyrocko_station_from_channel_group(group, extra_args):
    '''
    Build a :py:class:`pyrocko.model.Station` from a group of channels.

    :param group:
        Channel objects belonging to a single station.
    :param extra_args:
        Additional station argument tuples to be merged with the
        per-channel ones.
    '''
    # Merge per-channel station attributes into one consistent tuple.
    # NOTE(review): presumably util.consistency_merge complains on
    # conflicting entries — confirm against its implementation.
    list_of_args = [channel._get_pyrocko_station_args() for channel in group]
    args = util.consistency_merge(list_of_args + extra_args)
    from pyrocko import model as pmodel
    return pmodel.Station(
        network=args[0],
        station=args[1],
        location=args[2],
        lat=args[3],
        lon=args[4],
        elevation=args[5],
        depth=args[6],
        channels=[ch.get_pyrocko_channel() for ch in group])

126 

127 

def blocks(tmin, tmax, deltat, nsamples_block=100000):
    '''
    Yield consecutive time windows covering ``[tmin, tmax]``.

    Windows are aligned to multiples of the block duration
    ``deltat * nsamples_block``.
    '''
    tblock = deltat * nsamples_block
    iblock = int(math.floor(tmin / tblock))
    iblock_stop = int(math.ceil(tmax / tblock))
    while iblock < iblock_stop:
        yield iblock * tblock, (iblock + 1) * tblock
        iblock += 1

134 

135 

def gaps(avail, tmin, tmax):
    '''
    Compute time intervals within ``[tmin, tmax]`` not covered by ``avail``.

    :param avail:
        Sequence of ``(tmin_a, tmax_a)`` availability spans.

    Returns a list of ``(tmin_g, tmax_g)`` gap intervals. Zero-length
    gaps are suppressed.
    '''
    assert tmin < tmax

    # Sweep line over interval boundaries: +1 opens coverage, -1 closes
    # it. The query span contributes inverted events so that coverage
    # level 1 means "inside query but not covered".
    events = [(tmax, 1), (tmin, -1)]
    for (tmin_a, tmax_a) in avail:
        assert tmin_a < tmax_a
        events.append((tmin_a, 1))
        events.append((tmax_a, -1))

    events.sort()

    uncovered = []
    level = 1
    gap_start = None
    for t, step in events:
        if level == 1 and step == -1:
            gap_start = t
        elif level == 0 and step == 1 and gap_start is not None:
            if gap_start != t:
                uncovered.append((gap_start, t))

        level += step

    return uncovered

160 

161 

def order_key(order):
    '''
    Key function identifying a waveform order by codes and time span.
    '''
    return order.codes, order.tmin, order.tmax

164 

165 

class Batch(object):
    '''
    Result of a single window in window-wise waveform extraction.

    One instance is yielded per time window by the
    :py:meth:`Squirrel.chopper_waveforms` method, bundling the traces
    extracted for that window together with the window's position in the
    overall sequence.

    *Attributes:*

    .. py:attribute:: tmin

        Start of this time window.

    .. py:attribute:: tmax

        End of this time window.

    .. py:attribute:: i

        Index of this time window in sequence.

    .. py:attribute:: n

        Total number of time windows in sequence.

    .. py:attribute:: traces

        Extracted waveforms for this time window.
    '''

    def __init__(self, tmin, tmax, i, n, traces):
        self.tmin = tmin
        self.tmax = tmax
        self.i = i
        self.n = n
        self.traces = traces

202 

203 

204class Squirrel(Selection): 

205 ''' 

206 Prompt, lazy, indexing, caching, dynamic seismological dataset access. 

207 

208 :param env: 

209 Squirrel environment instance or directory path to use as starting 

210 point for its detection. By default, the current directory is used as 

211 starting point. When searching for a usable environment the directory 

212 ``'.squirrel'`` or ``'squirrel'`` in the current (or starting point) 

213 directory is used if it exists, otherwise the parent directories are 

214 search upwards for the existence of such a directory. If no such 

215 directory is found, the user's global Squirrel environment 

216 ``'$HOME/.pyrocko/squirrel'`` is used. 

217 :type env: 

218 :py:class:`~pyrocko.squirrel.environment.Environment` or 

219 :py:class:`str` 

220 

221 :param database: 

222 Database instance or path to database. By default the 

223 database found in the detected Squirrel environment is used. 

224 :type database: 

225 :py:class:`~pyrocko.squirrel.database.Database` or :py:class:`str` 

226 

227 :param cache_path: 

228 Directory path to use for data caching. By default, the ``'cache'`` 

229 directory in the detected Squirrel environment is used. 

230 :type cache_path: 

231 :py:class:`str` 

232 

233 :param persistent: 

234 If given a name, create a persistent selection. 

235 :type persistent: 

236 :py:class:`str` 

237 

238 This is the central class of the Squirrel framework. It provides a unified 

239 interface to query and access seismic waveforms, station meta-data and 

240 event information from local file collections and remote data sources. For 

241 prompt responses, a profound database setup is used under the hood. To 

242 speed up assemblage of ad-hoc data selections, files are indexed on first 

243 use and the extracted meta-data is remembered in the database for 

244 subsequent accesses. Bulk data is lazily loaded from disk and remote 

245 sources, just when requested. Once loaded, data is cached in memory to 

246 expedite typical access patterns. Files and data sources can be dynamically 

247 added to and removed from the Squirrel selection at runtime. 

248 

249 Queries are restricted to the contents of the files currently added to the 

250 Squirrel selection (usually a subset of the file meta-information 

251 collection in the database). This list of files is referred to here as the 

252 "selection". By default, temporary tables are created in the attached 

253 database to hold the names of the files in the selection as well as various 

254 indices and counters. These tables are only visible inside the application 

255 which created them and are deleted when the database connection is closed 

256 or the application exits. To create a selection which is not deleted at 

257 exit, supply a name to the ``persistent`` argument of the Squirrel 

258 constructor. Persistent selections are shared among applications using the 

259 same database. 

260 

261 **Method summary** 

262 

263 Some of the methods are implemented in :py:class:`Squirrel`'s base class 

264 :py:class:`~pyrocko.squirrel.selection.Selection`. 

265 

266 .. autosummary:: 

267 

268 ~Squirrel.add 

269 ~Squirrel.add_source 

270 ~Squirrel.add_fdsn 

271 ~Squirrel.add_catalog 

272 ~Squirrel.add_dataset 

273 ~Squirrel.add_virtual 

274 ~Squirrel.update 

275 ~Squirrel.update_waveform_promises 

276 ~Squirrel.advance_accessor 

277 ~Squirrel.clear_accessor 

278 ~Squirrel.reload 

279 ~pyrocko.squirrel.selection.Selection.iter_paths 

280 ~Squirrel.iter_nuts 

281 ~Squirrel.iter_kinds 

282 ~Squirrel.iter_deltats 

283 ~Squirrel.iter_codes 

284 ~Squirrel.iter_counts 

285 ~pyrocko.squirrel.selection.Selection.get_paths 

286 ~Squirrel.get_nuts 

287 ~Squirrel.get_kinds 

288 ~Squirrel.get_deltats 

289 ~Squirrel.get_codes 

290 ~Squirrel.get_counts 

291 ~Squirrel.get_time_span 

292 ~Squirrel.get_deltat_span 

293 ~Squirrel.get_nfiles 

294 ~Squirrel.get_nnuts 

295 ~Squirrel.get_total_size 

296 ~Squirrel.get_stats 

297 ~Squirrel.get_content 

298 ~Squirrel.get_stations 

299 ~Squirrel.get_channels 

300 ~Squirrel.get_responses 

301 ~Squirrel.get_events 

302 ~Squirrel.get_waveform_nuts 

303 ~Squirrel.get_waveforms 

304 ~Squirrel.chopper_waveforms 

305 ~Squirrel.get_coverage 

306 ~Squirrel.pile 

307 ~Squirrel.snuffle 

308 ~Squirrel.glob_codes 

309 ~pyrocko.squirrel.selection.Selection.get_database 

310 ~Squirrel.print_tables 

311 ''' 

312 

    def __init__(
            self, env=None, database=None, cache_path=None, persistent=None):

        if not isinstance(env, environment.Environment):
            # also accepts a directory path or None (auto-detection)
            env = environment.get_environment(env)

        if database is None:
            database = env.expand_path(env.database_path)

        if cache_path is None:
            cache_path = env.expand_path(env.cache_path)

        if persistent is None:
            persistent = env.persistent

        Selection.__init__(
            self, database=database, persistent=persistent)

        # resolve database-stored paths relative to the environment's parent
        self.get_database().set_basepath(os.path.dirname(env.get_basepath()))

        # loaded content is cached separately for waveforms and for
        # everything else
        self._content_caches = {
            'waveform': cache.ContentCache(),
            'default': cache.ContentCache()}

        self._cache_path = cache_path

        self._sources = []  # remote data access clients
        self._operators = []
        self._operator_registry = {}

        self._pile = None  # created lazily on demand
        self._n_choppers_active = 0

        # names of the per-selection tables created in the attached database
        self._names.update({
            'nuts': self.name + '_nuts',
            'kind_codes_count': self.name + '_kind_codes_count',
            'coverage': self.name + '_coverage'})

        with self.transaction() as cursor:
            self._create_tables_squirrel(cursor)

353 

    def _create_tables_squirrel(self, cursor):
        '''
        Create per-selection tables, indices and triggers.

        The ``nuts`` table indexes the content pieces of the files in the
        selection, ``kind_codes_count`` holds per-(kind, codes) counters and
        ``coverage`` holds time coverage change-points. Triggers keep the
        auxiliary tables consistent with ``nuts`` and with the file tables
        managed by the base class.
        '''

        # main index table: one row per content piece ("nut")
        cursor.execute(self._register_table(self._sql(
            '''
                CREATE TABLE IF NOT EXISTS %(db)s.%(nuts)s (
                    nut_id integer PRIMARY KEY,
                    file_id integer,
                    file_segment integer,
                    file_element integer,
                    kind_id integer,
                    kind_codes_id integer,
                    tmin_seconds integer,
                    tmin_offset integer,
                    tmax_seconds integer,
                    tmax_offset integer,
                    kscale integer)
            ''')))

        # per-(kind, codes) entry counters, maintained by triggers below
        cursor.execute(self._register_table(self._sql(
            '''
                CREATE TABLE IF NOT EXISTS %(db)s.%(kind_codes_count)s (
                    kind_codes_id integer PRIMARY KEY,
                    count integer)
            ''')))

        cursor.execute(self._sql(
            '''
                CREATE UNIQUE INDEX IF NOT EXISTS %(db)s.%(nuts)s_file_element
                    ON %(nuts)s (file_id, file_segment, file_element)
            '''))

        cursor.execute(self._sql(
            '''
                CREATE INDEX IF NOT EXISTS %(db)s.%(nuts)s_index_file_id
                    ON %(nuts)s (file_id)
            '''))

        cursor.execute(self._sql(
            '''
                CREATE INDEX IF NOT EXISTS %(db)s.%(nuts)s_index_tmin_seconds
                    ON %(nuts)s (kind_id, tmin_seconds)
            '''))

        cursor.execute(self._sql(
            '''
                CREATE INDEX IF NOT EXISTS %(db)s.%(nuts)s_index_tmax_seconds
                    ON %(nuts)s (kind_id, tmax_seconds)
            '''))

        # supports the kscale-partitioned time range queries of
        # _timerange_sql
        cursor.execute(self._sql(
            '''
                CREATE INDEX IF NOT EXISTS %(db)s.%(nuts)s_index_kscale
                    ON %(nuts)s (kind_id, kscale, tmin_seconds)
            '''))

        # removing a file removes its nuts
        cursor.execute(self._sql(
            '''
                CREATE TRIGGER IF NOT EXISTS %(db)s.%(nuts)s_delete_nuts
                    BEFORE DELETE ON main.files FOR EACH ROW
                    BEGIN
                        DELETE FROM %(nuts)s WHERE file_id == old.file_id;
                    END
            '''))

        # trigger only on size to make silent update of mtime possible
        cursor.execute(self._sql(
            '''
                CREATE TRIGGER IF NOT EXISTS %(db)s.%(nuts)s_delete_nuts2
                    BEFORE UPDATE OF size ON main.files FOR EACH ROW
                    BEGIN
                        DELETE FROM %(nuts)s WHERE file_id == old.file_id;
                    END
            '''))

        # removing a file from the selection removes its nuts
        cursor.execute(self._sql(
            '''
                CREATE TRIGGER IF NOT EXISTS
                    %(db)s.%(file_states)s_delete_files
                    BEFORE DELETE ON %(db)s.%(file_states)s FOR EACH ROW
                    BEGIN
                        DELETE FROM %(nuts)s WHERE file_id == old.file_id;
                    END
            '''))

        # maintain kind_codes_count: increment on insert ...
        cursor.execute(self._sql(
            '''
                CREATE TRIGGER IF NOT EXISTS %(db)s.%(nuts)s_inc_kind_codes
                    BEFORE INSERT ON %(nuts)s FOR EACH ROW
                    BEGIN
                        INSERT OR IGNORE INTO %(kind_codes_count)s VALUES
                        (new.kind_codes_id, 0);
                        UPDATE %(kind_codes_count)s
                        SET count = count + 1
                        WHERE new.kind_codes_id
                            == %(kind_codes_count)s.kind_codes_id;
                    END
            '''))

        # ... and decrement on delete
        cursor.execute(self._sql(
            '''
                CREATE TRIGGER IF NOT EXISTS %(db)s.%(nuts)s_dec_kind_codes
                    BEFORE DELETE ON %(nuts)s FOR EACH ROW
                    BEGIN
                        UPDATE %(kind_codes_count)s
                        SET count = count - 1
                        WHERE old.kind_codes_id
                            == %(kind_codes_count)s.kind_codes_id;
                    END
            '''))

        # coverage change-points: `step` is the net number of spans
        # starting (+) / ending (-) at the given instant
        cursor.execute(self._register_table(self._sql(
            '''
                CREATE TABLE IF NOT EXISTS %(db)s.%(coverage)s (
                    kind_codes_id integer,
                    time_seconds integer,
                    time_offset integer,
                    step integer)
            ''')))

        cursor.execute(self._sql(
            '''
                CREATE UNIQUE INDEX IF NOT EXISTS %(db)s.%(coverage)s_time
                    ON %(coverage)s (kind_codes_id, time_seconds, time_offset)
            '''))

        # adding a nut adds +1 at its tmin and -1 at its tmax; change-points
        # whose step cancels to zero are removed
        cursor.execute(self._sql(
            '''
                CREATE TRIGGER IF NOT EXISTS %(db)s.%(nuts)s_add_coverage
                    AFTER INSERT ON %(nuts)s FOR EACH ROW
                    BEGIN
                        INSERT OR IGNORE INTO %(coverage)s VALUES
                        (new.kind_codes_id, new.tmin_seconds, new.tmin_offset, 0)
                        ;
                        UPDATE %(coverage)s
                        SET step = step + 1
                        WHERE new.kind_codes_id == %(coverage)s.kind_codes_id
                            AND new.tmin_seconds == %(coverage)s.time_seconds
                            AND new.tmin_offset == %(coverage)s.time_offset
                        ;
                        INSERT OR IGNORE INTO %(coverage)s VALUES
                        (new.kind_codes_id, new.tmax_seconds, new.tmax_offset, 0)
                        ;
                        UPDATE %(coverage)s
                        SET step = step - 1
                        WHERE new.kind_codes_id == %(coverage)s.kind_codes_id
                            AND new.tmax_seconds == %(coverage)s.time_seconds
                            AND new.tmax_offset == %(coverage)s.time_offset
                        ;
                        DELETE FROM %(coverage)s
                            WHERE new.kind_codes_id == %(coverage)s.kind_codes_id
                                AND new.tmin_seconds == %(coverage)s.time_seconds
                                AND new.tmin_offset == %(coverage)s.time_offset
                                AND step == 0
                        ;
                        DELETE FROM %(coverage)s
                            WHERE new.kind_codes_id == %(coverage)s.kind_codes_id
                                AND new.tmax_seconds == %(coverage)s.time_seconds
                                AND new.tmax_offset == %(coverage)s.time_offset
                                AND step == 0
                        ;
                    END
            '''))

        # removing a nut reverses the coverage bookkeeping above
        cursor.execute(self._sql(
            '''
                CREATE TRIGGER IF NOT EXISTS %(db)s.%(nuts)s_remove_coverage
                    BEFORE DELETE ON %(nuts)s FOR EACH ROW
                    BEGIN
                        INSERT OR IGNORE INTO %(coverage)s VALUES
                        (old.kind_codes_id, old.tmin_seconds, old.tmin_offset, 0)
                        ;
                        UPDATE %(coverage)s
                        SET step = step - 1
                        WHERE old.kind_codes_id == %(coverage)s.kind_codes_id
                            AND old.tmin_seconds == %(coverage)s.time_seconds
                            AND old.tmin_offset == %(coverage)s.time_offset
                        ;
                        INSERT OR IGNORE INTO %(coverage)s VALUES
                        (old.kind_codes_id, old.tmax_seconds, old.tmax_offset, 0)
                        ;
                        UPDATE %(coverage)s
                        SET step = step + 1
                        WHERE old.kind_codes_id == %(coverage)s.kind_codes_id
                            AND old.tmax_seconds == %(coverage)s.time_seconds
                            AND old.tmax_offset == %(coverage)s.time_offset
                        ;
                        DELETE FROM %(coverage)s
                            WHERE old.kind_codes_id == %(coverage)s.kind_codes_id
                                AND old.tmin_seconds == %(coverage)s.time_seconds
                                AND old.tmin_offset == %(coverage)s.time_offset
                                AND step == 0
                        ;
                        DELETE FROM %(coverage)s
                            WHERE old.kind_codes_id == %(coverage)s.kind_codes_id
                                AND old.tmax_seconds == %(coverage)s.time_seconds
                                AND old.tmax_offset == %(coverage)s.time_offset
                                AND step == 0
                        ;
                    END
            '''))

554 

    def _delete(self):
        '''Delete database tables associated with this Squirrel.'''

        # NOTE(review): the IF EXISTS guards on the coverage objects
        # presumably allow deleting selections created before coverage
        # tracking existed — confirm against the schema history.
        for s in '''
                DROP TRIGGER %(db)s.%(nuts)s_delete_nuts;
                DROP TRIGGER %(db)s.%(nuts)s_delete_nuts2;
                DROP TRIGGER %(db)s.%(file_states)s_delete_files;
                DROP TRIGGER %(db)s.%(nuts)s_inc_kind_codes;
                DROP TRIGGER %(db)s.%(nuts)s_dec_kind_codes;
                DROP TABLE %(db)s.%(nuts)s;
                DROP TABLE %(db)s.%(kind_codes_count)s;
                DROP TRIGGER IF EXISTS %(db)s.%(nuts)s_add_coverage;
                DROP TRIGGER IF EXISTS %(db)s.%(nuts)s_remove_coverage;
                DROP TABLE IF EXISTS %(db)s.%(coverage)s;
                '''.strip().splitlines():

            self._conn.execute(self._sql(s))

        Selection._delete(self)

574 

    @filldocs
    def add(self,
            paths,
            kinds=None,
            format='detect',
            include=None,
            exclude=None,
            check=True):

        '''
        Add files to the selection.

        :param paths:
            Iterator yielding paths to files or directories to be added to the
            selection. Recurses into directories. If given a ``str``, it
            is treated as a single path to be added.
        :type paths:
            :py:class:`list` of :py:class:`str`

        :param kinds:
            Content types to be made available through the Squirrel selection.
            By default, all known content types are accepted.
        :type kinds:
            :py:class:`list` of :py:class:`str`

        :param format:
            File format identifier or ``'detect'`` to enable auto-detection
            (available: %(file_formats)s).
        :type format:
            str

        :param include:
            If not ``None``, files are only included if their paths match the
            given regular expression pattern.
        :type include:
            str

        :param exclude:
            If not ``None``, files are only included if their paths do not
            match the given regular expression pattern.
        :type exclude:
            str

        :param check:
            If ``True``, all file modification times are checked to see if
            cached information has to be updated (slow). If ``False``, only
            previously unknown files are indexed and cached information is used
            for known files, regardless of file state (fast, corresponds to
            Squirrel's ``--optimistic`` mode). File deletions will go
            undetected in the latter case.
        :type check:
            bool

        :Complexity:
            O(log N)
        '''

        if isinstance(kinds, str):
            kinds = (kinds,)

        if isinstance(paths, str):
            paths = [paths]

        kind_mask = model.to_kind_mask(kinds)

        with progress.view():
            Selection.add(
                self, util.iter_select_files(
                    paths,
                    show_progress=False,
                    include=include,
                    exclude=exclude,
                    pass_through=lambda path: path.startswith('virtual:')
                ), kind_mask, format)

            # index new/changed files, then aggregate into the selection
            self._load(check)
            self._update_nuts()

652 

    def reload(self):
        '''
        Check for modifications and reindex modified files.

        Based on file modification times.
        '''

        # force re-checking even of files previously marked as known
        self._set_file_states_force_check()
        self._load(check=True)
        self._update_nuts()

663 

    def add_virtual(self, nuts, virtual_paths=None):
        '''
        Add content which is not backed by files.

        :param nuts:
            Content pieces to be added.
        :type nuts:
            iterator yielding :py:class:`~pyrocko.squirrel.model.Nut` objects

        :param virtual_paths:
            List of virtual paths to prevent creating a temporary list of the
            nuts while aggregating the file paths for the selection.
        :type virtual_paths:
            :py:class:`list` of :py:class:`str`

        Stores to the main database and the selection.
        '''

        if isinstance(virtual_paths, str):
            virtual_paths = [virtual_paths]

        if virtual_paths is None:
            # need to materialize the iterator: paths are collected first,
            # the nuts themselves are consumed again below
            if not isinstance(nuts, list):
                nuts = list(nuts)
            virtual_paths = set(nut.file_path for nut in nuts)

        Selection.add(self, virtual_paths)
        self.get_database().dig(nuts)
        self._update_nuts()

693 

    def add_volatile(self, nuts):
        '''
        Add content held in memory only and tracked for later cleanup.

        :param nuts:
            Content pieces to be added.
        :type nuts:
            iterator yielding :py:class:`~pyrocko.squirrel.model.Nut` objects
        '''
        if not isinstance(nuts, list):
            nuts = list(nuts)

        paths = list(set(nut.file_path for nut in nuts))
        # register contents with the virtual backend before adding
        io.backends.virtual.add_nuts(nuts)
        self.add_virtual(nuts, paths)
        self._volatile_paths.extend(paths)

702 

    def add_volatile_waveforms(self, traces):
        '''
        Add in-memory waveforms which will be removed when the app closes.

        :param traces:
            Traces to be added.

        :returns:
            The virtual path under which the traces were registered.
        '''

        name = model.random_name()

        path = 'virtual:volatile:%s' % name

        nuts = []
        for itr, tr in enumerate(traces):
            assert tr.tmin <= tr.tmax
            tmin_seconds, tmin_offset = model.tsplit(tr.tmin)
            # tmax is the end of the last sample interval, not the time of
            # the last sample
            tmax_seconds, tmax_offset = model.tsplit(
                tr.tmin + tr.data_len()*tr.deltat)

            nuts.append(model.Nut(
                file_path=path,
                file_format='virtual',
                file_segment=itr,
                file_element=0,
                file_mtime=0,
                codes=separator.join(tr.codes),
                tmin_seconds=tmin_seconds,
                tmin_offset=tmin_offset,
                tmax_seconds=tmax_seconds,
                tmax_offset=tmax_offset,
                deltat=tr.deltat,
                kind_id=to_kind_id('waveform'),
                content=tr))

        self.add_volatile(nuts)
        return path

736 

    def _load(self, check):
        # Index all files in the selection without loading any content
        # (content=[]). Unchanged files are skipped; `check` controls whether
        # file modification state is verified.
        for _ in io.iload(
                self,
                content=[],
                skip_unchanged=True,
                check=check):
            pass

744 

    def _update_nuts(self):
        '''
        Copy nut index entries of newly added files into the selection.

        Entries are filtered by the per-file kind mask and files already in
        state "known" (2) are skipped. Afterwards, all files in the selection
        are marked as known.
        '''
        transaction = self.transaction()
        with make_task('Aggregating selection') as task, \
                transaction as cursor:

            # report SQLite progress into the task while the bulk insert runs
            self._conn.set_progress_handler(task.update, 100000)
            nrows = cursor.execute(self._sql(
                '''
                    INSERT INTO %(db)s.%(nuts)s
                    SELECT NULL,
                        nuts.file_id, nuts.file_segment, nuts.file_element,
                        nuts.kind_id, nuts.kind_codes_id,
                        nuts.tmin_seconds, nuts.tmin_offset,
                        nuts.tmax_seconds, nuts.tmax_offset,
                        nuts.kscale
                    FROM %(db)s.%(file_states)s
                    INNER JOIN nuts
                        ON %(db)s.%(file_states)s.file_id == nuts.file_id
                    INNER JOIN kind_codes
                        ON nuts.kind_codes_id ==
                           kind_codes.kind_codes_id
                    WHERE %(db)s.%(file_states)s.file_state != 2
                        AND (((1 << kind_codes.kind_id)
                            & %(db)s.%(file_states)s.kind_mask) != 0)
                ''')).rowcount

            task.update(nrows)
            self._set_file_states_known(transaction)
            self._conn.set_progress_handler(None, 0)

774 

    def add_source(self, source, check=True):
        '''
        Add remote resource.

        :param source:
            Remote data access client instance.
        :type source:
            subclass of :py:class:`~pyrocko.squirrel.client.base.Source`

        :param check:
            Passed on to the source's ``setup`` method.
        :type check:
            bool
        '''

        self._sources.append(source)
        source.setup(self, check=check)

787 

    def add_fdsn(self, *args, **kwargs):
        '''
        Add FDSN site for transparent remote data access.

        Arguments are passed to
        :py:class:`~pyrocko.squirrel.client.fdsn.FDSNSource`.
        '''

        self.add_source(fdsn.FDSNSource(*args, **kwargs))

797 

    def add_catalog(self, *args, **kwargs):
        '''
        Add online catalog for transparent event data access.

        Arguments are passed to
        :py:class:`~pyrocko.squirrel.client.catalog.CatalogSource`.
        '''

        self.add_source(catalog.CatalogSource(*args, **kwargs))

807 

    def add_dataset(self, ds, check=True, warn_persistent=True):
        '''
        Read dataset description from file and add its contents.

        :param ds:
            Path to dataset description file or dataset description object
            . See :py:mod:`~pyrocko.squirrel.dataset`.
        :type ds:
            :py:class:`str` or :py:class:`~pyrocko.squirrel.dataset.Dataset`

        :param check:
            If ``True``, all file modification times are checked to see if
            cached information has to be updated (slow). If ``False``, only
            previously unknown files are indexed and cached information is used
            for known files, regardless of file state (fast, corresponds to
            Squirrel's ``--optimistic`` mode). File deletions will go
            undetected in the latter case.
        :type check:
            bool
        '''
        if isinstance(ds, str):
            ds = dataset.read_dataset(ds)
            path = ds
        else:
            path = None

        # `persistent` must be chosen at Squirrel construction time; a
        # dataset cannot retro-fit it onto an existing instance
        if warn_persistent and ds.persistent and (
                not self._persistent or (self._persistent != ds.persistent)):

            logger.warning(
                'Dataset `persistent` flag ignored. Can not be set on already '
                'existing Squirrel instance.%s' % (
                    ' Dataset: %s' % path if path else ''))

        ds.setup(self, check=check)

843 

844 def _get_selection_args( 

845 self, obj=None, tmin=None, tmax=None, time=None, codes=None): 

846 

847 if time is not None: 

848 tmin = time 

849 tmax = time 

850 

851 if obj is not None: 

852 tmin = tmin if tmin is not None else obj.tmin 

853 tmax = tmax if tmax is not None else obj.tmax 

854 codes = codes if codes is not None else codes_inflate2(obj.codes) 

855 

856 if isinstance(codes, str): 

857 codes = tuple(codes.split('.')) 

858 

859 return tmin, tmax, codes 

860 

861 def _selection_args_to_kwargs( 

862 self, obj=None, tmin=None, tmax=None, time=None, codes=None): 

863 

864 return dict(obj=obj, tmin=tmin, tmax=tmax, time=time, codes=codes) 

865 

    def _timerange_sql(self, tmin, tmax, kind, cond, args, naiv):
        '''
        Append SQL conditions and bind arguments for a time range query.

        Appends to ``cond`` and ``args`` in place. With ``naiv`` set, a
        plain comparison on ``tmin_seconds`` is used (slow, for testing).
        Otherwise the kscale index is exploited: entries are presumably
        binned by the magnitude of their time span, so per bin only a
        bounded window of ``tmin_seconds`` values needs scanning — TODO
        confirm against the definition of ``kscale``.
        '''

        tmin_seconds, tmin_offset = model.tsplit(tmin)
        tmax_seconds, tmax_offset = model.tsplit(tmax)
        if naiv:
            cond.append('%(db)s.%(nuts)s.tmin_seconds <= ?')
            args.append(tmax_seconds)
        else:
            tscale_edges = model.tscale_edges
            tmin_cond = []
            for kscale in range(tscale_edges.size + 1):
                if kscale != tscale_edges.size:
                    # bounded bin: tmin within [tmin - tscale - 1, tmax + 1]
                    tscale = int(tscale_edges[kscale])
                    tmin_cond.append('''
                        (%(db)s.%(nuts)s.kind_id = ?
                         AND %(db)s.%(nuts)s.kscale == ?
                         AND %(db)s.%(nuts)s.tmin_seconds BETWEEN ? AND ?)
                    ''')
                    args.extend(
                        (to_kind_id(kind), kscale,
                         tmin_seconds - tscale - 1, tmax_seconds + 1))

                else:
                    # last bin is unbounded: only upper limit applies
                    tmin_cond.append('''
                        (%(db)s.%(nuts)s.kind_id == ?
                         AND %(db)s.%(nuts)s.kscale == ?
                         AND %(db)s.%(nuts)s.tmin_seconds <= ?)
                    ''')

                    args.extend(
                        (to_kind_id(kind), kscale, tmax_seconds + 1))
            if tmin_cond:
                cond.append(' ( ' + ' OR '.join(tmin_cond) + ' ) ')

        cond.append('%(db)s.%(nuts)s.tmax_seconds >= ?')
        args.append(tmin_seconds)

902 

    def iter_nuts(
            self, kind=None, tmin=None, tmax=None, codes=None, naiv=False,
            kind_codes_ids=None, path=None):

        '''
        Iterate over content entities matching given constraints.

        :param kind:
            Content kind (or kinds) to extract.
        :type kind:
            :py:class:`str`, :py:class:`list` of :py:class:`str`

        :param tmin:
            Start time of query interval.
        :type tmin:
            timestamp

        :param tmax:
            End time of query interval.
        :type tmax:
            timestamp

        :param codes:
            Pattern of content codes to query.
        :type codes:
            :py:class:`tuple` of :py:class:`str`

        :param naiv:
            Bypass time span lookup through indices (slow, for testing).
        :type naiv:
            :py:class:`bool`

        :param kind_codes_ids:
            Kind-codes IDs of contents to be retrieved (internal use).
        :type kind_codes_ids:
            :py:class:`list` of :py:class:`str`

        :param path:
            If given, restrict the query to contents of a single file.
        :type path:
            :py:class:`str`

        :yields:
            :py:class:`~pyrocko.squirrel.model.Nut` objects representing the
            intersecting content.

        :complexity:
            O(log N) for the time selection part due to heavy use of database
            indices.

        Query time span is treated as a half-open interval ``[tmin, tmax)``.
        However, if ``tmin`` equals ``tmax``, the edge logics are modified to
        closed-interval so that content intersecting with the time instant ``t
        = tmin = tmax`` is returned (otherwise nothing would be returned as
        ``[t, t)`` never matches anything).

        Time spans of content entities to be matched are also treated as half
        open intervals, e.g. content span ``[0, 1)`` is matched by query span
        ``[0, 1)`` but not by ``[-1, 0)`` or ``[1, 2)``. Also here, logics are
        modified to closed-interval when the content time span is an empty
        interval, i.e. to indicate a time instant. E.g. time instant 0 is
        matched by ``[0, 1)`` but not by ``[-1, 0)`` or ``[1, 2)``.
        '''

        if not isinstance(kind, str):
            # multiple kinds (or None meaning all kinds): recurse per kind
            if kind is None:
                kind = model.g_content_kinds
            for kind_ in kind:
                for nut in self.iter_nuts(kind_, tmin, tmax, codes):
                    yield nut

            return

        cond = []
        args = []
        if tmin is not None or tmax is not None:
            assert kind is not None
            # open-ended queries fall back to the selection's full time span
            if tmin is None:
                tmin = self.get_time_span()[0]
            if tmax is None:
                tmax = self.get_time_span()[1] + 1.0

            self._timerange_sql(tmin, tmax, kind, cond, args, naiv)

        elif kind is not None:
            cond.append('kind_codes.kind_id == ?')
            args.append(to_kind_id(kind))

        if codes is not None:
            pats = codes_patterns_for_kind(kind, codes)
            if pats:
                cond.append(
                    ' ( %s ) ' % ' OR '.join(
                        ('kind_codes.codes GLOB ?',) * len(pats)))
                args.extend(separator.join(pat) for pat in pats)

        if kind_codes_ids is not None:
            cond.append(
                ' ( kind_codes.kind_codes_id IN ( %s ) ) ' % ', '.join(
                    '?'*len(kind_codes_ids)))

            args.extend(kind_codes_ids)

        db = self.get_database()
        if path is not None:
            cond.append('files.path == ?')
            args.append(db.relpath(abspath(path)))

        sql = ('''
            SELECT
                files.path,
                files.format,
                files.mtime,
                files.size,
                %(db)s.%(nuts)s.file_segment,
                %(db)s.%(nuts)s.file_element,
                kind_codes.kind_id,
                kind_codes.codes,
                %(db)s.%(nuts)s.tmin_seconds,
                %(db)s.%(nuts)s.tmin_offset,
                %(db)s.%(nuts)s.tmax_seconds,
                %(db)s.%(nuts)s.tmax_offset,
                kind_codes.deltat
            FROM files
            INNER JOIN %(db)s.%(nuts)s
                ON files.file_id == %(db)s.%(nuts)s.file_id
            INNER JOIN kind_codes
                ON %(db)s.%(nuts)s.kind_codes_id == kind_codes.kind_codes_id
            ''')

        if cond:
            sql += ''' WHERE ''' + ' AND '.join(cond)

        sql = self._sql(sql)
        if tmin is None and tmax is None:
            for row in self._conn.execute(sql, args):
                row = (db.abspath(row[0]),) + row[1:]
                nut = model.Nut(values_nocheck=row)
                yield nut
        else:
            assert tmin is not None and tmax is not None
            if tmin == tmax:
                # time instant query: closed-interval edge logics (see
                # docstring)
                for row in self._conn.execute(sql, args):
                    row = (db.abspath(row[0]),) + row[1:]
                    nut = model.Nut(values_nocheck=row)
                    if (nut.tmin <= tmin < nut.tmax) \
                            or (nut.tmin == nut.tmax and tmin == nut.tmin):

                        yield nut
            else:
                # half-open interval query, with closed-interval special
                # case for zero-length content spans
                for row in self._conn.execute(sql, args):
                    row = (db.abspath(row[0]),) + row[1:]
                    nut = model.Nut(values_nocheck=row)
                    if (tmin < nut.tmax and nut.tmin < tmax) \
                            or (nut.tmin == nut.tmax
                                and tmin <= nut.tmin < tmax):

                        yield nut

1057 def get_nuts(self, *args, **kwargs): 

1058 ''' 

1059 Get content entities matching given constraints. 

1060 

1061 Like :py:meth:`iter_nuts` but returns results as a list. 

1062 ''' 

1063 

1064 return list(self.iter_nuts(*args, **kwargs)) 

1065 

    def _split_nuts(
            self, kind, tmin=None, tmax=None, codes=None, path=None):
        '''
        Cut the time span ``[tmin, tmax]`` out of matching index entries.

        Matching nuts overlapping the given span are deleted; where a nut
        extends beyond the span, replacement entries covering the leftover
        part(s) before/after the span are inserted. The operation is applied
        both to the selection's nut table and to the main ``nuts`` table.

        :param kind: content kind to restrict the operation to.
        :param tmin: start of span to punch out.
        :param tmax: end of span to punch out.
        :param codes: if given, restrict to matching codes patterns.
        :param path: if given, restrict to entries from this file path.
        '''

        tmin_seconds, tmin_offset = model.tsplit(tmin)
        tmax_seconds, tmax_offset = model.tsplit(tmax)

        # Same name substitutions as the selection, but pointing at the main
        # database's nuts table.
        names_main_nuts = dict(self._names)
        names_main_nuts.update(db='main', nuts='nuts')

        db = self.get_database()

        def main_nuts(s):
            return s % names_main_nuts

        with self.transaction() as cursor:
            # modify selection and main
            for sql_subst in [
                    self._sql, main_nuts]:

                cond = []
                args = []

                self._timerange_sql(tmin, tmax, kind, cond, args, False)

                if codes is not None:
                    pats = codes_patterns_for_kind(kind, codes)
                    if pats:
                        cond.append(
                            ' ( %s ) ' % ' OR '.join(
                                ('kind_codes.codes GLOB ?',) * len(pats)))
                        args.extend(separator.join(pat) for pat in pats)

                if path is not None:
                    # Paths are stored relative to the database.
                    cond.append('files.path == ?')
                    args.append(db.relpath(abspath(path)))

                sql = sql_subst('''
                    SELECT
                        %(db)s.%(nuts)s.nut_id,
                        %(db)s.%(nuts)s.tmin_seconds,
                        %(db)s.%(nuts)s.tmin_offset,
                        %(db)s.%(nuts)s.tmax_seconds,
                        %(db)s.%(nuts)s.tmax_offset,
                        kind_codes.deltat
                    FROM files
                    INNER JOIN %(db)s.%(nuts)s
                        ON files.file_id == %(db)s.%(nuts)s.file_id
                    INNER JOIN kind_codes
                        ON %(db)s.%(nuts)s.kind_codes_id == kind_codes.kind_codes_id
                    WHERE ''' + ' AND '.join(cond))  # noqa

                insert = []
                delete = []
                for row in cursor.execute(sql, args):
                    nut_id, nut_tmin_seconds, nut_tmin_offset, \
                        nut_tmax_seconds, nut_tmax_offset, nut_deltat = row

                    nut_tmin = model.tjoin(
                        nut_tmin_seconds, nut_tmin_offset)
                    nut_tmax = model.tjoin(
                        nut_tmax_seconds, nut_tmax_offset)

                    if nut_tmin < tmax and tmin < nut_tmax:
                        # Nut overlaps the span to be removed.
                        if nut_tmin < tmin:
                            # Keep the part of the nut before the span.
                            insert.append((
                                nut_tmin_seconds, nut_tmin_offset,
                                tmin_seconds, tmin_offset,
                                model.tscale_to_kscale(
                                    tmin_seconds - nut_tmin_seconds),
                                nut_id))

                        if tmax < nut_tmax:
                            # Keep the part of the nut after the span.
                            insert.append((
                                tmax_seconds, tmax_offset,
                                nut_tmax_seconds, nut_tmax_offset,
                                model.tscale_to_kscale(
                                    nut_tmax_seconds - tmax_seconds),
                                nut_id))

                        delete.append((nut_id,))

                # Replacement entries copy all attributes of the original nut
                # except for the adjusted time span and kscale.
                sql_add = '''
                    INSERT INTO %(db)s.%(nuts)s (
                            file_id, file_segment, file_element, kind_id,
                            kind_codes_id, tmin_seconds, tmin_offset,
                            tmax_seconds, tmax_offset, kscale )
                        SELECT
                            file_id, file_segment, file_element,
                            kind_id, kind_codes_id, ?, ?, ?, ?, ?
                        FROM %(db)s.%(nuts)s
                        WHERE nut_id == ?
                '''
                cursor.executemany(sql_subst(sql_add), insert)

                sql_delete = '''
                    DELETE FROM %(db)s.%(nuts)s WHERE nut_id == ?
                '''
                cursor.executemany(sql_subst(sql_delete), delete)

1164 

1165 def get_time_span(self, kinds=None): 

1166 ''' 

1167 Get time interval over all content in selection. 

1168 

1169 :param kinds: 

1170 If not ``None``, restrict query to given content kinds. 

1171 :type kind: 

1172 list of str 

1173 

1174 :complexity: 

1175 O(1), independent of the number of nuts. 

1176 

1177 :returns: 

1178 ``(tmin, tmax)``, combined time interval of queried content kinds. 

1179 ''' 

1180 

1181 sql_min = self._sql(''' 

1182 SELECT MIN(tmin_seconds), MIN(tmin_offset) 

1183 FROM %(db)s.%(nuts)s 

1184 WHERE kind_id == ? 

1185 AND tmin_seconds == ( 

1186 SELECT MIN(tmin_seconds) 

1187 FROM %(db)s.%(nuts)s 

1188 WHERE kind_id == ?) 

1189 ''') 

1190 

1191 sql_max = self._sql(''' 

1192 SELECT MAX(tmax_seconds), MAX(tmax_offset) 

1193 FROM %(db)s.%(nuts)s 

1194 WHERE kind_id == ? 

1195 AND tmax_seconds == ( 

1196 SELECT MAX(tmax_seconds) 

1197 FROM %(db)s.%(nuts)s 

1198 WHERE kind_id == ?) 

1199 ''') 

1200 

1201 gtmin = None 

1202 gtmax = None 

1203 

1204 if isinstance(kinds, str): 

1205 kinds = [kinds] 

1206 

1207 if kinds is None: 

1208 kind_ids = model.g_content_kind_ids 

1209 else: 

1210 kind_ids = model.to_kind_ids(kinds) 

1211 

1212 for kind_id in kind_ids: 

1213 for tmin_seconds, tmin_offset in self._conn.execute( 

1214 sql_min, (kind_id, kind_id)): 

1215 tmin = model.tjoin(tmin_seconds, tmin_offset) 

1216 if tmin is not None and (gtmin is None or tmin < gtmin): 

1217 gtmin = tmin 

1218 

1219 for (tmax_seconds, tmax_offset) in self._conn.execute( 

1220 sql_max, (kind_id, kind_id)): 

1221 tmax = model.tjoin(tmax_seconds, tmax_offset) 

1222 if tmax is not None and (gtmax is None or tmax > gtmax): 

1223 gtmax = tmax 

1224 

1225 return gtmin, gtmax 

1226 

1227 def has(self, kinds): 

1228 ''' 

1229 Check availability of given content kinds. 

1230 

1231 :param kinds: 

1232 Content kinds to query. 

1233 :type kind: 

1234 list of str 

1235 

1236 :returns: 

1237 ``True`` if any of the queried content kinds is available 

1238 in the selection. 

1239 ''' 

1240 self_tmin, self_tmax = self.get_time_span(kinds) 

1241 

1242 return None not in (self_tmin, self_tmax) 

1243 

1244 def get_deltat_span(self, kind): 

1245 ''' 

1246 Get min and max sampling interval of all content of given kind. 

1247 

1248 :param kind: 

1249 Content kind 

1250 :type kind: 

1251 str 

1252 

1253 :returns: ``(deltat_min, deltat_max)`` 

1254 ''' 

1255 

1256 deltats = [ 

1257 deltat for deltat in self.get_deltats(kind) 

1258 if deltat is not None] 

1259 

1260 if deltats: 

1261 return min(deltats), max(deltats) 

1262 else: 

1263 return None, None 

1264 

1265 def iter_kinds(self, codes=None): 

1266 ''' 

1267 Iterate over content types available in selection. 

1268 

1269 :param codes: 

1270 If given, get kinds only for selected codes identifier. 

1271 :type codes: 

1272 :py:class:`tuple` of :py:class:`str` 

1273 

1274 :yields: 

1275 Available content kinds as :py:class:`str`. 

1276 

1277 :complexity: 

1278 O(1), independent of number of nuts. 

1279 ''' 

1280 

1281 return self._database._iter_kinds( 

1282 codes=codes, 

1283 kind_codes_count='%(db)s.%(kind_codes_count)s' % self._names) 

1284 

1285 def iter_deltats(self, kind=None): 

1286 ''' 

1287 Iterate over sampling intervals available in selection. 

1288 

1289 :param kind: 

1290 If given, get sampling intervals only for a given content type. 

1291 :type kind: 

1292 str 

1293 

1294 :yields: 

1295 :py:class:`float` values. 

1296 

1297 :complexity: 

1298 O(1), independent of number of nuts. 

1299 ''' 

1300 return self._database._iter_deltats( 

1301 kind=kind, 

1302 kind_codes_count='%(db)s.%(kind_codes_count)s' % self._names) 

1303 

1304 def iter_codes(self, kind=None): 

1305 ''' 

1306 Iterate over content identifier code sequences available in selection. 

1307 

1308 :param kind: 

1309 If given, get codes only for a given content type. 

1310 :type kind: 

1311 str 

1312 

1313 :yields: 

1314 :py:class:`tuple` of :py:class:`str` 

1315 

1316 :complexity: 

1317 O(1), independent of number of nuts. 

1318 ''' 

1319 return self._database._iter_codes( 

1320 kind=kind, 

1321 kind_codes_count='%(db)s.%(kind_codes_count)s' % self._names) 

1322 

1323 def iter_counts(self, kind=None): 

1324 ''' 

1325 Iterate over number of occurrences of any (kind, codes) combination. 

1326 

1327 :param kind: 

1328 If given, get counts only for selected content type. 

1329 :type kind: 

1330 str 

1331 

1332 :yields: 

1333 Tuples of the form ``((kind, codes), count)``. 

1334 

1335 :complexity: 

1336 O(1), independent of number of nuts. 

1337 ''' 

1338 return self._database._iter_counts( 

1339 kind=kind, 

1340 kind_codes_count='%(db)s.%(kind_codes_count)s' % self._names) 

1341 

1342 def get_kinds(self, codes=None): 

1343 ''' 

1344 Get content types available in selection. 

1345 

1346 :param codes: 

1347 If given, get kinds only for selected codes identifier. 

1348 :type codes: 

1349 :py:class:`tuple` of :py:class:`str` 

1350 

1351 :returns: 

1352 Sorted list of available content types. 

1353 

1354 :complexity: 

1355 O(1), independent of number of nuts. 

1356 

1357 ''' 

1358 return sorted(list(self.iter_kinds(codes=codes))) 

1359 

1360 def get_deltats(self, kind=None): 

1361 ''' 

1362 Get sampling intervals available in selection. 

1363 

1364 :param kind: 

1365 If given, get sampling intervals only for selected content type. 

1366 :type kind: 

1367 str 

1368 

1369 :complexity: 

1370 O(1), independent of number of nuts. 

1371 

1372 :returns: Sorted list of available sampling intervals. 

1373 ''' 

1374 return sorted(list(self.iter_deltats(kind=kind))) 

1375 

1376 def get_codes(self, kind=None): 

1377 ''' 

1378 Get identifier code sequences available in selection. 

1379 

1380 :param kind: 

1381 If given, get codes only for selected content type. 

1382 :type kind: 

1383 str 

1384 

1385 :complexity: 

1386 O(1), independent of number of nuts. 

1387 

1388 :returns: Sorted list of available codes as tuples of strings. 

1389 ''' 

1390 return sorted(list(self.iter_codes(kind=kind))) 

1391 

1392 def get_counts(self, kind=None): 

1393 ''' 

1394 Get number of occurrences of any (kind, codes) combination. 

1395 

1396 :param kind: 

1397 If given, get codes only for selected content type. 

1398 :type kind: 

1399 str 

1400 

1401 :complexity: 

1402 O(1), independent of number of nuts. 

1403 

1404 :returns: ``dict`` with ``counts[kind][codes]`` or ``counts[codes]`` 

1405 if kind is not ``None`` 

1406 ''' 

1407 d = {} 

1408 for (k, codes, deltat), count in self.iter_counts(): 

1409 if k not in d: 

1410 v = d[k] = {} 

1411 else: 

1412 v = d[k] 

1413 

1414 if codes not in v: 

1415 v[codes] = 0 

1416 

1417 v[codes] += count 

1418 

1419 if kind is not None: 

1420 return d[kind] 

1421 else: 

1422 return d 

1423 

1424 def glob_codes(self, kind, codes_list): 

1425 ''' 

1426 Find codes matching given patterns. 

1427 

1428 :param kind: 

1429 Content kind to be queried. 

1430 :type kind: 

1431 str 

1432 

1433 :param codes_list: 

1434 List of code patterns to query. If not given or empty, an empty 

1435 list is returned. 

1436 :type codes_list: 

1437 :py:class:`list` of :py:class:`tuple` of :py:class:`str` 

1438 

1439 :returns: 

1440 List of matches of the form ``[kind_codes_id, codes, deltat]``. 

1441 ''' 

1442 

1443 args = [to_kind_id(kind)] 

1444 pats = [] 

1445 for codes in codes_list: 

1446 pats.extend(codes_patterns_for_kind(kind, codes)) 

1447 

1448 if pats: 

1449 codes_cond = 'AND ( %s ) ' % ' OR '.join( 

1450 ('kind_codes.codes GLOB ?',) * len(pats)) 

1451 

1452 args.extend(separator.join(pat) for pat in pats) 

1453 else: 

1454 codes_cond = '' 

1455 

1456 sql = self._sql(''' 

1457 SELECT kind_codes_id, codes, deltat FROM kind_codes 

1458 WHERE 

1459 kind_id == ? ''' + codes_cond) 

1460 

1461 return list(map(list, self._conn.execute(sql, args))) 

1462 

1463 def update(self, constraint=None, **kwargs): 

1464 ''' 

1465 Update or partially update channel and event inventories. 

1466 

1467 :param constraint: 

1468 Selection of times or areas to be brought up to date. 

1469 :type constraint: 

1470 :py:class:`~pyrocko.squirrel.client.base.Constraint` 

1471 

1472 :param \\*\\*kwargs: 

1473 Shortcut for setting ``constraint=Constraint(**kwargs)``. 

1474 

1475 This function triggers all attached remote sources, to check for 

1476 updates in the meta-data. The sources will only submit queries when 

1477 their expiration date has passed, or if the selection spans into 

1478 previously unseen times or areas. 

1479 ''' 

1480 

1481 if constraint is None: 

1482 constraint = client.Constraint(**kwargs) 

1483 

1484 for source in self._sources: 

1485 source.update_channel_inventory(self, constraint) 

1486 source.update_event_inventory(self, constraint) 

1487 

1488 def update_waveform_promises(self, constraint=None, **kwargs): 

1489 ''' 

1490 Permit downloading of remote waveforms. 

1491 

1492 :param constraint: 

1493 Remote waveforms compatible with the given constraint are enabled 

1494 for download. 

1495 :type constraint: 

1496 :py:class:`~pyrocko.squirrel.client.base.Constraint` 

1497 

1498 :param \\*\\*kwargs: 

1499 Shortcut for setting ``constraint=Constraint(**kwargs)``. 

1500 

1501 Calling this method permits Squirrel to download waveforms from remote 

1502 sources when processing subsequent waveform requests. This works by 

1503 inserting so called waveform promises into the database. It will look 

1504 into the available channels for each remote source and create a promise 

1505 for each channel compatible with the given constraint. If the promise 

1506 then matches in a waveform request, Squirrel tries to download the 

1507 waveform. If the download is successful, the downloaded waveform is 

1508 added to the Squirrel and the promise is deleted. If the download 

1509 fails, the promise is kept if the reason of failure looks like being 

1510 temporary, e.g. because of a network failure. If the cause of failure 

1511 however seems to be permanent, the promise is deleted so that no 

1512 further attempts are made to download a waveform which might not be 

1513 available from that server at all. To force re-scheduling after a 

1514 permanent failure, call :py:meth:`update_waveform_promises` 

1515 yet another time. 

1516 ''' 

1517 

1518 if constraint is None: 

1519 constraint = client.Constraint(**kwargs) 

1520 

1521 # TODO 

1522 print('contraint ignored atm') 

1523 

1524 for source in self._sources: 

1525 source.update_waveform_promises(self, constraint) 

1526 

1527 def update_responses(self, constraint=None, **kwargs): 

1528 # TODO 

1529 if constraint is None: 

1530 constraint = client.Constraint(**kwargs) 

1531 

1532 print('contraint ignored atm') 

1533 for source in self._sources: 

1534 source.update_response_inventory(self, constraint) 

1535 

1536 def get_nfiles(self): 

1537 ''' 

1538 Get number of files in selection. 

1539 ''' 

1540 

1541 sql = self._sql('''SELECT COUNT(*) FROM %(db)s.%(file_states)s''') 

1542 for row in self._conn.execute(sql): 

1543 return row[0] 

1544 

1545 def get_nnuts(self): 

1546 ''' 

1547 Get number of nuts in selection. 

1548 ''' 

1549 

1550 sql = self._sql('''SELECT COUNT(*) FROM %(db)s.%(nuts)s''') 

1551 for row in self._conn.execute(sql): 

1552 return row[0] 

1553 

1554 def get_total_size(self): 

1555 ''' 

1556 Get aggregated file size available in selection. 

1557 ''' 

1558 

1559 sql = self._sql(''' 

1560 SELECT SUM(files.size) FROM %(db)s.%(file_states)s 

1561 INNER JOIN files 

1562 ON %(db)s.%(file_states)s.file_id = files.file_id 

1563 ''') 

1564 

1565 for row in self._conn.execute(sql): 

1566 return row[0] or 0 

1567 

1568 def get_stats(self): 

1569 ''' 

1570 Get statistics on contents available through this selection. 

1571 ''' 

1572 

1573 kinds = self.get_kinds() 

1574 time_spans = {} 

1575 for kind in kinds: 

1576 time_spans[kind] = self.get_time_span([kind]) 

1577 

1578 return SquirrelStats( 

1579 nfiles=self.get_nfiles(), 

1580 nnuts=self.get_nnuts(), 

1581 kinds=kinds, 

1582 codes=self.get_codes(), 

1583 total_size=self.get_total_size(), 

1584 counts=self.get_counts(), 

1585 time_spans=time_spans, 

1586 sources=[s.describe() for s in self._sources], 

1587 operators=[op.describe() for op in self._operators]) 

1588 

1589 def get_content( 

1590 self, 

1591 nut, 

1592 cache_id='default', 

1593 accessor_id='default', 

1594 show_progress=False): 

1595 

1596 ''' 

1597 Get and possibly load full content for a given index entry from file. 

1598 

1599 Loads the actual content objects (channel, station, waveform, ...) from 

1600 file. For efficiency sibling content (all stuff in the same file 

1601 segment) will also be loaded as a side effect. The loaded contents are 

1602 cached in the Squirrel object. 

1603 ''' 

1604 

1605 content_cache = self._content_caches[cache_id] 

1606 if not content_cache.has(nut): 

1607 

1608 for nut_loaded in io.iload( 

1609 nut.file_path, 

1610 segment=nut.file_segment, 

1611 format=nut.file_format, 

1612 database=self._database, 

1613 show_progress=show_progress): 

1614 

1615 content_cache.put(nut_loaded) 

1616 

1617 try: 

1618 return content_cache.get(nut, accessor_id) 

1619 except KeyError: 

1620 raise error.NotAvailable( 

1621 'Unable to retrieve content: %s, %s, %s, %s' % nut.key) 

1622 

1623 def advance_accessor(self, accessor_id, cache_id=None): 

1624 ''' 

1625 Notify memory caches about consumer moving to a new data batch. 

1626 

1627 :param accessor_id: 

1628 Name of accessing consumer to be advanced. 

1629 :type accessor_id: 

1630 str 

1631 

1632 :param cache_id: 

1633 Name of cache to for which the accessor should be advanced. By 

1634 default the named accessor is advanced in all registered caches. 

1635 By default, two caches named ``'default'`` and ``'waveforms'`` are 

1636 available. 

1637 :type cache_id: 

1638 str 

1639 

1640 See :py:class:`~pyrocko.squirrel.cache.ContentCache` for details on how 

1641 Squirrel's memory caching works and can be tuned. Default behaviour is 

1642 to release data when it has not been used in the latest data 

1643 window/batch. If the accessor is never advanced, data is cached 

1644 indefinitely - which is often desired e.g. for station meta-data. 

1645 Methods for consecutive data traversal, like 

1646 :py:meth:`chopper_waveforms` automatically advance and clear 

1647 their accessor. 

1648 ''' 

1649 for cache_ in ( 

1650 self._content_caches.keys() 

1651 if cache_id is None 

1652 else [cache_id]): 

1653 

1654 self._content_caches[cache_].advance_accessor(accessor_id) 

1655 

1656 def clear_accessor(self, accessor_id, cache_id=None): 

1657 ''' 

1658 Notify memory caches about a consumer having finished. 

1659 

1660 :param accessor_id: 

1661 Name of accessor to be cleared. 

1662 :type accessor_id: 

1663 str 

1664 

1665 :param cache_id: 

1666 Name of cache for which the accessor should be cleared. By default 

1667 the named accessor is cleared from all registered caches. By 

1668 default, two caches named ``'default'`` and ``'waveforms'`` are 

1669 available. 

1670 :type cache_id: 

1671 str 

1672 

1673 Calling this method clears all references to cache entries held by the 

1674 named accessor. Cache entries are then freed if not referenced by any 

1675 other accessor. 

1676 ''' 

1677 

1678 for cache_ in ( 

1679 self._content_caches.keys() 

1680 if cache_id is None 

1681 else [cache_id]): 

1682 

1683 self._content_caches[cache_].clear_accessor(accessor_id) 

1684 

1685 def get_cache_stats(self, cache_id): 

1686 return self._content_caches[cache_id].get_stats() 

1687 

1688 def _check_duplicates(self, nuts): 

1689 d = defaultdict(list) 

1690 for nut in nuts: 

1691 d[nut.codes].append(nut) 

1692 

1693 for codes, group in d.items(): 

1694 if len(group) > 1: 

1695 logger.warning( 

1696 'Multiple entries matching codes: %s' 

1697 % '.'.join(codes.split(separator))) 

1698 

1699 @filldocs 

1700 def get_stations( 

1701 self, obj=None, tmin=None, tmax=None, time=None, codes=None, 

1702 model='squirrel'): 

1703 

1704 ''' 

1705 Get stations matching given constraints. 

1706 

1707 %(query_args)s 

1708 

1709 :param model: 

1710 Select object model for returned values: ``'squirrel'`` to get 

1711 Squirrel station objects or ``'pyrocko'`` to get Pyrocko station 

1712 objects with channel information attached. 

1713 :type model: 

1714 str 

1715 

1716 :returns: 

1717 List of :py:class:`pyrocko.squirrel.Station 

1718 <pyrocko.squirrel.model.Station>` objects by default or list of 

1719 :py:class:`pyrocko.model.Station <pyrocko.model.station.Station>` 

1720 objects if ``model='pyrocko'`` is requested. 

1721 

1722 See :py:meth:`iter_nuts` for details on time span matching. 

1723 ''' 

1724 

1725 if model == 'pyrocko': 

1726 return self._get_pyrocko_stations(obj, tmin, tmax, time, codes) 

1727 elif model == 'squirrel': 

1728 args = self._get_selection_args(obj, tmin, tmax, time, codes) 

1729 nuts = sorted( 

1730 self.iter_nuts('station', *args), key=lambda nut: nut.dkey) 

1731 self._check_duplicates(nuts) 

1732 return [self.get_content(nut) for nut in nuts] 

1733 else: 

1734 raise ValueError('Invalid station model: %s' % model) 

1735 

1736 @filldocs 

1737 def get_channels( 

1738 self, obj=None, tmin=None, tmax=None, time=None, codes=None): 

1739 

1740 ''' 

1741 Get channels matching given constraints. 

1742 

1743 %(query_args)s 

1744 

1745 :returns: 

1746 List of :py:class:`~pyrocko.squirrel.model.Channel` objects. 

1747 

1748 See :py:meth:`iter_nuts` for details on time span matching. 

1749 ''' 

1750 

1751 args = self._get_selection_args(obj, tmin, tmax, time, codes) 

1752 nuts = sorted( 

1753 self.iter_nuts('channel', *args), key=lambda nut: nut.dkey) 

1754 self._check_duplicates(nuts) 

1755 return [self.get_content(nut) for nut in nuts] 

1756 

1757 @filldocs 

1758 def get_sensors( 

1759 self, obj=None, tmin=None, tmax=None, time=None, codes=None): 

1760 

1761 ''' 

1762 Get sensors matching given constraints. 

1763 

1764 %(query_args)s 

1765 

1766 :returns: 

1767 List of :py:class:`~pyrocko.squirrel.model.Sensor` objects. 

1768 

1769 See :py:meth:`iter_nuts` for details on time span matching. 

1770 ''' 

1771 

1772 tmin, tmax, codes = self._get_selection_args( 

1773 obj, tmin, tmax, time, codes) 

1774 

1775 if codes is not None: 

1776 if isinstance(codes, str): 

1777 codes = codes.split('.') 

1778 codes = tuple(codes_inflate(codes)) 

1779 if codes[4] != '*': 

1780 codes = codes[:4] + (codes[4][:-1] + '?',) + codes[5:] 

1781 

1782 nuts = sorted( 

1783 self.iter_nuts( 

1784 'channel', tmin, tmax, codes), key=lambda nut: nut.dkey) 

1785 self._check_duplicates(nuts) 

1786 return model.Sensor.from_channels( 

1787 self.get_content(nut) for nut in nuts) 

1788 

1789 @filldocs 

1790 def get_responses( 

1791 self, obj=None, tmin=None, tmax=None, time=None, codes=None): 

1792 

1793 ''' 

1794 Get instrument responses matching given constraints. 

1795 

1796 %(query_args)s 

1797 

1798 :returns: 

1799 List of :py:class:`~pyrocko.squirrel.model.Response` objects. 

1800 

1801 See :py:meth:`iter_nuts` for details on time span matching. 

1802 ''' 

1803 

1804 args = self._get_selection_args(obj, tmin, tmax, time, codes) 

1805 nuts = sorted( 

1806 self.iter_nuts('response', *args), key=lambda nut: nut.dkey) 

1807 self._check_duplicates(nuts) 

1808 return [self.get_content(nut) for nut in nuts] 

1809 

1810 @filldocs 

1811 def get_response( 

1812 self, obj=None, tmin=None, tmax=None, time=None, codes=None): 

1813 

1814 ''' 

1815 Get instrument response matching given constraints. 

1816 

1817 %(query_args)s 

1818 

1819 :returns: 

1820 :py:class:`~pyrocko.squirrel.model.Response` object. 

1821 

1822 Same as :py:meth:`get_responses` but returning exactly one response. 

1823 Raises :py:exc:`~pyrocko.squirrel.error.NotAvailable` if zero or more 

1824 than one is available. 

1825 

1826 See :py:meth:`iter_nuts` for details on time span matching. 

1827 ''' 

1828 

1829 responses = self.get_responses(obj, tmin, tmax, time, codes) 

1830 if len(responses) == 0: 

1831 raise error.NotAvailable( 

1832 'No instrument response available.') 

1833 elif len(responses) > 1: 

1834 raise error.NotAvailable( 

1835 'Multiple instrument responses matching given constraints.') 

1836 

1837 return responses[0] 

1838 

1839 @filldocs 

1840 def get_events( 

1841 self, obj=None, tmin=None, tmax=None, time=None, codes=None): 

1842 

1843 ''' 

1844 Get events matching given constraints. 

1845 

1846 %(query_args)s 

1847 

1848 :returns: 

1849 List of :py:class:`~pyrocko.model.event.Event` objects. 

1850 

1851 See :py:meth:`iter_nuts` for details on time span matching. 

1852 ''' 

1853 

1854 args = self._get_selection_args(obj, tmin, tmax, time, codes) 

1855 nuts = sorted( 

1856 self.iter_nuts('event', *args), key=lambda nut: nut.dkey) 

1857 self._check_duplicates(nuts) 

1858 return [self.get_content(nut) for nut in nuts] 

1859 

1860 def _redeem_promises(self, *args): 

1861 

1862 tmin, tmax, _ = args 

1863 

1864 waveforms = list(self.iter_nuts('waveform', *args)) 

1865 promises = list(self.iter_nuts('waveform_promise', *args)) 

1866 

1867 codes_to_avail = defaultdict(list) 

1868 for nut in waveforms: 

1869 codes_to_avail[nut.codes].append((nut.tmin, nut.tmax)) 

1870 

1871 def tts(x): 

1872 if isinstance(x, tuple): 

1873 return tuple(tts(e) for e in x) 

1874 elif isinstance(x, list): 

1875 return list(tts(e) for e in x) 

1876 else: 

1877 return util.time_to_str(x) 

1878 

1879 orders = [] 

1880 for promise in promises: 

1881 waveforms_avail = codes_to_avail[promise.codes] 

1882 for block_tmin, block_tmax in blocks( 

1883 max(tmin, promise.tmin), 

1884 min(tmax, promise.tmax), 

1885 promise.deltat): 

1886 

1887 orders.append( 

1888 WaveformOrder( 

1889 source_id=promise.file_path, 

1890 codes=tuple(promise.codes.split(separator)), 

1891 tmin=block_tmin, 

1892 tmax=block_tmax, 

1893 deltat=promise.deltat, 

1894 gaps=gaps(waveforms_avail, block_tmin, block_tmax))) 

1895 

1896 orders_noop, orders = lpick(lambda order: order.gaps, orders) 

1897 

1898 order_keys_noop = set(order_key(order) for order in orders_noop) 

1899 if len(order_keys_noop) != 0 or len(orders_noop) != 0: 

1900 logger.info( 

1901 'Waveform orders already satisified with cached/local data: ' 

1902 '%i (%i)' % (len(order_keys_noop), len(orders_noop))) 

1903 

1904 source_ids = [] 

1905 sources = {} 

1906 for source in self._sources: 

1907 if isinstance(source, fdsn.FDSNSource): 

1908 source_ids.append(source._source_id) 

1909 sources[source._source_id] = source 

1910 

1911 source_priority = dict( 

1912 (source_id, i) for (i, source_id) in enumerate(source_ids)) 

1913 

1914 order_groups = defaultdict(list) 

1915 for order in orders: 

1916 order_groups[order_key(order)].append(order) 

1917 

1918 for k, order_group in order_groups.items(): 

1919 order_group.sort( 

1920 key=lambda order: source_priority[order.source_id]) 

1921 

1922 n_order_groups = len(order_groups) 

1923 

1924 if len(order_groups) != 0 or len(orders) != 0: 

1925 logger.info( 

1926 'Waveform orders standing for download: %i (%i)' 

1927 % (len(order_groups), len(orders))) 

1928 

1929 task = make_task('Waveform orders processed', n_order_groups) 

1930 else: 

1931 task = None 

1932 

1933 def split_promise(order): 

1934 self._split_nuts( 

1935 'waveform_promise', 

1936 order.tmin, order.tmax, 

1937 codes=order.codes, 

1938 path=order.source_id) 

1939 

1940 def release_order_group(order): 

1941 okey = order_key(order) 

1942 for followup in order_groups[okey]: 

1943 split_promise(followup) 

1944 

1945 del order_groups[okey] 

1946 

1947 if task: 

1948 task.update(n_order_groups - len(order_groups)) 

1949 

1950 def noop(order): 

1951 pass 

1952 

1953 def success(order): 

1954 release_order_group(order) 

1955 split_promise(order) 

1956 

1957 def batch_add(paths): 

1958 self.add(paths) 

1959 

1960 calls = queue.Queue() 

1961 

1962 def enqueue(f): 

1963 def wrapper(*args): 

1964 calls.put((f, args)) 

1965 

1966 return wrapper 

1967 

1968 for order in orders_noop: 

1969 split_promise(order) 

1970 

1971 while order_groups: 

1972 

1973 orders_now = [] 

1974 empty = [] 

1975 for k, order_group in order_groups.items(): 

1976 try: 

1977 orders_now.append(order_group.pop(0)) 

1978 except IndexError: 

1979 empty.append(k) 

1980 

1981 for k in empty: 

1982 del order_groups[k] 

1983 

1984 by_source_id = defaultdict(list) 

1985 for order in orders_now: 

1986 by_source_id[order.source_id].append(order) 

1987 

1988 threads = [] 

1989 for source_id in by_source_id: 

1990 def download(): 

1991 try: 

1992 sources[source_id].download_waveforms( 

1993 by_source_id[source_id], 

1994 success=enqueue(success), 

1995 error_permanent=enqueue(split_promise), 

1996 error_temporary=noop, 

1997 batch_add=enqueue(batch_add)) 

1998 

1999 finally: 

2000 calls.put(None) 

2001 

2002 thread = threading.Thread(target=download) 

2003 thread.start() 

2004 threads.append(thread) 

2005 

2006 ndone = 0 

2007 while ndone < len(threads): 

2008 ret = calls.get() 

2009 if ret is None: 

2010 ndone += 1 

2011 else: 

2012 ret[0](*ret[1]) 

2013 

2014 for thread in threads: 

2015 thread.join() 

2016 

2017 if task: 

2018 task.update(n_order_groups - len(order_groups)) 

2019 

2020 if task: 

2021 task.done() 

2022 

2023 @filldocs 

2024 def get_waveform_nuts( 

2025 self, obj=None, tmin=None, tmax=None, time=None, codes=None): 

2026 

2027 ''' 

2028 Get waveform content entities matching given constraints. 

2029 

2030 %(query_args)s 

2031 

2032 Like :py:meth:`get_nuts` with ``kind='waveform'`` but additionally 

2033 resolves matching waveform promises (downloads waveforms from remote 

2034 sources). 

2035 

2036 See :py:meth:`iter_nuts` for details on time span matching. 

2037 ''' 

2038 

2039 args = self._get_selection_args(obj, tmin, tmax, time, codes) 

2040 self._redeem_promises(*args) 

2041 return sorted( 

2042 self.iter_nuts('waveform', *args), key=lambda nut: nut.dkey) 

2043 

    @filldocs
    def get_waveforms(
            self, obj=None, tmin=None, tmax=None, time=None, codes=None,
            uncut=False, want_incomplete=True, degap=True, maxgap=5,
            maxlap=None, snap=None, include_last=False, load_data=True,
            accessor_id='default', operator_params=None):

        '''
        Get waveforms matching given constraints.

        %(query_args)s

        :param uncut:
            Set to ``True``, to disable cutting traces to [``tmin``, ``tmax``]
            and to disable degapping/deoverlapping. Returns untouched traces as
            they are read from file segment. File segments are always read in
            their entirety.
        :type uncut:
            bool

        :param want_incomplete:
            If ``True``, gappy/incomplete traces are included in the result.
        :type want_incomplete:
            bool

        :param degap:
            If ``True``, connect traces and remove gaps and overlaps.
        :type degap:
            bool

        :param maxgap:
            Maximum gap size in samples which is filled with interpolated
            samples when ``degap`` is ``True``.
        :type maxgap:
            int

        :param maxlap:
            Maximum overlap size in samples which is removed when ``degap`` is
            ``True``.
        :type maxlap:
            int

        :param snap:
            Rounding functions used when computing sample index from time
            instance, for trace start and trace end, respectively. By default,
            ``(round, round)`` is used.
        :type snap:
            tuple of 2 callables

        :param include_last:
            If ``True``, add one more sample to the returned traces (the sample
            which would be the first sample of a query with ``tmin`` set to the
            current value of ``tmax``).
        :type include_last:
            bool

        :param load_data:
            If ``True``, waveform data samples are read from files (or cache).
            If ``False``, meta-information-only traces are returned (dummy
            traces with no data samples).
        :type load_data:
            bool

        :param accessor_id:
            Name of consumer on who's behalf data is accessed. Used in cache
            management (see :py:mod:`~pyrocko.squirrel.cache`). Used as a key
            to distinguish different points of extraction for the decision of
            when to release cached waveform data. Should be used when data is
            alternately extracted from more than one region / selection.
        :type accessor_id:
            str

        See :py:meth:`iter_nuts` for details on time span matching.

        Loaded data is kept in memory (at least) until
        :py:meth:`clear_accessor` has been called or
        :py:meth:`advance_accessor` has been called two consecutive times
        without data being accessed between the two calls (by this accessor).
        Data may still be further kept in the memory cache if held alive by
        consumers with a different ``accessor_id``.
        '''

        # Normalize query: resolve the obj/time shortcut arguments into
        # concrete tmin/tmax/codes values.
        tmin, tmax, codes = self._get_selection_args(
            obj, tmin, tmax, time, codes)

        self_tmin, self_tmax = self.get_time_span(
            ['waveform', 'waveform_promise'])

        if None in (self_tmin, self_tmax):
            logger.warning(
                'No waveforms available.')
            return []

        # Unset time bounds default to the full available time span.
        tmin = tmin if tmin is not None else self_tmin
        tmax = tmax if tmax is not None else self_tmax

        if codes is not None:
            # If these codes are produced by a registered operator, delegate
            # the complete request to the operator.
            operator = self.get_operator(codes)
            if operator is not None:
                return operator.get_waveforms(
                    self, codes,
                    tmin=tmin, tmax=tmax,
                    uncut=uncut, want_incomplete=want_incomplete, degap=degap,
                    maxgap=maxgap, maxlap=maxlap, snap=snap,
                    include_last=include_last, load_data=load_data,
                    accessor_id=accessor_id, params=operator_params)

        nuts = self.get_waveform_nuts(obj, tmin, tmax, time, codes)

        if load_data:
            traces = [
                self.get_content(nut, 'waveform', accessor_id) for nut in nuts]

        else:
            # Meta-information-only dummy traces (no samples loaded).
            traces = [
                trace.Trace(**nut.trace_kwargs) for nut in nuts]

        if uncut:
            return traces

        if snap is None:
            snap = (round, round)

        chopped = []
        for tr in traces:
            # A cached trace may carry data even when load_data is False;
            # work on a data-less copy so cached content is not mutated.
            if not load_data and tr.ydata is not None:
                tr = tr.copy(data=False)
                tr.ydata = None

            try:
                chopped.append(tr.chop(
                    tmin, tmax,
                    inplace=False,
                    snap=snap,
                    include_last=include_last))

            except trace.NoData:
                # Trace does not intersect the requested span after snapping.
                pass

        processed = self._process_chopped(
            chopped, degap, maxgap, maxlap, want_incomplete, tmin, tmax)

        return processed

2187 

2188 @filldocs 

2189 def chopper_waveforms( 

2190 self, obj=None, tmin=None, tmax=None, time=None, codes=None, 

2191 tinc=None, tpad=0., 

2192 want_incomplete=True, snap_window=False, 

2193 degap=True, maxgap=5, maxlap=None, 

2194 snap=None, include_last=False, load_data=True, 

2195 accessor_id=None, clear_accessor=True, operator_params=None): 

2196 

2197 ''' 

2198 Iterate window-wise over waveform archive. 

2199 

2200 %(query_args)s 

2201 

2202 :param tinc: 

2203 Time increment (window shift time) (default uses ``tmax-tmin``). 

2204 :type tinc: 

2205 timestamp 

2206 

2207 :param tpad: 

2208 Padding time appended on either side of the data window (window 

2209 overlap is ``2*tpad``). 

2210 :type tpad: 

2211 timestamp 

2212 

2213 :param want_incomplete: 

2214 If ``True``, gappy/incomplete traces are included in the result. 

2215 :type want_incomplete: 

2216 bool 

2217 

2218 :param snap_window: 

2219 If ``True``, start time windows at multiples of tinc with respect 

2220 to system time zero. 

2221 :type snap_window: 

2222 bool 

2223 

2224 :param degap: 

2225 If ``True``, connect traces and remove gaps and overlaps. 

2226 :type degap: 

2227 bool 

2228 

2229 :param maxgap: 

2230 Maximum gap size in samples which is filled with interpolated 

2231 samples when ``degap`` is ``True``. 

2232 :type maxgap: 

2233 int 

2234 

2235 :param maxlap: 

2236 Maximum overlap size in samples which is removed when ``degap`` is 

2237 ``True``. 

2238 :type maxlap: 

2239 int 

2240 

2241 :param snap: 

2242 Rounding functions used when computing sample index from time 

2243 instance, for trace start and trace end, respectively. By default, 

2244 ``(round, round)`` is used. 

2245 :type snap: 

2246 tuple of 2 callables 

2247 

2248 :param include_last: 

2249 If ``True``, add one more sample to the returned traces (the sample 

2250 which would be the first sample of a query with ``tmin`` set to the 

2251 current value of ``tmax``). 

2252 :type include_last: 

2253 bool 

2254 

2255 :param load_data: 

2256 If ``True``, waveform data samples are read from files (or cache). 

2257 If ``False``, meta-information-only traces are returned (dummy 

2258 traces with no data samples). 

2259 :type load_data: 

2260 bool 

2261 

2262 :param accessor_id: 

2263 Name of consumer on who's behalf data is accessed. Used in cache 

2264 management (see :py:mod:`~pyrocko.squirrel.cache`). Used as a key 

2265 to distinguish different points of extraction for the decision of 

2266 when to release cached waveform data. Should be used when data is 

2267 alternately extracted from more than one region / selection. 

2268 :type accessor_id: 

2269 str 

2270 

2271 :param clear_accessor: 

2272 If ``True`` (default), :py:meth:`clear_accessor` is called when the 

2273 chopper finishes. Set to ``False`` to keep loaded waveforms in 

2274 memory when the generator returns. 

2275 :type clear_accessor: 

2276 bool 

2277 

2278 :yields: 

2279 A list of :py:class:`~pyrocko.trace.Trace` objects for every 

2280 extracted time window. 

2281 

2282 See :py:meth:`iter_nuts` for details on time span matching. 

2283 ''' 

2284 

2285 tmin, tmax, codes = self._get_selection_args( 

2286 obj, tmin, tmax, time, codes) 

2287 

2288 self_tmin, self_tmax = self.get_time_span( 

2289 ['waveform', 'waveform_promise']) 

2290 

2291 if None in (self_tmin, self_tmax): 

2292 logger.warning( 

2293 'Content has undefined time span. No waveforms and no ' 

2294 'waveform promises?') 

2295 return 

2296 

2297 if snap_window and tinc is not None: 

2298 tmin = tmin if tmin is not None else self_tmin 

2299 tmax = tmax if tmax is not None else self_tmax 

2300 tmin = math.floor(tmin / tinc) * tinc 

2301 tmax = math.ceil(tmax / tinc) * tinc 

2302 else: 

2303 tmin = tmin if tmin is not None else self_tmin + tpad 

2304 tmax = tmax if tmax is not None else self_tmax - tpad 

2305 

2306 tinc = tinc if tinc is not None else tmax - tmin 

2307 

2308 try: 

2309 if accessor_id is None: 

2310 accessor_id = 'chopper%i' % self._n_choppers_active 

2311 

2312 self._n_choppers_active += 1 

2313 

2314 eps = tinc * 1e-6 

2315 if tinc != 0.0: 

2316 nwin = int(((tmax - eps) - tmin) / tinc) + 1 

2317 else: 

2318 nwin = 1 

2319 

2320 for iwin in range(nwin): 

2321 wmin, wmax = tmin+iwin*tinc, min(tmin+(iwin+1)*tinc, tmax) 

2322 chopped = [] 

2323 wmin, wmax = tmin+iwin*tinc, min(tmin+(iwin+1)*tinc, tmax) 

2324 eps = tinc*1e-6 

2325 if wmin >= tmax-eps: 

2326 break 

2327 

2328 chopped = self.get_waveforms( 

2329 tmin=wmin-tpad, 

2330 tmax=wmax+tpad, 

2331 codes=codes, 

2332 snap=snap, 

2333 include_last=include_last, 

2334 load_data=load_data, 

2335 want_incomplete=want_incomplete, 

2336 degap=degap, 

2337 maxgap=maxgap, 

2338 maxlap=maxlap, 

2339 accessor_id=accessor_id, 

2340 operator_params=operator_params) 

2341 

2342 self.advance_accessor(accessor_id) 

2343 

2344 yield Batch( 

2345 tmin=wmin, 

2346 tmax=wmax, 

2347 i=iwin, 

2348 n=nwin, 

2349 traces=chopped) 

2350 

2351 iwin += 1 

2352 

2353 finally: 

2354 self._n_choppers_active -= 1 

2355 if clear_accessor: 

2356 self.clear_accessor(accessor_id, 'waveform') 

2357 

2358 def _process_chopped( 

2359 self, chopped, degap, maxgap, maxlap, want_incomplete, tmin, tmax): 

2360 

2361 chopped.sort(key=lambda a: a.full_id) 

2362 if degap: 

2363 chopped = trace.degapper(chopped, maxgap=maxgap, maxlap=maxlap) 

2364 

2365 if not want_incomplete: 

2366 chopped_weeded = [] 

2367 for tr in chopped: 

2368 emin = tr.tmin - tmin 

2369 emax = tr.tmax + tr.deltat - tmax 

2370 if (abs(emin) <= 0.5*tr.deltat and abs(emax) <= 0.5*tr.deltat): 

2371 chopped_weeded.append(tr) 

2372 

2373 elif degap: 

2374 if (0. < emin <= 5. * tr.deltat 

2375 and -5. * tr.deltat <= emax < 0.): 

2376 

2377 tr.extend(tmin, tmax-tr.deltat, fillmethod='repeat') 

2378 chopped_weeded.append(tr) 

2379 

2380 chopped = chopped_weeded 

2381 

2382 return chopped 

2383 

2384 def _get_pyrocko_stations( 

2385 self, obj=None, tmin=None, tmax=None, time=None, codes=None): 

2386 

2387 from pyrocko import model as pmodel 

2388 

2389 by_nsl = defaultdict(lambda: (list(), list())) 

2390 for station in self.get_stations(obj, tmin, tmax, time, codes): 

2391 sargs = station._get_pyrocko_station_args() 

2392 nsl = sargs[1:4] 

2393 by_nsl[nsl][0].append(sargs) 

2394 

2395 for channel in self.get_channels(obj, tmin, tmax, time, codes): 

2396 sargs = channel._get_pyrocko_station_args() 

2397 nsl = sargs[1:4] 

2398 sargs_list, channels_list = by_nsl[nsl] 

2399 sargs_list.append(sargs) 

2400 channels_list.append(channel) 

2401 

2402 pstations = [] 

2403 nsls = list(by_nsl.keys()) 

2404 nsls.sort() 

2405 for nsl in nsls: 

2406 sargs_list, channels_list = by_nsl[nsl] 

2407 sargs = util.consistency_merge(sargs_list) 

2408 

2409 by_c = defaultdict(list) 

2410 for ch in channels_list: 

2411 by_c[ch.channel].append(ch._get_pyrocko_channel_args()) 

2412 

2413 chas = list(by_c.keys()) 

2414 chas.sort() 

2415 pchannels = [] 

2416 for cha in chas: 

2417 list_of_cargs = by_c[cha] 

2418 cargs = util.consistency_merge(list_of_cargs) 

2419 pchannels.append(pmodel.Channel( 

2420 name=cargs[0], 

2421 azimuth=cargs[1], 

2422 dip=cargs[2])) 

2423 

2424 pstations.append(pmodel.Station( 

2425 network=sargs[0], 

2426 station=sargs[1], 

2427 location=sargs[2], 

2428 lat=sargs[3], 

2429 lon=sargs[4], 

2430 elevation=sargs[5], 

2431 depth=sargs[6] or 0.0, 

2432 channels=pchannels)) 

2433 

2434 return pstations 

2435 

    @property
    def pile(self):

        '''
        Emulates the older :py:class:`pyrocko.pile.Pile` interface.

        This property exposes a :py:class:`pyrocko.squirrel.pile.Pile` object,
        which emulates most of the older :py:class:`pyrocko.pile.Pile` methods
        but uses the fluffy power of the Squirrel under the hood.

        This interface can be used as a drop-in replacement for piles which are
        used in existing scripts and programs for efficient waveform data
        access. The Squirrel-based pile scales better for large datasets. Newer
        scripts should use Squirrel's native methods to avoid the emulation
        overhead.
        '''
        from . import pile

        # Lazily created on first access and cached for the lifetime of this
        # Squirrel instance.
        if self._pile is None:
            self._pile = pile.Pile(self)

        return self._pile

2458 

    def snuffle(self):
        '''
        Look at dataset in Snuffler.

        Opens the interactive Snuffler waveform browser on the emulated pile
        (see :py:attr:`pile`); blocks until the GUI is closed.
        '''
        self.pile.snuffle()

2464 

2465 def _gather_codes_keys(self, kind, gather, selector): 

2466 return set( 

2467 gather(codes) 

2468 for codes in self.iter_codes(kind) 

2469 if selector is None or selector(codes)) 

2470 

2471 def __str__(self): 

2472 return str(self.get_stats()) 

2473 

    def get_coverage(
            self, kind, tmin=None, tmax=None, codes_list=None, limit=None,
            return_raw=True):

        '''
        Get coverage information.

        Get information about strips of gapless data coverage.

        :param kind:
            Content kind to be queried.
        :type kind:
            str

        :param tmin:
            Start time of query interval.
        :type tmin:
            timestamp

        :param tmax:
            End time of query interval.
        :type tmax:
            timestamp

        :param codes_list:
            List of code patterns to query. If not given or empty, an empty
            list is returned.
        :type codes_list:
            :py:class:`list` of :py:class:`tuple` of :py:class:`str`

        :param limit:
            Limit query to return only up to a given maximum number of entries
            per matching channel (without setting this option, very gappy data
            could cause the query to execute for a very long time).
        :type limit:
            int

        :returns:
            List of entries of the form ``(pattern, codes, deltat, tmin, tmax,
            data)`` where ``pattern`` is the request code pattern which
            yielded this entry, ``codes`` are the matching channel codes,
            ``tmin`` and ``tmax`` are the global min and max times for which
            data for this channel is available, regardless of any time
            restrictions in the query. ``data`` is a list with (up to
            ``limit``) change-points of the form ``(time, count)`` where a
            ``count`` of zero indicates a data gap, a value of 1 normal data
            coverage and higher values indicate duplicate/redundant data.
        '''

        # Times are stored split into integer seconds plus sub-second offset.
        tmin_seconds, tmin_offset = model.tsplit(tmin)
        tmax_seconds, tmax_offset = model.tsplit(tmax)
        kind_id = to_kind_id(kind)

        if codes_list is None:
            codes_list = self.get_codes(kind=kind)

        # Resolve each requested pattern to concrete kind_codes rows and
        # prepend the originating pattern to each row.
        kdata_all = []
        for pattern in codes_list:
            kdata = self.glob_codes(kind, [pattern])
            for row in kdata:
                row[0:0] = [pattern]

            kdata_all.extend(kdata)

        kind_codes_ids = [x[1] for x in kdata_all]

        # Coverage count already active exactly at tmin, obtained from the
        # nuts (the coverage change-point query below deliberately excludes
        # change-points at tmin itself).
        counts_at_tmin = {}
        if tmin is not None:
            for nut in self.iter_nuts(
                    kind, tmin, tmin, kind_codes_ids=kind_codes_ids):

                k = nut.codes, nut.deltat
                if k not in counts_at_tmin:
                    counts_at_tmin[k] = 0

                counts_at_tmin[k] += 1

        coverage = []
        for pattern, kind_codes_id, codes, deltat in kdata_all:
            entry = [pattern, codes, deltat, None, None, []]
            # Fill entry[3] (global tmin) and entry[4] (global tmax) from the
            # first/last coverage change-point of this channel.
            for i, order in [(0, 'ASC'), (1, 'DESC')]:
                sql = self._sql('''
                    SELECT
                        time_seconds,
                        time_offset
                    FROM %(db)s.%(coverage)s
                    WHERE
                        kind_codes_id == ?
                    ORDER BY
                        kind_codes_id ''' + order + ''',
                        time_seconds ''' + order + ''',
                        time_offset ''' + order + '''
                    LIMIT 1
                ''')

                for row in self._conn.execute(sql, [kind_codes_id]):
                    entry[3+i] = model.tjoin(row[0], row[1])

            if None in entry[3:5]:
                # No coverage information at all for this channel.
                continue

            args = [kind_codes_id]

            sql_time = ''
            if tmin is not None:
                # intentionally < because (== tmin) is queried from nuts
                sql_time += ' AND ( ? < time_seconds ' \
                    'OR ( ? == time_seconds AND ? < time_offset ) ) '
                args.extend([tmin_seconds, tmin_seconds, tmin_offset])

            if tmax is not None:
                sql_time += ' AND ( time_seconds < ? ' \
                    'OR ( ? == time_seconds AND time_offset <= ? ) ) '
                args.extend([tmax_seconds, tmax_seconds, tmax_offset])

            sql_limit = ''
            if limit is not None:
                sql_limit = ' LIMIT ?'
                args.append(limit)

            # All coverage change-points within the query interval, in
            # ascending time order.
            sql = self._sql('''
                SELECT
                    time_seconds,
                    time_offset,
                    step
                FROM %(db)s.%(coverage)s
                WHERE
                    kind_codes_id == ?
                    ''' + sql_time + '''
                ORDER BY
                    kind_codes_id,
                    time_seconds,
                    time_offset
            ''' + sql_limit)

            rows = list(self._conn.execute(sql, args))

            if limit is not None and len(rows) == limit:
                # Result was truncated by the limit: signal "unknown" with
                # data=None rather than returning misleading partial data.
                entry[-1] = None
            else:
                # Accumulate step changes into absolute coverage counts,
                # starting from the count active at tmin.
                counts = counts_at_tmin.get((codes, deltat), 0)
                tlast = None
                if tmin is not None:
                    entry[-1].append((tmin, counts))
                    tlast = tmin

                for row in rows:
                    t = model.tjoin(row[0], row[1])
                    counts += row[2]
                    entry[-1].append((t, counts))
                    tlast = t

                # Close the interval with a final change-point at tmax.
                if tmax is not None and (tlast is None or tlast != tmax):
                    entry[-1].append((tmax, counts))

            coverage.append(entry)

        if return_raw:
            return coverage
        else:
            return [model.Coverage.from_values(
                entry + [kind_id]) for entry in coverage]

2636 

2637 def add_operator(self, op): 

2638 self._operators.append(op) 

2639 

2640 def update_operator_mappings(self): 

2641 available = [ 

2642 separator.join(codes) 

2643 for codes in self.get_codes(kind=('channel'))] 

2644 

2645 for operator in self._operators: 

2646 operator.update_mappings(available, self._operator_registry) 

2647 

2648 def iter_operator_mappings(self): 

2649 for operator in self._operators: 

2650 for in_codes, out_codes in operator.iter_mappings(): 

2651 yield operator, in_codes, out_codes 

2652 

2653 def get_operator_mappings(self): 

2654 return list(self.iter_operator_mappings()) 

2655 

2656 def get_operator(self, codes): 

2657 if isinstance(codes, tuple): 

2658 codes = separator.join(codes) 

2659 try: 

2660 return self._operator_registry[codes][0] 

2661 except KeyError: 

2662 return None 

2663 

2664 def get_operator_group(self, codes): 

2665 if isinstance(codes, tuple): 

2666 codes = separator.join(codes) 

2667 try: 

2668 return self._operator_registry[codes] 

2669 except KeyError: 

2670 return None, (None, None, None) 

2671 

2672 def iter_operator_codes(self): 

2673 for _, _, out_codes in self.iter_operator_mappings(): 

2674 for codes in out_codes: 

2675 yield tuple(codes.split(separator)) 

2676 

2677 def get_operator_codes(self): 

2678 return list(self.iter_operator_codes()) 

2679 

2680 def print_tables(self, table_names=None, stream=None): 

2681 ''' 

2682 Dump raw database tables in textual form (for debugging purposes). 

2683 

2684 :param table_names: 

2685 Names of tables to be dumped or ``None`` to dump all. 

2686 :type table_names: 

2687 :py:class:`list` of :py:class:`str` 

2688 

2689 :param stream: 

2690 Open file or ``None`` to dump to standard output. 

2691 ''' 

2692 

2693 if stream is None: 

2694 stream = sys.stdout 

2695 

2696 if isinstance(table_names, str): 

2697 table_names = [table_names] 

2698 

2699 if table_names is None: 

2700 table_names = [ 

2701 'selection_file_states', 

2702 'selection_nuts', 

2703 'selection_kind_codes_count', 

2704 'files', 'nuts', 'kind_codes', 'kind_codes_count'] 

2705 

2706 m = { 

2707 'selection_file_states': '%(db)s.%(file_states)s', 

2708 'selection_nuts': '%(db)s.%(nuts)s', 

2709 'selection_kind_codes_count': '%(db)s.%(kind_codes_count)s', 

2710 'files': 'files', 

2711 'nuts': 'nuts', 

2712 'kind_codes': 'kind_codes', 

2713 'kind_codes_count': 'kind_codes_count'} 

2714 

2715 for table_name in table_names: 

2716 self._database.print_table( 

2717 m[table_name] % self._names, stream=stream) 

2718 

2719 

class SquirrelStats(Object):
    '''
    Container to hold statistics about contents available from a Squirrel.

    See also :py:meth:`Squirrel.get_stats`.
    '''

    nfiles = Int.T(
        help='Number of files in selection.')
    nnuts = Int.T(
        help='Number of index nuts in selection.')
    codes = List.T(
        Tuple.T(content_t=String.T()),
        help='Available code sequences in selection, e.g. '
             '(agency, network, station, location) for stations nuts.')
    kinds = List.T(
        String.T(),
        help='Available content types in selection.')
    total_size = Int.T(
        help='Aggregated file size of files is selection.')
    counts = Dict.T(
        String.T(), Dict.T(Tuple.T(content_t=String.T()), Int.T()),
        help='Breakdown of how many nuts of any content type and code '
             'sequence are available in selection, ``counts[kind][codes]``.')
    time_spans = Dict.T(
        String.T(), Tuple.T(content_t=Timestamp.T()),
        help='Time spans by content type.')
    sources = List.T(
        String.T(),
        help='Descriptions of attached sources.')
    operators = List.T(
        String.T(),
        help='Descriptions of attached operators.')

    def __str__(self):
        # Total number of nuts per content kind.
        kind_counts = dict(
            (kind, sum(self.counts[kind].values())) for kind in self.kinds)

        scodes = model.codes_to_str_abbreviated(self.codes)

        ssources = '<none>' if not self.sources else '\n' + '\n'.join(
            ' ' + s for s in self.sources)

        soperators = '<none>' if not self.operators else '\n' + '\n'.join(
            ' ' + s for s in self.operators)

        def stime(t):
            # Format a timestamp; hide the unset/global-extent sentinels.
            return util.tts(t) if t is not None and t not in (
                model.g_tmin, model.g_tmax) else '<none>'

        def stable(rows):
            # Render rows as a table, left-justifying each column to the
            # width of its widest cell.
            ns = [max(len(w) for w in col) for col in zip(*rows)]
            return '\n'.join(
                ' '.join(w.ljust(n) for n, w in zip(ns, row))
                for row in rows)

        def indent(s):
            return '\n'.join(' '+line for line in s.splitlines())

        # One table row per content kind: name, count and covered time span.
        stspans = '<none>' if not self.kinds else '\n' + indent(stable([(
            kind + ':',
            str(kind_counts[kind]),
            stime(self.time_spans[kind][0]),
            '-',
            stime(self.time_spans[kind][1])) for kind in sorted(self.kinds)]))

        s = '''
Number of files: %i
Total size of known files: %s
Number of index nuts: %i
Available content kinds: %s
Available codes: %s
Sources: %s
Operators: %s''' % (
            self.nfiles,
            util.human_bytesize(self.total_size),
            self.nnuts,
            stspans, scodes, ssources, soperators)

        return s.lstrip()

2800 

2801 

2802__all__ = [ 

2803 'Squirrel', 

2804 'SquirrelStats', 

2805]