1# http://pyrocko.org - GPLv3 

2# 

3# The Pyrocko Developers, 21st Century 

4# ---|P------/S----------~Lg---------- 

5 

6from __future__ import absolute_import, print_function 

7 

8import sys 

9import os 

10 

11import math 

12import logging 

13import threading 

14import queue 

15from collections import defaultdict 

16 

17from pyrocko.guts import Object, Int, List, Tuple, String, Timestamp, Dict 

18from pyrocko import util, trace 

19from pyrocko.progress import progress 

20 

21from . import model, io, cache, dataset 

22 

23from .model import to_kind_id, WaveformOrder, to_kind, to_codes, \ 

24 STATION, CHANNEL, RESPONSE, EVENT, WAVEFORM 

25from .client import fdsn, catalog 

26from .selection import Selection, filldocs 

27from .database import abspath 

28from . import client, environment, error 

29 

# Module-level logger; progress tasks created by make_task() report to it.
logger = logging.getLogger('psq.base')

# NOTE(review): presumably picked up by pyrocko.guts as the type-name prefix
# for Object subclasses defined in this module -- confirm against guts.
guts_prefix = 'squirrel'

33 

34 

def make_task(*args):
    '''
    Create a progress task which reports through this module's logger.

    All positional arguments are forwarded unchanged to ``progress.task``;
    the module-level ``logger`` is always passed as ``logger``.

    :returns:
        Whatever ``progress.task`` returns.
    '''
    task = progress.task(*args, logger=logger)
    return task

37 

38 

def lpick(condition, seq):
    '''
    Partition the elements of a sequence by a predicate.

    :param condition:
        Callable applied once to every element; its result is interpreted
        by truthiness.
    :param seq:
        Iterable of elements to be sorted into the two groups.

    :returns:
        Tuple ``(rejected, accepted)`` of two lists: elements for which
        ``condition`` was false come first, those for which it was true
        second. The input order is preserved within each list.
    '''
    rejected, accepted = [], []
    for item in seq:
        if condition(item):
            accepted.append(item)
        else:
            rejected.append(item)

    return rejected, accepted

45 

46 

def codes_patterns_for_kind(kind_id, codes):
    '''
    Expand a codes specification into a flat list of codes patterns.

    :param kind_id:
        Content kind ID the codes belong to.
    :param codes:
        A single codes specification (anything accepted by ``to_codes``) or
        an arbitrarily nested :py:class:`list` of such specifications.

    :returns:
        Flat :py:class:`list` of codes objects. For the station kind every
        input yields two patterns: the codes themselves and a copy with the
        location set to ``'[*]'`` (in a GLOB pattern this matches a literal
        asterisk).
    '''
    if isinstance(codes, list):
        # Recurse into (possibly nested) lists and flatten the results.
        return [
            pattern
            for sub_codes in codes
            for pattern in codes_patterns_for_kind(kind_id, sub_codes)]

    pattern = to_codes(kind_id, codes)
    patterns = [pattern]
    if kind_id == model.STATION:
        # NOTE(review): presumably station entries may carry a literal '*'
        # as location, which this second pattern is meant to catch.
        patterns.append(pattern.replace(location='[*]'))

    return patterns

61 

62 

def blocks(tmin, tmax, deltat, nsamples_block=100000):
    '''
    Iterate over fixed-length time blocks covering ``[tmin, tmax]``.

    The block length is ``deltat * nsamples_block`` (converted with
    ``util.to_time_float``). Block edges are integer multiples of that
    length, so the first block may start before ``tmin`` and the last may
    end after ``tmax``.

    :param tmin:
        Start of the time span to cover.
    :param tmax:
        End of the time span to cover.
    :param deltat:
        Sampling interval.
    :param nsamples_block:
        Number of samples per block.

    :yields:
        Tuples ``(tmin_block, tmax_block)``.
    '''
    block_length = util.to_time_float(deltat * nsamples_block)
    iblock = int(math.floor(tmin / block_length))
    iblock_stop = int(math.ceil(tmax / block_length))
    while iblock < iblock_stop:
        yield iblock * block_length, (iblock + 1) * block_length
        iblock += 1

69 

70 

def gaps(avail, tmin, tmax):
    '''
    Find the parts of ``[tmin, tmax]`` which are not covered by ``avail``.

    :param avail:
        Iterable of ``(tmin_a, tmax_a)`` spans which are available. Spans
        may overlap and may extend beyond the query window.
    :param tmin:
        Start of the query window.
    :param tmax:
        End of the query window, must be larger than ``tmin``.

    :returns:
        :py:class:`list` of ``(tmin_gap, tmax_gap)`` tuples in ascending
        order. Zero-length gaps are omitted.
    '''
    assert tmin < tmax

    # Sweep over edge events. The level starts at 1, the window start lowers
    # it by one and the window end raises it again. Each available span
    # raises it at its start and lowers it at its end. Hence, inside the
    # window, level 0 means "not covered".
    events = [(tmax, 1), (tmin, -1)]
    for span_tmin, span_tmax in avail:
        assert span_tmin < span_tmax
        events.extend(((span_tmin, 1), (span_tmax, -1)))

    level = 1
    found = []
    gap_start = None
    for t, step in sorted(events):
        if level == 1 and step == -1:
            # Level is about to drop to 0: a gap opens here.
            gap_start = t
        elif level == 0 and step == 1 and gap_start is not None:
            # Level is about to rise from 0: the current gap closes here.
            if gap_start != t:
                found.append((gap_start, t))

        level += step

    return found

95 

96 

def order_key(order):
    '''
    Get a key tuple identifying an order by its codes and time span.

    :param order:
        Object with ``codes``, ``tmin`` and ``tmax`` attributes.

    :returns:
        Tuple ``(codes, tmin, tmax)``.
    '''
    codes, tmin, tmax = order.codes, order.tmin, order.tmax
    return codes, tmin, tmax

99 

100 

class Batch(object):
    '''
    Batch of waveforms from window-wise data extraction.

    Holds the state and the results handed out for one time window during
    window-wise waveform extraction with the
    :py:meth:`Squirrel.chopper_waveforms` method.

    *Attributes:*

    .. py:attribute:: tmin

        Start of this time window.

    .. py:attribute:: tmax

        End of this time window.

    .. py:attribute:: i

        Index of this time window in sequence.

    .. py:attribute:: n

        Total number of time windows in sequence.

    .. py:attribute:: traces

        Extracted waveforms for this time window.
    '''

    def __init__(self, tmin, tmax, i, n, traces):
        # Plain value holder: all arguments are stored as given.
        self.tmin, self.tmax = tmin, tmax
        self.i, self.n = i, n
        self.traces = traces

137 

138 

139class Squirrel(Selection): 

140 ''' 

141 Prompt, lazy, indexing, caching, dynamic seismological dataset access. 

142 

143 :param env: 

144 Squirrel environment instance or directory path to use as starting 

145 point for its detection. By default, the current directory is used as 

146 starting point. When searching for a usable environment the directory 

147 ``'.squirrel'`` or ``'squirrel'`` in the current (or starting point) 

148 directory is used if it exists, otherwise the parent directories are 

        searched upwards for the existence of such a directory. If no such

150 directory is found, the user's global Squirrel environment 

151 ``'$HOME/.pyrocko/squirrel'`` is used. 

152 :type env: 

153 :py:class:`~pyrocko.squirrel.environment.Environment` or 

154 :py:class:`str` 

155 

156 :param database: 

157 Database instance or path to database. By default the 

158 database found in the detected Squirrel environment is used. 

159 :type database: 

160 :py:class:`~pyrocko.squirrel.database.Database` or :py:class:`str` 

161 

162 :param cache_path: 

163 Directory path to use for data caching. By default, the ``'cache'`` 

164 directory in the detected Squirrel environment is used. 

165 :type cache_path: 

166 :py:class:`str` 

167 

168 :param persistent: 

169 If given a name, create a persistent selection. 

170 :type persistent: 

171 :py:class:`str` 

172 

173 This is the central class of the Squirrel framework. It provides a unified 

174 interface to query and access seismic waveforms, station meta-data and 

175 event information from local file collections and remote data sources. For 

176 prompt responses, a profound database setup is used under the hood. To 

177 speed up assemblage of ad-hoc data selections, files are indexed on first 

178 use and the extracted meta-data is remembered in the database for 

179 subsequent accesses. Bulk data is lazily loaded from disk and remote 

180 sources, just when requested. Once loaded, data is cached in memory to 

181 expedite typical access patterns. Files and data sources can be dynamically 

182 added to and removed from the Squirrel selection at runtime. 

183 

184 Queries are restricted to the contents of the files currently added to the 

185 Squirrel selection (usually a subset of the file meta-information 

186 collection in the database). This list of files is referred to here as the 

187 "selection". By default, temporary tables are created in the attached 

188 database to hold the names of the files in the selection as well as various 

189 indices and counters. These tables are only visible inside the application 

190 which created them and are deleted when the database connection is closed 

191 or the application exits. To create a selection which is not deleted at 

192 exit, supply a name to the ``persistent`` argument of the Squirrel 

193 constructor. Persistent selections are shared among applications using the 

194 same database. 

195 

196 **Method summary** 

197 

198 Some of the methods are implemented in :py:class:`Squirrel`'s base class 

199 :py:class:`~pyrocko.squirrel.selection.Selection`. 

200 

201 .. autosummary:: 

202 

203 ~Squirrel.add 

204 ~Squirrel.add_source 

205 ~Squirrel.add_fdsn 

206 ~Squirrel.add_catalog 

207 ~Squirrel.add_dataset 

208 ~Squirrel.add_virtual 

209 ~Squirrel.update 

210 ~Squirrel.update_waveform_promises 

211 ~Squirrel.advance_accessor 

212 ~Squirrel.clear_accessor 

213 ~Squirrel.reload 

214 ~pyrocko.squirrel.selection.Selection.iter_paths 

215 ~Squirrel.iter_nuts 

216 ~Squirrel.iter_kinds 

217 ~Squirrel.iter_deltats 

218 ~Squirrel.iter_codes 

219 ~pyrocko.squirrel.selection.Selection.get_paths 

220 ~Squirrel.get_nuts 

221 ~Squirrel.get_kinds 

222 ~Squirrel.get_deltats 

223 ~Squirrel.get_codes 

224 ~Squirrel.get_counts 

225 ~Squirrel.get_time_span 

226 ~Squirrel.get_deltat_span 

227 ~Squirrel.get_nfiles 

228 ~Squirrel.get_nnuts 

229 ~Squirrel.get_total_size 

230 ~Squirrel.get_stats 

231 ~Squirrel.get_content 

232 ~Squirrel.get_stations 

233 ~Squirrel.get_channels 

234 ~Squirrel.get_responses 

235 ~Squirrel.get_events 

236 ~Squirrel.get_waveform_nuts 

237 ~Squirrel.get_waveforms 

238 ~Squirrel.chopper_waveforms 

239 ~Squirrel.get_coverage 

240 ~Squirrel.pile 

241 ~Squirrel.snuffle 

242 ~Squirrel.glob_codes 

243 ~pyrocko.squirrel.selection.Selection.get_database 

244 ~Squirrel.print_tables 

245 ''' 

246 

247 def __init__( 

248 self, env=None, database=None, cache_path=None, persistent=None): 

249 

250 if not isinstance(env, environment.Environment): 

251 env = environment.get_environment(env) 

252 

253 if database is None: 

254 database = env.expand_path(env.database_path) 

255 

256 if cache_path is None: 

257 cache_path = env.expand_path(env.cache_path) 

258 

259 if persistent is None: 

260 persistent = env.persistent 

261 

262 Selection.__init__( 

263 self, database=database, persistent=persistent) 

264 

265 self.get_database().set_basepath(os.path.dirname(env.get_basepath())) 

266 

267 self._content_caches = { 

268 'waveform': cache.ContentCache(), 

269 'default': cache.ContentCache()} 

270 

271 self._cache_path = cache_path 

272 

273 self._sources = [] 

274 self._operators = [] 

275 self._operator_registry = {} 

276 

277 self._pile = None 

278 self._n_choppers_active = 0 

279 

280 self._names.update({ 

281 'nuts': self.name + '_nuts', 

282 'kind_codes_count': self.name + '_kind_codes_count', 

283 'coverage': self.name + '_coverage'}) 

284 

285 with self.transaction('create tables') as cursor: 

286 self._create_tables_squirrel(cursor) 

287 

    def _create_tables_squirrel(self, cursor):
        '''
        Create the selection specific tables, indices and triggers.

        All statements use ``IF NOT EXISTS``, so running this on an already
        set up selection leaves the existing objects in place. The
        ``%(...)s`` placeholders are filled in by ``self._sql``
        (NOTE(review): presumably from ``self._names`` -- confirm in
        ``Selection``).

        :param cursor:
            Database cursor on which the statements are executed.
        '''

        # Index of the content pieces ("nuts") contained in this selection.
        cursor.execute(self._register_table(self._sql(
            '''
                CREATE TABLE IF NOT EXISTS %(db)s.%(nuts)s (
                    nut_id integer PRIMARY KEY,
                    file_id integer,
                    file_segment integer,
                    file_element integer,
                    kind_id integer,
                    kind_codes_id integer,
                    tmin_seconds integer,
                    tmin_offset integer,
                    tmax_seconds integer,
                    tmax_offset integer,
                    kscale integer)
            ''')))

        # Number of nuts in the selection for each kind/codes combination.
        # Maintained by the inc/dec triggers below.
        cursor.execute(self._register_table(self._sql(
            '''
                CREATE TABLE IF NOT EXISTS %(db)s.%(kind_codes_count)s (
                    kind_codes_id integer PRIMARY KEY,
                    count integer)
            ''')))

        # A nut is uniquely identified by file, segment and element.
        cursor.execute(self._sql(
            '''
                CREATE UNIQUE INDEX IF NOT EXISTS %(db)s.%(nuts)s_file_element
                    ON %(nuts)s (file_id, file_segment, file_element)
            '''))

        # Lookup indices by file and by kind combined with time or kscale.
        cursor.execute(self._sql(
            '''
                CREATE INDEX IF NOT EXISTS %(db)s.%(nuts)s_index_file_id
                ON %(nuts)s (file_id)
            '''))

        cursor.execute(self._sql(
            '''
                CREATE INDEX IF NOT EXISTS %(db)s.%(nuts)s_index_tmin_seconds
                ON %(nuts)s (kind_id, tmin_seconds)
            '''))

        cursor.execute(self._sql(
            '''
                CREATE INDEX IF NOT EXISTS %(db)s.%(nuts)s_index_tmax_seconds
                ON %(nuts)s (kind_id, tmax_seconds)
            '''))

        cursor.execute(self._sql(
            '''
                CREATE INDEX IF NOT EXISTS %(db)s.%(nuts)s_index_kscale
                ON %(nuts)s (kind_id, kscale, tmin_seconds)
            '''))

        # Drop a file's nuts from the selection when the file is deleted from
        # the main files table.
        cursor.execute(self._sql(
            '''
                CREATE TRIGGER IF NOT EXISTS %(db)s.%(nuts)s_delete_nuts
                BEFORE DELETE ON main.files FOR EACH ROW
                BEGIN
                  DELETE FROM %(nuts)s WHERE file_id == old.file_id;
                END
            '''))

        # trigger only on size to make silent update of mtime possible
        cursor.execute(self._sql(
            '''
                CREATE TRIGGER IF NOT EXISTS %(db)s.%(nuts)s_delete_nuts2
                BEFORE UPDATE OF size ON main.files FOR EACH ROW
                BEGIN
                  DELETE FROM %(nuts)s WHERE file_id == old.file_id;
                END
            '''))

        # Drop a file's nuts when the file is removed from this selection's
        # file states table.
        cursor.execute(self._sql(
            '''
                CREATE TRIGGER IF NOT EXISTS
                    %(db)s.%(file_states)s_delete_files
                BEFORE DELETE ON %(db)s.%(file_states)s FOR EACH ROW
                BEGIN
                  DELETE FROM %(nuts)s WHERE file_id == old.file_id;
                END
            '''))

        # Keep the per kind/codes counters in sync: create the counter row on
        # demand and increment it for every inserted nut ...
        cursor.execute(self._sql(
            '''
                CREATE TRIGGER IF NOT EXISTS %(db)s.%(nuts)s_inc_kind_codes
                BEFORE INSERT ON %(nuts)s FOR EACH ROW
                BEGIN
                    INSERT OR IGNORE INTO %(kind_codes_count)s VALUES
                    (new.kind_codes_id, 0);
                    UPDATE %(kind_codes_count)s
                    SET count = count + 1
                    WHERE new.kind_codes_id
                        == %(kind_codes_count)s.kind_codes_id;
                END
            '''))

        # ... and decrement it for every deleted nut.
        cursor.execute(self._sql(
            '''
                CREATE TRIGGER IF NOT EXISTS %(db)s.%(nuts)s_dec_kind_codes
                BEFORE DELETE ON %(nuts)s FOR EACH ROW
                BEGIN
                    UPDATE %(kind_codes_count)s
                    SET count = count - 1
                    WHERE old.kind_codes_id
                        == %(kind_codes_count)s.kind_codes_id;
                END
            '''))

        # Coverage bookkeeping: one row per kind/codes and time instant,
        # holding the net change ("step") in the number of nuts at that time.
        cursor.execute(self._register_table(self._sql(
            '''
                CREATE TABLE IF NOT EXISTS %(db)s.%(coverage)s (
                    kind_codes_id integer,
                    time_seconds integer,
                    time_offset integer,
                    step integer)
            ''')))

        # The unique index makes the INSERT OR IGNORE statements in the
        # triggers below act as "create row if missing".
        cursor.execute(self._sql(
            '''
                CREATE UNIQUE INDEX IF NOT EXISTS %(db)s.%(coverage)s_time
                    ON %(coverage)s (kind_codes_id, time_seconds, time_offset)
            '''))

        # On insert of a nut: step +1 at its tmin, step -1 at its tmax, then
        # remove rows whose step has become 0.
        cursor.execute(self._sql(
            '''
                CREATE TRIGGER IF NOT EXISTS %(db)s.%(nuts)s_add_coverage
                AFTER INSERT ON %(nuts)s FOR EACH ROW
                BEGIN
                    INSERT OR IGNORE INTO %(coverage)s VALUES
                    (new.kind_codes_id, new.tmin_seconds, new.tmin_offset, 0)
                    ;
                    UPDATE %(coverage)s
                    SET step = step + 1
                    WHERE new.kind_codes_id == %(coverage)s.kind_codes_id
                        AND new.tmin_seconds == %(coverage)s.time_seconds
                        AND new.tmin_offset == %(coverage)s.time_offset
                    ;
                    INSERT OR IGNORE INTO %(coverage)s VALUES
                    (new.kind_codes_id, new.tmax_seconds, new.tmax_offset, 0)
                    ;
                    UPDATE %(coverage)s
                    SET step = step - 1
                    WHERE new.kind_codes_id == %(coverage)s.kind_codes_id
                        AND new.tmax_seconds == %(coverage)s.time_seconds
                        AND new.tmax_offset == %(coverage)s.time_offset
                    ;
                    DELETE FROM %(coverage)s
                    WHERE new.kind_codes_id == %(coverage)s.kind_codes_id
                        AND new.tmin_seconds == %(coverage)s.time_seconds
                        AND new.tmin_offset == %(coverage)s.time_offset
                        AND step == 0
                    ;
                    DELETE FROM %(coverage)s
                    WHERE new.kind_codes_id == %(coverage)s.kind_codes_id
                        AND new.tmax_seconds == %(coverage)s.time_seconds
                        AND new.tmax_offset == %(coverage)s.time_offset
                        AND step == 0
                    ;
                END
            '''))

        # On delete of a nut: the exact inverse of the insert trigger
        # (step -1 at tmin, step +1 at tmax, then remove zero-step rows).
        cursor.execute(self._sql(
            '''
                CREATE TRIGGER IF NOT EXISTS %(db)s.%(nuts)s_remove_coverage
                BEFORE DELETE ON %(nuts)s FOR EACH ROW
                BEGIN
                    INSERT OR IGNORE INTO %(coverage)s VALUES
                    (old.kind_codes_id, old.tmin_seconds, old.tmin_offset, 0)
                    ;
                    UPDATE %(coverage)s
                    SET step = step - 1
                    WHERE old.kind_codes_id == %(coverage)s.kind_codes_id
                        AND old.tmin_seconds == %(coverage)s.time_seconds
                        AND old.tmin_offset == %(coverage)s.time_offset
                    ;
                    INSERT OR IGNORE INTO %(coverage)s VALUES
                    (old.kind_codes_id, old.tmax_seconds, old.tmax_offset, 0)
                    ;
                    UPDATE %(coverage)s
                    SET step = step + 1
                    WHERE old.kind_codes_id == %(coverage)s.kind_codes_id
                        AND old.tmax_seconds == %(coverage)s.time_seconds
                        AND old.tmax_offset == %(coverage)s.time_offset
                    ;
                    DELETE FROM %(coverage)s
                    WHERE old.kind_codes_id == %(coverage)s.kind_codes_id
                        AND old.tmin_seconds == %(coverage)s.time_seconds
                        AND old.tmin_offset == %(coverage)s.time_offset
                        AND step == 0
                    ;
                    DELETE FROM %(coverage)s
                    WHERE old.kind_codes_id == %(coverage)s.kind_codes_id
                        AND old.tmax_seconds == %(coverage)s.time_seconds
                        AND old.tmax_offset == %(coverage)s.time_offset
                        AND step == 0
                    ;
                END
            '''))

488 

489 def _delete(self): 

490 '''Delete database tables associated with this Squirrel.''' 

491 

492 with self.transaction('delete tables') as cursor: 

493 for s in ''' 

494 DROP TRIGGER %(db)s.%(nuts)s_delete_nuts; 

495 DROP TRIGGER %(db)s.%(nuts)s_delete_nuts2; 

496 DROP TRIGGER %(db)s.%(file_states)s_delete_files; 

497 DROP TRIGGER %(db)s.%(nuts)s_inc_kind_codes; 

498 DROP TRIGGER %(db)s.%(nuts)s_dec_kind_codes; 

499 DROP TABLE %(db)s.%(nuts)s; 

500 DROP TABLE %(db)s.%(kind_codes_count)s; 

501 DROP TRIGGER IF EXISTS %(db)s.%(nuts)s_add_coverage; 

502 DROP TRIGGER IF EXISTS %(db)s.%(nuts)s_remove_coverage; 

503 DROP TABLE IF EXISTS %(db)s.%(coverage)s; 

504 '''.strip().splitlines(): 

505 

506 cursor.execute(self._sql(s)) 

507 

508 Selection._delete(self) 

509 

510 @filldocs 

511 def add(self, 

512 paths, 

513 kinds=None, 

514 format='detect', 

515 include=None, 

516 exclude=None, 

517 check=True): 

518 

519 ''' 

520 Add files to the selection. 

521 

522 :param paths: 

523 Iterator yielding paths to files or directories to be added to the 

524 selection. Recurses into directories. If given a ``str``, it 

525 is treated as a single path to be added. 

526 :type paths: 

527 :py:class:`list` of :py:class:`str` 

528 

529 :param kinds: 

530 Content types to be made available through the Squirrel selection. 

531 By default, all known content types are accepted. 

532 :type kinds: 

533 :py:class:`list` of :py:class:`str` 

534 

535 :param format: 

536 File format identifier or ``'detect'`` to enable auto-detection 

537 (available: %(file_formats)s). 

538 :type format: 

539 str 

540 

541 :param include: 

542 If not ``None``, files are only included if their paths match the 

543 given regular expression pattern. 

544 :type format: 

545 str 

546 

547 :param exclude: 

548 If not ``None``, files are only included if their paths do not 

549 match the given regular expression pattern. 

550 :type format: 

551 str 

552 

553 :param check: 

554 If ``True``, all file modification times are checked to see if 

555 cached information has to be updated (slow). If ``False``, only 

556 previously unknown files are indexed and cached information is used 

557 for known files, regardless of file state (fast, corrresponds to 

558 Squirrel's ``--optimistic`` mode). File deletions will go 

559 undetected in the latter case. 

560 :type check: 

561 bool 

562 

563 :Complexity: 

564 O(log N) 

565 ''' 

566 

567 if isinstance(kinds, str): 

568 kinds = (kinds,) 

569 

570 if isinstance(paths, str): 

571 paths = [paths] 

572 

573 kind_mask = model.to_kind_mask(kinds) 

574 

575 with progress.view(): 

576 Selection.add( 

577 self, util.iter_select_files( 

578 paths, 

579 show_progress=False, 

580 include=include, 

581 exclude=exclude, 

582 pass_through=lambda path: path.startswith('virtual:') 

583 ), kind_mask, format) 

584 

585 self._load(check) 

586 self._update_nuts() 

587 

    def reload(self):
        '''
        Check for modifications and reindex modified files.

        Based on file modification times.
        '''

        # Mark the files in the selection as needing a check, so that the
        # following load with ``check=True`` looks at each of them again.
        self._set_file_states_force_check()
        self._load(check=True)
        # Bring the selection's nut index up to date with the reindexed
        # files.
        self._update_nuts()

598 

599 def add_virtual(self, nuts, virtual_paths=None): 

600 ''' 

601 Add content which is not backed by files. 

602 

603 :param nuts: 

604 Content pieces to be added. 

605 :type nuts: 

606 iterator yielding :py:class:`~pyrocko.squirrel.model.Nut` objects 

607 

608 :param virtual_paths: 

609 List of virtual paths to prevent creating a temporary list of the 

610 nuts while aggregating the file paths for the selection. 

611 :type virtual_paths: 

612 :py:class:`list` of :py:class:`str` 

613 

614 Stores to the main database and the selection. 

615 ''' 

616 

617 if isinstance(virtual_paths, str): 

618 virtual_paths = [virtual_paths] 

619 

620 if virtual_paths is None: 

621 if not isinstance(nuts, list): 

622 nuts = list(nuts) 

623 virtual_paths = set(nut.file_path for nut in nuts) 

624 

625 Selection.add(self, virtual_paths) 

626 self.get_database().dig(nuts) 

627 self._update_nuts() 

628 

629 def add_volatile(self, nuts): 

630 if not isinstance(nuts, list): 

631 nuts = list(nuts) 

632 

633 paths = list(set(nut.file_path for nut in nuts)) 

634 io.backends.virtual.add_nuts(nuts) 

635 self.add_virtual(nuts, paths) 

636 self._volatile_paths.extend(paths) 

637 

638 def add_volatile_waveforms(self, traces): 

639 ''' 

640 Add in-memory waveforms which will be removed when the app closes. 

641 ''' 

642 

643 name = model.random_name() 

644 

645 path = 'virtual:volatile:%s' % name 

646 

647 nuts = [] 

648 for itr, tr in enumerate(traces): 

649 assert tr.tmin <= tr.tmax 

650 tmin_seconds, tmin_offset = model.tsplit(tr.tmin) 

651 tmax_seconds, tmax_offset = model.tsplit( 

652 tr.tmin + tr.data_len()*tr.deltat) 

653 

654 nuts.append(model.Nut( 

655 file_path=path, 

656 file_format='virtual', 

657 file_segment=itr, 

658 file_element=0, 

659 file_mtime=0, 

660 codes=tr.codes, 

661 tmin_seconds=tmin_seconds, 

662 tmin_offset=tmin_offset, 

663 tmax_seconds=tmax_seconds, 

664 tmax_offset=tmax_offset, 

665 deltat=tr.deltat, 

666 kind_id=to_kind_id('waveform'), 

667 content=tr)) 

668 

669 self.add_volatile(nuts) 

670 return path 

671 

672 def _load(self, check): 

673 for _ in io.iload( 

674 self, 

675 content=[], 

676 skip_unchanged=True, 

677 check=check): 

678 pass 

679 

680 def _update_nuts(self, transaction=None): 

681 transaction = transaction or self.transaction('update nuts') 

682 with make_task('Aggregating selection') as task, \ 

683 transaction as cursor: 

684 

685 self._conn.set_progress_handler(task.update, 100000) 

686 nrows = cursor.execute(self._sql( 

687 ''' 

688 INSERT INTO %(db)s.%(nuts)s 

689 SELECT NULL, 

690 nuts.file_id, nuts.file_segment, nuts.file_element, 

691 nuts.kind_id, nuts.kind_codes_id, 

692 nuts.tmin_seconds, nuts.tmin_offset, 

693 nuts.tmax_seconds, nuts.tmax_offset, 

694 nuts.kscale 

695 FROM %(db)s.%(file_states)s 

696 INNER JOIN nuts 

697 ON %(db)s.%(file_states)s.file_id == nuts.file_id 

698 INNER JOIN kind_codes 

699 ON nuts.kind_codes_id == 

700 kind_codes.kind_codes_id 

701 WHERE %(db)s.%(file_states)s.file_state != 2 

702 AND (((1 << kind_codes.kind_id) 

703 & %(db)s.%(file_states)s.kind_mask) != 0) 

704 ''')).rowcount 

705 

706 task.update(nrows) 

707 self._set_file_states_known(transaction) 

708 self._conn.set_progress_handler(None, 0) 

709 

    def add_source(self, source, check=True):
        '''
        Add remote resource.

        :param source:
            Remote data access client instance.
        :type source:
            subclass of :py:class:`~pyrocko.squirrel.client.base.Source`

        :param check:
            Handed on to the source's ``setup`` method.
        :type check:
            bool
        '''

        # The source is registered before it is set up, so it stays in
        # ``self._sources`` even if ``setup`` raises.
        self._sources.append(source)
        source.setup(self, check=check)

722 

723 def add_fdsn(self, *args, **kwargs): 

724 ''' 

725 Add FDSN site for transparent remote data access. 

726 

727 Arguments are passed to 

728 :py:class:`~pyrocko.squirrel.client.fdsn.FDSNSource`. 

729 ''' 

730 

731 self.add_source(fdsn.FDSNSource(*args, **kwargs)) 

732 

733 def add_catalog(self, *args, **kwargs): 

734 ''' 

735 Add online catalog for transparent event data access. 

736 

737 Arguments are passed to 

738 :py:class:`~pyrocko.squirrel.client.catalog.CatalogSource`. 

739 ''' 

740 

741 self.add_source(catalog.CatalogSource(*args, **kwargs)) 

742 

743 def add_dataset(self, ds, check=True, warn_persistent=True): 

744 ''' 

745 Read dataset description from file and add its contents. 

746 

747 :param ds: 

748 Path to dataset description file or dataset description object 

749 . See :py:mod:`~pyrocko.squirrel.dataset`. 

750 :type ds: 

751 :py:class:`str` or :py:class:`~pyrocko.squirrel.dataset.Dataset` 

752 

753 :param check: 

754 If ``True``, all file modification times are checked to see if 

755 cached information has to be updated (slow). If ``False``, only 

756 previously unknown files are indexed and cached information is used 

757 for known files, regardless of file state (fast, corrresponds to 

758 Squirrel's ``--optimistic`` mode). File deletions will go 

759 undetected in the latter case. 

760 :type check: 

761 bool 

762 ''' 

763 if isinstance(ds, str): 

764 ds = dataset.read_dataset(ds) 

765 path = ds 

766 else: 

767 path = None 

768 

769 if warn_persistent and ds.persistent and ( 

770 not self._persistent or (self._persistent != ds.persistent)): 

771 

772 logger.warning( 

773 'Dataset `persistent` flag ignored. Can not be set on already ' 

774 'existing Squirrel instance.%s' % ( 

775 ' Dataset: %s' % path if path else '')) 

776 

777 ds.setup(self, check=check) 

778 

779 def _get_selection_args( 

780 self, kind_id, 

781 obj=None, tmin=None, tmax=None, time=None, codes=None): 

782 

783 if codes is not None: 

784 codes = to_codes(kind_id, codes) 

785 

786 if time is not None: 

787 tmin = time 

788 tmax = time 

789 

790 if obj is not None: 

791 tmin = tmin if tmin is not None else obj.tmin 

792 tmax = tmax if tmax is not None else obj.tmax 

793 codes = codes if codes is not None else obj.codes 

794 

795 return tmin, tmax, codes 

796 

797 def _get_selection_args_str(self, *args, **kwargs): 

798 

799 tmin, tmax, codes = self._get_selection_args(*args, **kwargs) 

800 return 'tmin: %s, tmax: %s, codes: %s' % ( 

801 util.time_to_str(tmin) if tmin is not None else 'none', 

802 util.time_to_str(tmax) if tmin is not None else 'none', 

803 str(codes)) 

804 

805 def _selection_args_to_kwargs( 

806 self, obj=None, tmin=None, tmax=None, time=None, codes=None): 

807 

808 return dict(obj=obj, tmin=tmin, tmax=tmax, time=time, codes=codes) 

809 

    def _timerange_sql(self, tmin, tmax, kind, cond, args, naiv):
        '''
        Append SQL conditions selecting nuts which may intersect a time span.

        The conditions only work on whole seconds; callers still have to do
        the exact comparison on the returned rows.

        :param tmin:
            Start of the query time span.
        :param tmax:
            End of the query time span.
        :param kind:
            Content kind name, converted with ``to_kind_id``.
        :param cond:
            List of SQL condition strings, extended in place.
        :param args:
            List of SQL arguments matching the ``?`` placeholders in
            ``cond``, extended in place. Its order must follow the order of
            the placeholders.
        :param naiv:
            If true, use plain comparisons on ``tmin_seconds`` and
            ``tmax_seconds`` instead of the per-kscale conditions.
        '''

        # Only the integer second parts are used below.
        tmin_seconds, tmin_offset = model.tsplit(tmin)
        tmax_seconds, tmax_offset = model.tsplit(tmax)
        if naiv:
            cond.append('%(db)s.%(nuts)s.tmin_seconds <= ?')
            args.append(tmax_seconds)
        else:
            # One condition per kscale value.
            # NOTE(review): presumably kscale classifies nuts by duration,
            # with tscale_edges[kscale] bounding the duration within a
            # class, so that tmin can be limited to a finite range --
            # confirm against model.tscale_to_kscale.
            tscale_edges = model.tscale_edges
            tmin_cond = []
            for kscale in range(tscale_edges.size + 1):
                if kscale != tscale_edges.size:
                    tscale = int(tscale_edges[kscale])
                    tmin_cond.append('''
                        (%(db)s.%(nuts)s.kind_id = ?
                         AND %(db)s.%(nuts)s.kscale == ?
                         AND %(db)s.%(nuts)s.tmin_seconds BETWEEN ? AND ?)
                    ''')
                    # One second of margin on both sides, as sub-second
                    # offsets are ignored here.
                    args.extend(
                        (to_kind_id(kind), kscale,
                         tmin_seconds - tscale - 1, tmax_seconds + 1))

                else:
                    # Last kscale class: no lower limit on tmin.
                    tmin_cond.append('''
                        (%(db)s.%(nuts)s.kind_id == ?
                         AND %(db)s.%(nuts)s.kscale == ?
                         AND %(db)s.%(nuts)s.tmin_seconds <= ?)
                    ''')

                    args.extend(
                        (to_kind_id(kind), kscale, tmax_seconds + 1))
            if tmin_cond:
                cond.append(' ( ' + ' OR '.join(tmin_cond) + ' ) ')

        # In both modes: the nut must not end before the query span starts.
        cond.append('%(db)s.%(nuts)s.tmax_seconds >= ?')
        args.append(tmin_seconds)

846 

847 def iter_nuts( 

848 self, kind=None, tmin=None, tmax=None, codes=None, naiv=False, 

849 kind_codes_ids=None, path=None): 

850 

851 ''' 

852 Iterate over content entities matching given constraints. 

853 

854 :param kind: 

855 Content kind (or kinds) to extract. 

856 :type kind: 

857 :py:class:`str`, :py:class:`list` of :py:class:`str` 

858 

859 :param tmin: 

860 Start time of query interval. 

861 :type tmin: 

862 timestamp 

863 

864 :param tmax: 

865 End time of query interval. 

866 :type tmax: 

867 timestamp 

868 

869 :param codes: 

870 Pattern of content codes to query. 

871 :type codes: 

872 :py:class:`tuple` of :py:class:`str` 

873 

874 :param naiv: 

875 Bypass time span lookup through indices (slow, for testing). 

876 :type naiv: 

877 :py:class:`bool` 

878 

879 :param kind_codes_ids: 

880 Kind-codes IDs of contents to be retrieved (internal use). 

881 :type kind_codes_ids: 

882 :py:class:`list` of :py:class:`int` 

883 

884 :yields: 

885 :py:class:`~pyrocko.squirrel.model.Nut` objects representing the 

886 intersecting content. 

887 

888 :complexity: 

889 O(log N) for the time selection part due to heavy use of database 

890 indices. 

891 

892 Query time span is treated as a half-open interval ``[tmin, tmax)``. 

893 However, if ``tmin`` equals ``tmax``, the edge logics are modified to 

894 closed-interval so that content intersecting with the time instant ``t 

895 = tmin = tmax`` is returned (otherwise nothing would be returned as 

896 ``[t, t)`` never matches anything). 

897 

898 Time spans of content entities to be matched are also treated as half 

899 open intervals, e.g. content span ``[0, 1)`` is matched by query span 

900 ``[0, 1)`` but not by ``[-1, 0)`` or ``[1, 2)``. Also here, logics are 

901 modified to closed-interval when the content time span is an empty 

902 interval, i.e. to indicate a time instant. E.g. time instant 0 is 

903 matched by ``[0, 1)`` but not by ``[-1, 0)`` or ``[1, 2)``. 

904 ''' 

905 

906 if not isinstance(kind, str): 

907 if kind is None: 

908 kind = model.g_content_kinds 

909 for kind_ in kind: 

910 for nut in self.iter_nuts(kind_, tmin, tmax, codes): 

911 yield nut 

912 

913 return 

914 

915 kind_id = to_kind_id(kind) 

916 

917 cond = [] 

918 args = [] 

919 if tmin is not None or tmax is not None: 

920 assert kind is not None 

921 if tmin is None: 

922 tmin = self.get_time_span()[0] 

923 if tmax is None: 

924 tmax = self.get_time_span()[1] + 1.0 

925 

926 self._timerange_sql(tmin, tmax, kind, cond, args, naiv) 

927 

928 cond.append('kind_codes.kind_id == ?') 

929 args.append(kind_id) 

930 

931 if codes is not None: 

932 pats = codes_patterns_for_kind(kind_id, codes) 

933 if pats: 

934 cond.append( 

935 ' ( %s ) ' % ' OR '.join( 

936 ('kind_codes.codes GLOB ?',) * len(pats))) 

937 args.extend(pat.safe_str for pat in pats) 

938 

939 if kind_codes_ids is not None: 

940 cond.append( 

941 ' ( kind_codes.kind_codes_id IN ( %s ) ) ' % ', '.join( 

942 '?'*len(kind_codes_ids))) 

943 

944 args.extend(kind_codes_ids) 

945 

946 db = self.get_database() 

947 if path is not None: 

948 cond.append('files.path == ?') 

949 args.append(db.relpath(abspath(path))) 

950 

951 sql = (''' 

952 SELECT 

953 files.path, 

954 files.format, 

955 files.mtime, 

956 files.size, 

957 %(db)s.%(nuts)s.file_segment, 

958 %(db)s.%(nuts)s.file_element, 

959 kind_codes.kind_id, 

960 kind_codes.codes, 

961 %(db)s.%(nuts)s.tmin_seconds, 

962 %(db)s.%(nuts)s.tmin_offset, 

963 %(db)s.%(nuts)s.tmax_seconds, 

964 %(db)s.%(nuts)s.tmax_offset, 

965 kind_codes.deltat 

966 FROM files 

967 INNER JOIN %(db)s.%(nuts)s 

968 ON files.file_id == %(db)s.%(nuts)s.file_id 

969 INNER JOIN kind_codes 

970 ON %(db)s.%(nuts)s.kind_codes_id == kind_codes.kind_codes_id 

971 ''') 

972 

973 if cond: 

974 sql += ''' WHERE ''' + ' AND '.join(cond) 

975 

976 sql = self._sql(sql) 

977 if tmin is None and tmax is None: 

978 for row in self._conn.execute(sql, args): 

979 row = (db.abspath(row[0]),) + row[1:] 

980 nut = model.Nut(values_nocheck=row) 

981 yield nut 

982 else: 

983 assert tmin is not None and tmax is not None 

984 if tmin == tmax: 

985 for row in self._conn.execute(sql, args): 

986 row = (db.abspath(row[0]),) + row[1:] 

987 nut = model.Nut(values_nocheck=row) 

988 if (nut.tmin <= tmin < nut.tmax) \ 

989 or (nut.tmin == nut.tmax and tmin == nut.tmin): 

990 

991 yield nut 

992 else: 

993 for row in self._conn.execute(sql, args): 

994 row = (db.abspath(row[0]),) + row[1:] 

995 nut = model.Nut(values_nocheck=row) 

996 if (tmin < nut.tmax and nut.tmin < tmax) \ 

997 or (nut.tmin == nut.tmax 

998 and tmin <= nut.tmin < tmax): 

999 

1000 yield nut 

1001 

1002 def get_nuts(self, *args, **kwargs): 

1003 ''' 

1004 Get content entities matching given constraints. 

1005 

1006 Like :py:meth:`iter_nuts` but returns results as a list. 

1007 ''' 

1008 

1009 return list(self.iter_nuts(*args, **kwargs)) 

1010 

def _split_nuts(
        self, kind, tmin=None, tmax=None, codes=None, path=None):
    '''
    Cut the time span ``[tmin, tmax]`` out of matching index entries.

    Every matching nut which overlaps the given time span is deleted and
    replaced by up to two shortened copies: one covering the part before
    ``tmin`` and one covering the part after ``tmax``. The operation is
    applied to the nuts table of this selection and to the ``main.nuts``
    table, within one transaction.

    :param kind:
        Content kind of the nuts to be modified.
    :param tmin:
        Start of time span to be cut out.
    :param tmax:
        End of time span to be cut out.
    :param codes:
        If not ``None``, restrict to nuts matching these codes patterns.
    :param path:
        If not ``None``, restrict to nuts belonging to the file with this
        path.

    NOTE(review): ``tmin`` and ``tmax`` default to ``None`` but are compared
    against nut times below, so callers presumably always pass both -
    confirm.
    '''

    kind_id = to_kind_id(kind)
    tmin_seconds, tmin_offset = model.tsplit(tmin)
    tmax_seconds, tmax_offset = model.tsplit(tmax)

    # Table name substitutions addressing the nuts table of the main
    # database instead of the one of this selection.
    names_main_nuts = dict(self._names)
    names_main_nuts.update(db='main', nuts='nuts')

    db = self.get_database()

    def main_nuts(s):
        return s % names_main_nuts

    with self.transaction('split nuts') as cursor:
        # modify selection and main
        for sql_subst in [
                self._sql, main_nuts]:

            # Build the WHERE clause: time range, optional codes patterns,
            # optional file path.
            cond = []
            args = []

            self._timerange_sql(tmin, tmax, kind, cond, args, False)

            if codes is not None:
                pats = codes_patterns_for_kind(kind_id, codes)
                if pats:
                    cond.append(
                        ' ( %s ) ' % ' OR '.join(
                            ('kind_codes.codes GLOB ?',) * len(pats)))
                    args.extend(pat.safe_str for pat in pats)

            if path is not None:
                cond.append('files.path == ?')
                args.append(db.relpath(abspath(path)))

            sql = sql_subst('''
                SELECT
                    %(db)s.%(nuts)s.nut_id,
                    %(db)s.%(nuts)s.tmin_seconds,
                    %(db)s.%(nuts)s.tmin_offset,
                    %(db)s.%(nuts)s.tmax_seconds,
                    %(db)s.%(nuts)s.tmax_offset,
                    kind_codes.deltat
                FROM files
                INNER JOIN %(db)s.%(nuts)s
                    ON files.file_id == %(db)s.%(nuts)s.file_id
                INNER JOIN kind_codes
                    ON %(db)s.%(nuts)s.kind_codes_id == kind_codes.kind_codes_id
                WHERE ''' + ' AND '.join(cond)) # noqa

            # Rows to be inserted / deleted are collected first and applied
            # after the SELECT has been fully consumed.
            insert = []
            delete = []
            for row in cursor.execute(sql, args):
                nut_id, nut_tmin_seconds, nut_tmin_offset, \
                    nut_tmax_seconds, nut_tmax_offset, nut_deltat = row

                nut_tmin = model.tjoin(
                    nut_tmin_seconds, nut_tmin_offset)
                nut_tmax = model.tjoin(
                    nut_tmax_seconds, nut_tmax_offset)

                # Only nuts truly overlapping with the cut-out span are
                # touched.
                if nut_tmin < tmax and tmin < nut_tmax:
                    if nut_tmin < tmin:
                        # Remainder before the cut: [nut_tmin, tmin]. The
                        # tuple order matches the placeholders of sql_add;
                        # kscale is derived from the new length in seconds.
                        insert.append((
                            nut_tmin_seconds, nut_tmin_offset,
                            tmin_seconds, tmin_offset,
                            model.tscale_to_kscale(
                                tmin_seconds - nut_tmin_seconds),
                            nut_id))

                    if tmax < nut_tmax:
                        # Remainder after the cut: [tmax, nut_tmax].
                        insert.append((
                            tmax_seconds, tmax_offset,
                            nut_tmax_seconds, nut_tmax_offset,
                            model.tscale_to_kscale(
                                nut_tmax_seconds - tmax_seconds),
                            nut_id))

                    delete.append((nut_id,))

            # New rows copy the file and kind references from the nut they
            # replace, so insertion has to happen before the deletion.
            sql_add = '''
                INSERT INTO %(db)s.%(nuts)s (
                        file_id, file_segment, file_element, kind_id,
                        kind_codes_id, tmin_seconds, tmin_offset,
                        tmax_seconds, tmax_offset, kscale )
                    SELECT
                        file_id, file_segment, file_element,
                        kind_id, kind_codes_id, ?, ?, ?, ?, ?
                    FROM %(db)s.%(nuts)s
                    WHERE nut_id == ?
            '''
            cursor.executemany(sql_subst(sql_add), insert)

            sql_delete = '''
                DELETE FROM %(db)s.%(nuts)s WHERE nut_id == ?
            '''
            cursor.executemany(sql_subst(sql_delete), delete)

1110 

def get_time_span(self, kinds=None):
    '''
    Get time interval over all content in selection.

    :param kinds:
        If not ``None``, restrict query to given content kinds. A single
        kind given as a string is accepted, too.
    :type kinds:
        list of str

    :complexity:
        O(1), independent of the number of nuts.

    :returns:
        ``(tmin, tmax)``, combined time interval of queried content kinds.
        An entry is ``None`` if no time could be determined for it.
    '''

    query_earliest = self._sql('''
        SELECT MIN(tmin_seconds), MIN(tmin_offset)
        FROM %(db)s.%(nuts)s
        WHERE kind_id == ?
            AND tmin_seconds == (
                SELECT MIN(tmin_seconds)
                FROM %(db)s.%(nuts)s
                WHERE kind_id == ?)
    ''')

    query_latest = self._sql('''
        SELECT MAX(tmax_seconds), MAX(tmax_offset)
        FROM %(db)s.%(nuts)s
        WHERE kind_id == ?
            AND tmax_seconds == (
                SELECT MAX(tmax_seconds)
                FROM %(db)s.%(nuts)s
                WHERE kind_id == ?)
    ''')

    if isinstance(kinds, str):
        kinds = [kinds]

    kind_ids = (
        model.g_content_kind_ids if kinds is None
        else model.to_kind_ids(kinds))

    span_tmin = None
    span_tmax = None

    for kind_id in kind_ids:
        # The kind id is needed twice: in the outer and in the inner query.
        params = (kind_id, kind_id)

        for seconds, offset in self._conn.execute(query_earliest, params):
            earliest = model.tjoin(seconds, offset)
            if earliest is None:
                continue

            if span_tmin is None or earliest < span_tmin:
                span_tmin = earliest

        for seconds, offset in self._conn.execute(query_latest, params):
            latest = model.tjoin(seconds, offset)
            if latest is None:
                continue

            if span_tmax is None or latest > span_tmax:
                span_tmax = latest

    return span_tmin, span_tmax

1172 

def has(self, kinds):
    '''
    Check availability of given content kinds.

    :param kinds:
        Content kinds to query.
    :type kinds:
        list of str

    :returns:
        ``True`` if :py:meth:`get_time_span` reports both a start and an
        end time for the queried content kinds, ``False`` otherwise.
    '''
    span_tmin, span_tmax = self.get_time_span(kinds)
    if span_tmin is None:
        return False

    return span_tmax is not None

1189 

def get_deltat_span(self, kind):
    '''
    Get min and max sampling interval of all content of given kind.

    Entries without sampling interval (``None``) are ignored.

    :param kind:
        Content kind
    :type kind:
        str

    :returns: ``(deltat_min, deltat_max)`` or ``(None, None)`` if no
        sampling interval is available.
    '''

    known = []
    for deltat in self.get_deltats(kind):
        if deltat is not None:
            known.append(deltat)

    if not known:
        return None, None

    return min(known), max(known)

1210 

def iter_kinds(self, codes=None):
    '''
    Iterate over content types available in selection.

    Delegates to the database, using the ``kind_codes_count`` table of this
    selection.

    :param codes:
        If given, get kinds only for selected codes identifier.
    :type codes:
        :py:class:`tuple` of :py:class:`str`

    :yields:
        Available content kinds as :py:class:`str`.

    :complexity:
        O(1), independent of number of nuts.
    '''

    counts_table = '%(db)s.%(kind_codes_count)s' % self._names
    return self._database._iter_kinds(
        codes=codes, kind_codes_count=counts_table)

1230 

def iter_deltats(self, kind=None):
    '''
    Iterate over sampling intervals available in selection.

    Delegates to the database, using the ``kind_codes_count`` table of this
    selection.

    :param kind:
        If given, get sampling intervals only for a given content type.
    :type kind:
        str

    :yields:
        :py:class:`float` values.

    :complexity:
        O(1), independent of number of nuts.
    '''
    counts_table = '%(db)s.%(kind_codes_count)s' % self._names
    return self._database._iter_deltats(
        kind=kind, kind_codes_count=counts_table)

1249 

def iter_codes(self, kind=None):
    '''
    Iterate over content identifier code sequences available in selection.

    Delegates to the database, using the ``kind_codes_count`` table of this
    selection.

    :param kind:
        If given, get codes only for a given content type.
    :type kind:
        str

    :yields:
        :py:class:`tuple` of :py:class:`str`

    :complexity:
        O(1), independent of number of nuts.
    '''
    counts_table = '%(db)s.%(kind_codes_count)s' % self._names
    return self._database._iter_codes(
        kind=kind, kind_codes_count=counts_table)

1268 

1269 def _iter_codes_info(self, kind=None): 

1270 ''' 

1271 Iterate over number of occurrences of any (kind, codes) combination. 

1272 

1273 :param kind: 

1274 If given, get counts only for selected content type. 

1275 :type kind: 

1276 str 

1277 

1278 :yields: 

1279 Tuples of the form ``(kind, codes, deltat, kind_codes_id, count)``. 

1280 

1281 :complexity: 

1282 O(1), independent of number of nuts. 

1283 ''' 

1284 return self._database._iter_codes_info( 

1285 kind=kind, 

1286 kind_codes_count='%(db)s.%(kind_codes_count)s' % self._names) 

1287 

def get_kinds(self, codes=None):
    '''
    Get content types available in selection.

    :param codes:
        If given, get kinds only for selected codes identifier.
    :type codes:
        :py:class:`tuple` of :py:class:`str`

    :returns:
        Sorted list of available content types.

    :complexity:
        O(1), independent of number of nuts.

    '''
    kinds = list(self.iter_kinds(codes=codes))
    kinds.sort()
    return kinds

1305 

def get_deltats(self, kind=None):
    '''
    Get sampling intervals available in selection.

    :param kind:
        If given, get sampling intervals only for selected content type.
    :type kind:
        str

    :complexity:
        O(1), independent of number of nuts.

    :returns: Sorted list of available sampling intervals. Entries of
        ``None`` (content without sampling interval) are placed before all
        numeric values.
    '''
    deltats = list(self.iter_deltats(kind=kind))

    # ``None`` cannot be ordered against numbers in Python 3, so a plain
    # ``sorted()`` fails as soon as content with and without sampling
    # interval is mixed. Sort on a key which keeps ``None`` entries first
    # and leaves the order of the numeric values untouched.
    return sorted(
        deltats,
        key=lambda deltat: (
            deltat is not None, 0.0 if deltat is None else deltat))

1321 

def get_codes(self, kind=None):
    '''
    Get identifier code sequences available in selection.

    :param kind:
        If given, get codes only for selected content type.
    :type kind:
        str

    :complexity:
        O(1), independent of number of nuts.

    :returns: Sorted list of available codes as tuples of strings.
    '''
    codes = list(self.iter_codes(kind=kind))
    codes.sort()
    return codes

1337 

def get_counts(self, kind=None):
    '''
    Get number of occurrences of any (kind, codes) combination.

    :param kind:
        If given, get codes only for selected content type.
    :type kind:
        str

    :complexity:
        O(1), independent of number of nuts.

    :returns: ``dict`` with ``counts[kind][codes]`` or ``counts[codes]``
        if kind is not ``None``. If ``kind`` is given but no content of
        that kind is available, an empty ``dict`` is returned.
    '''
    counts = {}
    for kind_id, codes, _, _, count in self._iter_codes_info(kind=kind):
        # Several entries may share the same codes (they are reported per
        # kind_codes_id), so the counts are summed up.
        per_codes = counts.setdefault(kind_id, {})
        per_codes[codes] = per_codes.get(codes, 0) + count

    if kind is not None:
        # No entry at all for the requested kind is a valid outcome, not
        # an error.
        return counts.get(to_kind_id(kind), {})

    return dict((to_kind(kind_id), v) for (kind_id, v) in counts.items())

1369 

def glob_codes(self, kind, codes_list):
    '''
    Find codes matching given patterns.

    :param kind:
        Content kind to be queried.
    :type kind:
        str

    :param codes_list:
        List of code patterns to query. If no pattern results from it
        (e.g. if it is empty), no restriction on the codes is applied and
        all entries of the given kind are returned.
    :type codes_list:
        :py:class:`list` of :py:class:`tuple` of :py:class:`str`

    :returns:
        List of matches of the form ``[kind_codes_id, codes, deltat]``.
    '''

    kind_id = to_kind_id(kind)

    patterns = []
    for codes in codes_list:
        patterns += codes_patterns_for_kind(kind_id, codes)

    sql_args = [kind_id]
    codes_cond = ''
    if patterns:
        # One GLOB test per pattern, any of them may match.
        alternatives = ' OR '.join(
            ['kind_codes.codes GLOB ?'] * len(patterns))
        codes_cond = 'AND ( %s ) ' % alternatives
        sql_args.extend(pattern.safe_str for pattern in patterns)

    sql = self._sql('''
        SELECT kind_codes_id, codes, deltat FROM kind_codes
        WHERE
            kind_id == ? ''' + codes_cond)

    return [list(row) for row in self._conn.execute(sql, sql_args)]

1409 

def update(self, constraint=None, **kwargs):
    '''
    Update or partially update channel and event inventories.

    :param constraint:
        Selection of times or areas to be brought up to date.
    :type constraint:
        :py:class:`~pyrocko.squirrel.client.base.Constraint`

    :param \\*\\*kwargs:
        Shortcut for setting ``constraint=Constraint(**kwargs)``. Only
        used if ``constraint`` is ``None``.

    Every attached source is asked to update first its channel inventory
    and then its event inventory. According to the original documentation,
    sources only submit queries when their expiration date has passed, or
    if the selection spans into previously unseen times or areas.
    '''

    effective = (
        constraint if constraint is not None
        else client.Constraint(**kwargs))

    for src in self._sources:
        src.update_channel_inventory(self, effective)
        src.update_event_inventory(self, effective)

1434 

def update_waveform_promises(self, constraint=None, **kwargs):
    '''
    Permit downloading of remote waveforms.

    :param constraint:
        Remote waveforms compatible with the given constraint are enabled
        for download.
    :type constraint:
        :py:class:`~pyrocko.squirrel.client.base.Constraint`

    :param \\*\\*kwargs:
        Shortcut for setting ``constraint=Constraint(**kwargs)``. Only
        used if ``constraint`` is ``None``.

    Calling this method permits Squirrel to download waveforms from remote
    sources when processing subsequent waveform requests. This works by
    inserting so called waveform promises into the database: each attached
    source is asked to create promises for its channels compatible with
    the given constraint. If a promise then matches in a waveform request,
    Squirrel tries to download the waveform. After a successful download,
    the downloaded waveform is added to the Squirrel and the promise is
    deleted. If the download fails for a reason which looks temporary,
    e.g. a network failure, the promise is kept. If the cause of failure
    seems to be permanent, the promise is deleted so that no further
    attempts are made to download a waveform which might not be available
    from that server at all. To force re-scheduling after a permanent
    failure, call :py:meth:`update_waveform_promises` yet another time.
    '''

    effective = (
        constraint if constraint is not None
        else client.Constraint(**kwargs))

    for src in self._sources:
        src.update_waveform_promises(self, effective)

1470 

def remove_waveform_promises(self, from_database='selection'):
    '''
    Remove waveform promises from live selection or global database.

    Asks each attached source to remove the waveform promises it has
    provided.

    :param from_database:
        Remove from live selection ``'selection'`` or global database
        ``'global'``.
    '''
    for src in self._sources:
        src.remove_waveform_promises(self, from_database=from_database)

1484 

def update_responses(self, constraint=None, **kwargs):
    '''
    Update or partially update response inventories.

    :param constraint:
        Constraint handed to the response inventory update of every
        attached source.
    :type constraint:
        :py:class:`~pyrocko.squirrel.client.base.Constraint`

    :param \\*\\*kwargs:
        Shortcut for setting ``constraint=Constraint(**kwargs)``. Only
        used if ``constraint`` is ``None``.
    '''
    effective = (
        constraint if constraint is not None
        else client.Constraint(**kwargs))

    for src in self._sources:
        src.update_response_inventory(self, effective)

1491 

def get_nfiles(self):
    '''
    Get number of files in selection.

    :returns:
        First column of the first row of a ``COUNT(*)`` query on the file
        states table of this selection.
    '''

    sql = self._sql('''SELECT COUNT(*) FROM %(db)s.%(file_states)s''')
    first_row = next(iter(self._conn.execute(sql)), None)
    if first_row is None:
        return None

    return first_row[0]

1500 

def get_nnuts(self):
    '''
    Get number of nuts in selection.

    :returns:
        First column of the first row of a ``COUNT(*)`` query on the nuts
        table of this selection.
    '''

    sql = self._sql('''SELECT COUNT(*) FROM %(db)s.%(nuts)s''')
    first_row = next(iter(self._conn.execute(sql)), None)
    if first_row is None:
        return None

    return first_row[0]

1509 

def get_total_size(self):
    '''
    Get aggregated file size available in selection.

    :returns:
        Sum over the ``size`` entries of all files in the selection, ``0``
        if the sum is not available (e.g. for an empty selection).
    '''

    sql = self._sql('''
        SELECT SUM(files.size) FROM %(db)s.%(file_states)s
        INNER JOIN files
            ON %(db)s.%(file_states)s.file_id = files.file_id
    ''')

    first_row = next(iter(self._conn.execute(sql)), None)
    if first_row is None:
        return None

    total = first_row[0]
    return total if total else 0

1523 

def get_stats(self):
    '''
    Get statistics on contents available through this selection.

    :returns:
        ``SquirrelStats`` object holding number of files and nuts,
        available kinds and codes, total file size, counts, time span per
        kind and descriptions of the attached sources and operators.
    '''

    kinds = self.get_kinds()
    time_spans = dict(
        (kind, self.get_time_span([kind])) for kind in kinds)

    nfiles = self.get_nfiles()
    nnuts = self.get_nnuts()
    codes = self.get_codes()
    total_size = self.get_total_size()
    counts = self.get_counts()
    sources = [source.describe() for source in self._sources]
    operators = [operator.describe() for operator in self._operators]

    return SquirrelStats(
        nfiles=nfiles,
        nnuts=nnuts,
        kinds=kinds,
        codes=codes,
        total_size=total_size,
        counts=counts,
        time_spans=time_spans,
        sources=sources,
        operators=operators)

1544 

def get_content(
        self,
        nut,
        cache_id='default',
        accessor_id='default',
        show_progress=False,
        model='squirrel'):

    '''
    Get and possibly load full content for a given index entry from file.

    Loads the actual content objects (channel, station, waveform, ...) from
    file. For efficiency, sibling content (all stuff in the same file
    segment) will also be loaded as a side effect. The loaded contents are
    cached in the Squirrel object.

    :param nut:
        Index entry for which the content is wanted.
    :param cache_id:
        Name of the content cache to be used.
    :param accessor_id:
        Name of the accessing consumer, handed on to the content cache.
    :param show_progress:
        Handed on to the file loader.
    :param model:
        Object model of the returned content, handed on to the content
        cache.

    :raises:
        :py:exc:`~pyrocko.squirrel.error.NotAvailable` if the content is
        not in the cache, even after loading the file segment.
    '''

    store = self._content_caches[cache_id]

    if not store.has(nut):
        # Cache miss: (re)load the complete file segment and cache
        # everything found in there.
        loaded_nuts = io.iload(
            nut.file_path,
            segment=nut.file_segment,
            format=nut.file_format,
            database=self._database,
            update_selection=self,
            show_progress=show_progress)

        for loaded in loaded_nuts:
            store.put(loaded)

    try:
        content = store.get(nut, accessor_id, model)
    except KeyError:
        raise error.NotAvailable(
            'Unable to retrieve content: %s, %s, %s, %s' % nut.key)

    return content

1580 

def advance_accessor(self, accessor_id='default', cache_id=None):
    '''
    Notify memory caches about consumer moving to a new data batch.

    :param accessor_id:
        Name of accessing consumer to be advanced.
    :type accessor_id:
        str

    :param cache_id:
        Name of cache for which the accessor should be advanced. By
        default the named accessor is advanced in all registered caches.
        According to the original documentation, two caches named
        ``'default'`` and ``'waveform'`` are available by default.
    :type cache_id:
        str

    See :py:class:`~pyrocko.squirrel.cache.ContentCache` for details on how
    Squirrel's memory caching works and can be tuned. Default behaviour is
    to release data when it has not been used in the latest data
    window/batch. If the accessor is never advanced, data is cached
    indefinitely - which is often desired e.g. for station meta-data.
    Methods for consecutive data traversal, like
    :py:meth:`chopper_waveforms` automatically advance and clear
    their accessor.
    '''
    if cache_id is None:
        cache_ids = self._content_caches.keys()
    else:
        cache_ids = [cache_id]

    for name in cache_ids:
        self._content_caches[name].advance_accessor(accessor_id)

1613 

def clear_accessor(self, accessor_id, cache_id=None):
    '''
    Notify memory caches about a consumer having finished.

    :param accessor_id:
        Name of accessor to be cleared.
    :type accessor_id:
        str

    :param cache_id:
        Name of cache for which the accessor should be cleared. By default
        the named accessor is cleared from all registered caches.
        According to the original documentation, two caches named
        ``'default'`` and ``'waveform'`` are available by default.
    :type cache_id:
        str

    Calling this method clears all references to cache entries held by the
    named accessor. Cache entries are then freed if not referenced by any
    other accessor.
    '''

    if cache_id is None:
        cache_ids = self._content_caches.keys()
    else:
        cache_ids = [cache_id]

    for name in cache_ids:
        self._content_caches[name].clear_accessor(accessor_id)

1642 

def get_cache_stats(self, cache_id):
    '''
    Get statistics of one of the content caches.

    :param cache_id:
        Name of the cache to be queried.
    :type cache_id:
        str

    :returns:
        Whatever the ``get_stats()`` method of the selected cache returns.
    '''
    selected_cache = self._content_caches[cache_id]
    return selected_cache.get_stats()

1645 

1646 def _check_duplicates(self, nuts): 

1647 d = defaultdict(list) 

1648 for nut in nuts: 

1649 d[nut.codes].append(nut) 

1650 

1651 for codes, group in d.items(): 

1652 if len(group) > 1: 

1653 logger.warning( 

1654 'Multiple entries matching codes: %s' % str(codes)) 

1655 

@filldocs
def get_stations(
        self, obj=None, tmin=None, tmax=None, time=None, codes=None,
        model='squirrel'):

    '''
    Get stations matching given constraints.

    %(query_args)s

    :param model:
        Select object model for returned values: ``'squirrel'`` to get
        Squirrel station objects or ``'pyrocko'`` to get Pyrocko station
        objects with channel information attached. ``'stationxml'`` is
        accepted, too, and is handed on to :py:meth:`get_content`.
    :type model:
        str

    :returns:
        List of :py:class:`pyrocko.squirrel.Station
        <pyrocko.squirrel.model.Station>` objects by default or list of
        :py:class:`pyrocko.model.Station <pyrocko.model.station.Station>`
        objects if ``model='pyrocko'`` is requested.

    :raises:
        :py:exc:`ValueError` if an unknown ``model`` is requested.

    See :py:meth:`iter_nuts` for details on time span matching.
    '''

    if model == 'pyrocko':
        return self._get_pyrocko_stations(obj, tmin, tmax, time, codes)

    if model not in ('squirrel', 'stationxml'):
        raise ValueError('Invalid station model: %s' % model)

    selection = self._get_selection_args(
        STATION, obj, tmin, tmax, time, codes)

    nuts = list(self.iter_nuts('station', *selection))
    nuts.sort(key=lambda nut: nut.dkey)
    self._check_duplicates(nuts)

    return [self.get_content(nut, model=model) for nut in nuts]

1694 

@filldocs
def get_channels(
        self, obj=None, tmin=None, tmax=None, time=None, codes=None,
        model='squirrel'):

    '''
    Get channels matching given constraints.

    %(query_args)s

    :param model:
        Object model for returned values, handed on to
        :py:meth:`get_content`.
    :type model:
        str

    :returns:
        List of :py:class:`~pyrocko.squirrel.model.Channel` objects.

    See :py:meth:`iter_nuts` for details on time span matching.
    '''

    selection = self._get_selection_args(
        CHANNEL, obj, tmin, tmax, time, codes)

    nuts = list(self.iter_nuts('channel', *selection))
    nuts.sort(key=lambda nut: nut.dkey)
    self._check_duplicates(nuts)

    return [self.get_content(nut, model=model) for nut in nuts]

1718 

@filldocs
def get_sensors(
        self, obj=None, tmin=None, tmax=None, time=None, codes=None):

    '''
    Get sensors matching given constraints.

    %(query_args)s

    :returns:
        List of :py:class:`~pyrocko.squirrel.model.Sensor` objects.

    Sensors are assembled from channels. If a channel code other than
    ``'*'`` is selected, its last character is replaced by the wildcard
    ``'?'`` before querying the channels.

    See :py:meth:`iter_nuts` for details on time span matching.
    '''

    tmin, tmax, codes = self._get_selection_args(
        CHANNEL, obj, tmin, tmax, time, codes)

    if codes is not None and codes.channel != '*':
        sensor_pattern = codes.channel[:-1] + '?'
        codes = codes.replace(channel=sensor_pattern)

    nuts = list(self.iter_nuts('channel', tmin, tmax, codes))
    nuts.sort(key=lambda nut: nut.dkey)
    self._check_duplicates(nuts)

    channels = (self.get_content(nut) for nut in nuts)
    return model.Sensor.from_channels(channels)

1747 

@filldocs
def get_responses(
        self, obj=None, tmin=None, tmax=None, time=None, codes=None,
        model='squirrel'):

    '''
    Get instrument responses matching given constraints.

    %(query_args)s

    :param model:
        Object model for returned values, handed on to
        :py:meth:`get_content`.
    :type model:
        str

    :returns:
        List of :py:class:`~pyrocko.squirrel.model.Response` objects.

    See :py:meth:`iter_nuts` for details on time span matching.
    '''

    selection = self._get_selection_args(
        RESPONSE, obj, tmin, tmax, time, codes)

    nuts = list(self.iter_nuts('response', *selection))
    nuts.sort(key=lambda nut: nut.dkey)
    self._check_duplicates(nuts)

    return [self.get_content(nut, model=model) for nut in nuts]

1771 

@filldocs
def get_response(
        self, obj=None, tmin=None, tmax=None, time=None, codes=None,
        model='squirrel'):

    '''
    Get instrument response matching given constraints.

    %(query_args)s

    :param model:
        Object model for returned value, handed on to
        :py:meth:`get_responses`.
    :type model:
        str

    :returns:
        :py:class:`~pyrocko.squirrel.model.Response` object.

    Same as :py:meth:`get_responses` but returning exactly one response.
    Raises :py:exc:`~pyrocko.squirrel.error.NotAvailable` if zero or more
    than one is available.

    See :py:meth:`iter_nuts` for details on time span matching.
    '''

    responses = self.get_responses(
        obj, tmin, tmax, time, codes, model=model)

    nresponses = len(responses)
    if nresponses == 1:
        return responses[0]

    if nresponses == 0:
        raise error.NotAvailable(
            'No instrument response available (%s).'
            % self._get_selection_args_str(
                RESPONSE, obj, tmin, tmax, time, codes))

    # More than one candidate: list them in the error message if their
    # summaries are available (Squirrel object model).
    if model == 'squirrel':
        summaries = [' ' + resp.summary for resp in responses]
        rinfo = ':\n' + '\n'.join(summaries)
    else:
        rinfo = '.'

    raise error.NotAvailable(
        'Multiple instrument responses matching given constraints '
        '(%s)%s' % (
            self._get_selection_args_str(
                RESPONSE, obj, tmin, tmax, time, codes), rinfo))

1814 

@filldocs
def get_events(
        self, obj=None, tmin=None, tmax=None, time=None, codes=None):

    '''
    Get events matching given constraints.

    %(query_args)s

    :returns:
        List of :py:class:`~pyrocko.model.event.Event` objects.

    See :py:meth:`iter_nuts` for details on time span matching.
    '''

    selection = self._get_selection_args(
        EVENT, obj, tmin, tmax, time, codes)

    nuts = list(self.iter_nuts('event', *selection))
    nuts.sort(key=lambda nut: nut.dkey)
    self._check_duplicates(nuts)

    return [self.get_content(nut) for nut in nuts]

1835 

def _redeem_promises(self, *args):
    '''
    Try to download waveforms for promises matching given selection.

    :param \\*args:
        Selection arguments ``(tmin, tmax, codes)``, handed on unchanged to
        :py:meth:`iter_nuts`.

    Matching waveform promises are cut into blocks and for each block a
    :py:class:`WaveformOrder` is created, listing the gaps not covered by
    already available waveforms with the same codes. Orders without gaps
    need no download; their time span is cut out of the promise right
    away. The remaining orders are grouped by ``order_key`` and each group
    is sorted by the position of the order's source among the attached
    FDSN sources. The groups are then processed in rounds: in each round,
    the next order of every group is attempted, using one download thread
    per source.

    Callbacks issued by the download threads are queued and executed in
    the calling thread. On success, the time span of the order and of all
    pending orders of the same group is cut out of the promises and the
    group is dropped. On permanent failure, the time span of the failed
    order is cut out of its promise. On temporary failure, nothing is
    changed.
    '''

    tmin, tmax, _ = args

    waveforms = list(self.iter_nuts('waveform', *args))
    promises = list(self.iter_nuts('waveform_promise', *args))

    # Time spans already covered by available waveforms, by codes.
    codes_to_avail = defaultdict(list)
    for nut in waveforms:
        codes_to_avail[nut.codes].append((nut.tmin, nut.tmax))

    orders = []
    for promise in promises:
        waveforms_avail = codes_to_avail[promise.codes]
        for block_tmin, block_tmax in blocks(
                max(tmin, promise.tmin),
                min(tmax, promise.tmax),
                promise.deltat):

            orders.append(
                WaveformOrder(
                    source_id=promise.file_path,
                    codes=promise.codes,
                    tmin=block_tmin,
                    tmax=block_tmax,
                    deltat=promise.deltat,
                    gaps=gaps(waveforms_avail, block_tmin, block_tmax)))

    # Orders without gaps are already satisfied, nothing to download.
    orders_noop, orders = lpick(lambda order: order.gaps, orders)

    order_keys_noop = set(order_key(order) for order in orders_noop)
    if len(order_keys_noop) != 0 or len(orders_noop) != 0:
        logger.info(
            'Waveform orders already satisified with cached/local data: '
            '%i (%i)' % (len(order_keys_noop), len(orders_noop)))

    source_ids = []
    sources = {}
    for source in self._sources:
        if isinstance(source, fdsn.FDSNSource):
            source_ids.append(source._source_id)
            sources[source._source_id] = source

    # Sources attached earlier are tried first.
    source_priority = dict(
        (source_id, i) for (i, source_id) in enumerate(source_ids))

    order_groups = defaultdict(list)
    for order in orders:
        order_groups[order_key(order)].append(order)

    for k, order_group in order_groups.items():
        order_group.sort(
            key=lambda order: source_priority[order.source_id])

    n_order_groups = len(order_groups)

    if len(order_groups) != 0 or len(orders) != 0:
        logger.info(
            'Waveform orders standing for download: %i (%i)'
            % (len(order_groups), len(orders)))

        task = make_task('Waveform orders processed', n_order_groups)
    else:
        task = None

    def split_promise(order):
        self._split_nuts(
            'waveform_promise',
            order.tmin, order.tmax,
            codes=order.codes,
            path=order.source_id)

    def release_order_group(order):
        # The order has been fulfilled, so the alternatives still pending
        # in its group are not needed anymore.
        okey = order_key(order)
        for followup in order_groups[okey]:
            split_promise(followup)

        del order_groups[okey]

        if task:
            task.update(n_order_groups - len(order_groups))

    def noop(order):
        pass

    def success(order):
        release_order_group(order)
        split_promise(order)

    def batch_add(paths):
        self.add(paths)

    # Callbacks coming from the download threads are put into this queue
    # and are executed in the calling thread, see below.
    calls = queue.Queue()

    def enqueue(f):
        def wrapper(*args):
            calls.put((f, args))

        return wrapper

    def download(source_id, source_orders):
        # Source and orders are passed in as arguments. Looking them up
        # through the loop variable of the thread-starting loop from inside
        # the thread would be subject to late binding: the variable may
        # already refer to the next source when the thread gets to run.
        try:
            sources[source_id].download_waveforms(
                source_orders,
                success=enqueue(success),
                error_permanent=enqueue(split_promise),
                error_temporary=noop,
                batch_add=enqueue(batch_add))

        finally:
            # Signal to the calling thread that this download is done.
            calls.put(None)

    for order in orders_noop:
        split_promise(order)

    while order_groups:

        # Take the next order of every group, drop exhausted groups.
        orders_now = []
        empty = []
        for k, order_group in order_groups.items():
            try:
                orders_now.append(order_group.pop(0))
            except IndexError:
                empty.append(k)

        for k in empty:
            del order_groups[k]

        by_source_id = defaultdict(list)
        for order in orders_now:
            by_source_id[order.source_id].append(order)

        threads = []
        for source_id in by_source_id:
            thread = threading.Thread(
                target=download,
                args=(source_id, by_source_id[source_id]))
            thread.start()
            threads.append(thread)

        # Execute queued callbacks until every thread has sent its
        # end marker.
        ndone = 0
        while ndone < len(threads):
            ret = calls.get()
            if ret is None:
                ndone += 1
            else:
                ret[0](*ret[1])

        for thread in threads:
            thread.join()

        if task:
            task.update(n_order_groups - len(order_groups))

    if task:
        task.done()

1998 

@filldocs
def get_waveform_nuts(
        self, obj=None, tmin=None, tmax=None, time=None, codes=None):

    '''
    Get waveform content entities matching given constraints.

    %(query_args)s

    Like :py:meth:`get_nuts` with ``kind='waveform'`` but additionally
    resolves matching waveform promises (downloads waveforms from remote
    sources) before querying. The result is sorted by the ``dkey``
    attribute of the nuts.

    See :py:meth:`iter_nuts` for details on time span matching.
    '''

    selection = self._get_selection_args(
        WAVEFORM, obj, tmin, tmax, time, codes)

    # Must happen before the query, so that newly downloaded waveforms are
    # included in the result.
    self._redeem_promises(*selection)

    nuts = list(self.iter_nuts('waveform', *selection))
    nuts.sort(key=lambda nut: nut.dkey)
    return nuts

2019 

@filldocs
def get_waveforms(
        self, obj=None, tmin=None, tmax=None, time=None, codes=None,
        uncut=False, want_incomplete=True, degap=True, maxgap=5,
        maxlap=None, snap=None, include_last=False, load_data=True,
        accessor_id='default', operator_params=None):

    '''
    Retrieve waveform traces satisfying the given constraints.

    %(query_args)s

    :param uncut:
        If ``True``, skip cutting to [``tmin``, ``tmax``] as well as
        degapping/deoverlapping and hand back the traces untouched, as
        they are read from the file segments. File segments are always
        read in their entirety.
    :type uncut:
        bool

    :param want_incomplete:
        If ``True``, gappy/incomplete traces are kept in the result.
    :type want_incomplete:
        bool

    :param degap:
        If ``True``, connect traces and remove gaps and overlaps.
    :type degap:
        bool

    :param maxgap:
        Largest gap (in samples) which gets filled with interpolated
        samples when ``degap`` is ``True``.
    :type maxgap:
        int

    :param maxlap:
        Largest overlap (in samples) which gets removed when ``degap`` is
        ``True``.
    :type maxlap:
        int

    :param snap:
        Pair of rounding functions used to turn time instants into sample
        indices, for trace start and trace end, respectively. Defaults to
        ``(round, round)``.
    :type snap:
        tuple of 2 callables

    :param include_last:
        If ``True``, append one more sample to the returned traces (the
        sample which would be the first sample of a query with ``tmin``
        set to the current value of ``tmax``).
    :type include_last:
        bool

    :param load_data:
        If ``True``, waveform data samples are read from files (or cache).
        If ``False``, meta-information-only traces are returned (dummy
        traces with no data samples).
    :type load_data:
        bool

    :param accessor_id:
        Name of the consumer on whose behalf data is accessed. Used in
        cache management (see :py:mod:`~pyrocko.squirrel.cache`) as a key
        to distinguish different points of extraction when deciding when
        to release cached waveform data. Should be used when data is
        alternately extracted from more than one region / selection.
    :type accessor_id:
        str

    :param operator_params:
        Handed over as ``params`` to the ``get_waveforms`` method of the
        operator registered for ``codes``, if there is one. Not used
        otherwise.

    :returns:
        The selected traces. An empty list is returned (and a warning is
        logged) when no time span is defined for waveforms and waveform
        promises. If an operator handles ``codes``, its result is
        returned as is.

    See :py:meth:`iter_nuts` for details on time span matching.

    Loaded data is kept in memory (at least) until
    :py:meth:`clear_accessor` has been called or
    :py:meth:`advance_accessor` has been called two consecutive times
    without data being accessed between the two calls (by this accessor).
    Data may still be further kept in the memory cache if held alive by
    consumers with a different ``accessor_id``.
    '''

    tmin, tmax, codes = self._get_selection_args(
        WAVEFORM, obj, tmin, tmax, time, codes)

    span_tmin, span_tmax = self.get_time_span(
        ['waveform', 'waveform_promise'])

    if None in (span_tmin, span_tmax):
        logger.warning(
            'No waveforms available.')
        return []

    # Open-ended queries fall back to the overall available time span.
    if tmin is None:
        tmin = span_tmin

    if tmax is None:
        tmax = span_tmax

    # Delegate to an operator if one is registered for the given codes.
    operator = self.get_operator(codes) if codes is not None else None
    if operator is not None:
        return operator.get_waveforms(
            self, codes,
            tmin=tmin, tmax=tmax,
            uncut=uncut, want_incomplete=want_incomplete, degap=degap,
            maxgap=maxgap, maxlap=maxlap, snap=snap,
            include_last=include_last, load_data=load_data,
            accessor_id=accessor_id, params=operator_params)

    traces = []
    for nut in self.get_waveform_nuts(obj, tmin, tmax, time, codes):
        if load_data:
            traces.append(self.get_content(nut, 'waveform', accessor_id))
        else:
            traces.append(trace.Trace(**nut.trace_kwargs))

    if uncut:
        return traces

    snap_funcs = (round, round) if snap is None else snap

    chopped = []
    for tr in traces:
        if not load_data and tr.ydata is not None:
            # Work on a data-less copy, leaving the original untouched.
            tr = tr.copy(data=False)
            tr.ydata = None

        try:
            piece = tr.chop(
                tmin, tmax,
                inplace=False,
                snap=snap_funcs,
                include_last=include_last)

        except trace.NoData:
            # Nothing of this trace falls into the window.
            continue

        chopped.append(piece)

    return self._process_chopped(
        chopped, degap, maxgap, maxlap, want_incomplete, tmin, tmax)

2163 

@filldocs
def chopper_waveforms(
        self, obj=None, tmin=None, tmax=None, time=None, codes=None,
        tinc=None, tpad=0.,
        want_incomplete=True, snap_window=False,
        degap=True, maxgap=5, maxlap=None,
        snap=None, include_last=False, load_data=True,
        accessor_id=None, clear_accessor=True, operator_params=None):

    '''
    Iterate window-wise over waveform archive.

    %(query_args)s

    :param tinc:
        Time increment (window shift time) (default uses ``tmax-tmin``).
    :type tinc:
        timestamp

    :param tpad:
        Padding time appended on either side of the data window (window
        overlap is ``2*tpad``).
    :type tpad:
        timestamp

    :param want_incomplete:
        If ``True``, gappy/incomplete traces are included in the result.
    :type want_incomplete:
        bool

    :param snap_window:
        If ``True``, start time windows at multiples of tinc with respect
        to system time zero. Only effective when ``tinc`` is given.
    :type snap_window:
        bool

    :param degap:
        If ``True``, connect traces and remove gaps and overlaps.
    :type degap:
        bool

    :param maxgap:
        Maximum gap size in samples which is filled with interpolated
        samples when ``degap`` is ``True``.
    :type maxgap:
        int

    :param maxlap:
        Maximum overlap size in samples which is removed when ``degap`` is
        ``True``.
    :type maxlap:
        int

    :param snap:
        Rounding functions used when computing sample index from time
        instance, for trace start and trace end, respectively. By default,
        ``(round, round)`` is used.
    :type snap:
        tuple of 2 callables

    :param include_last:
        If ``True``, add one more sample to the returned traces (the sample
        which would be the first sample of a query with ``tmin`` set to the
        current value of ``tmax``).
    :type include_last:
        bool

    :param load_data:
        If ``True``, waveform data samples are read from files (or cache).
        If ``False``, meta-information-only traces are returned (dummy
        traces with no data samples).
    :type load_data:
        bool

    :param accessor_id:
        Name of consumer on whose behalf data is accessed. Used in cache
        management (see :py:mod:`~pyrocko.squirrel.cache`). Used as a key
        to distinguish different points of extraction for the decision of
        when to release cached waveform data. Should be used when data is
        alternately extracted from more than one region / selection. If
        ``None`` (default), a name is generated from the number of
        currently active choppers.
    :type accessor_id:
        str

    :param clear_accessor:
        If ``True`` (default), :py:meth:`clear_accessor` is called when the
        chopper finishes. Set to ``False`` to keep loaded waveforms in
        memory when the generator returns.
    :type clear_accessor:
        bool

    :param operator_params:
        Passed through as ``operator_params`` to :py:meth:`get_waveforms`
        for every window.

    :yields:
        One ``Batch`` object per extracted time window, carrying the
        window bounds (``tmin``, ``tmax``, without padding), the window
        index ``i``, the total number of windows ``n`` and the
        ``traces`` obtained from :py:meth:`get_waveforms` for the padded
        window.

    See :py:meth:`iter_nuts` for details on time span matching.
    '''

    tmin, tmax, codes = self._get_selection_args(
        WAVEFORM, obj, tmin, tmax, time, codes)

    self_tmin, self_tmax = self.get_time_span(
        ['waveform', 'waveform_promise'])

    if None in (self_tmin, self_tmax):
        logger.warning(
            'Content has undefined time span. No waveforms and no '
            'waveform promises?')
        return

    if snap_window and tinc is not None:
        # Align the iteration range to multiples of tinc.
        tmin = tmin if tmin is not None else self_tmin
        tmax = tmax if tmax is not None else self_tmax
        tmin = math.floor(tmin / tinc) * tinc
        tmax = math.ceil(tmax / tinc) * tinc
    else:
        # Shrink the default range by tpad on both sides; the padding is
        # added back to every window further below.
        tmin = tmin if tmin is not None else self_tmin + tpad
        tmax = tmax if tmax is not None else self_tmax - tpad

    tinc = tinc if tinc is not None else tmax - tmin

    if accessor_id is None:
        accessor_id = 'chopper%i' % self._n_choppers_active

    # Incremented right before the try block so that the decrement in the
    # finally clause is always paired with it.
    self._n_choppers_active += 1

    try:
        # Small tolerance so that a range which is an exact multiple of
        # tinc does not produce an extra, empty trailing window.
        eps = tinc * 1e-6
        if tinc != 0.0:
            nwin = int(((tmax - eps) - tmin) / tinc) + 1
        else:
            nwin = 1

        for iwin in range(nwin):
            wmin, wmax = tmin+iwin*tinc, min(tmin+(iwin+1)*tinc, tmax)

            chopped = self.get_waveforms(
                tmin=wmin-tpad,
                tmax=wmax+tpad,
                codes=codes,
                snap=snap,
                include_last=include_last,
                load_data=load_data,
                want_incomplete=want_incomplete,
                degap=degap,
                maxgap=maxgap,
                maxlap=maxlap,
                accessor_id=accessor_id,
                operator_params=operator_params)

            self.advance_accessor(accessor_id)

            yield Batch(
                tmin=wmin,
                tmax=wmax,
                i=iwin,
                n=nwin,
                traces=chopped)

    finally:
        # Also runs when the generator is closed before exhaustion.
        self._n_choppers_active -= 1
        if clear_accessor:
            self.clear_accessor(accessor_id, 'waveform')

2328 

def _process_chopped(
        self, chopped, degap, maxgap, maxlap, want_incomplete, tmin, tmax):

    '''
    Post-process traces which have been cut to a time window.

    The given list is sorted in place by ``full_id``. If ``degap`` is set,
    the traces are passed through :py:func:`pyrocko.trace.degapper` with
    the given ``maxgap`` and ``maxlap``. Unless ``want_incomplete`` is set,
    only traces spanning [``tmin``, ``tmax``) to within half a sampling
    interval are kept; with ``degap``, traces falling short by at most
    five sampling intervals at both ends are extended in place by
    repeating their end values and kept as well.

    :returns:
        List of processed traces.
    '''

    chopped.sort(key=lambda tr: tr.full_id)

    if degap:
        chopped = trace.degapper(chopped, maxgap=maxgap, maxlap=maxlap)

    if want_incomplete:
        return chopped

    complete = []
    for tr in chopped:
        deltat = tr.deltat

        # Mismatch of trace start / end with respect to the window.
        emin = tr.tmin - tmin
        emax = tr.tmax + deltat - tmax

        tolerance = 0.5 * deltat
        if abs(emin) <= tolerance and abs(emax) <= tolerance:
            complete.append(tr)
            continue

        if not degap:
            continue

        slack = 5. * deltat
        if 0. < emin <= slack and -slack <= emax < 0.:
            tr.extend(tmin, tmax - deltat, fillmethod='repeat')
            complete.append(tr)

    return complete

2354 

def _get_pyrocko_stations(
        self, obj=None, tmin=None, tmax=None, time=None, codes=None):

    '''
    Build classic :py:class:`pyrocko.model.Station` objects.

    Station and channel entries matching the query are grouped by their
    ``codes.nsl`` attribute. For every group, the station arguments of all
    entries are combined with :py:func:`pyrocko.util.consistency_merge`,
    and so are the channel arguments of entries sharing the same channel
    code.

    :returns:
        List of :py:class:`pyrocko.model.Station` objects, ordered by
        their ``nsl`` keys, each with its channels ordered by channel
        code.
    '''

    from pyrocko import model as pmodel

    # nsl -> (list of station args, list of channel objects)
    grouped = defaultdict(lambda: (list(), list()))

    for station in self.get_stations(obj, tmin, tmax, time, codes):
        station_args = station._get_pyrocko_station_args()
        grouped[station.codes.nsl][0].append(station_args)

    for channel in self.get_channels(obj, tmin, tmax, time, codes):
        station_args = channel._get_pyrocko_station_args()
        args_of_group, channels_of_group = grouped[channel.codes.nsl]
        args_of_group.append(station_args)
        channels_of_group.append(channel)

    pstations = []
    for nsl in sorted(grouped.keys()):
        args_of_group, channels_of_group = grouped[nsl]
        sargs = util.consistency_merge(
            [('',) + x for x in args_of_group])

        args_by_channel = defaultdict(list)
        for ch in channels_of_group:
            args_by_channel[ch.codes.channel].append(
                ch._get_pyrocko_channel_args())

        pchannels = []
        for cha in sorted(args_by_channel.keys()):
            cargs = util.consistency_merge(
                [('',) + x for x in args_by_channel[cha]])
            pchannels.append(pmodel.Channel(*cargs))

        pstations.append(
            pmodel.Station(*sargs, channels=pchannels))

    return pstations

2396 

@property
def pile(self):

    '''
    Emulation of the older :py:class:`pyrocko.pile.Pile` interface.

    Gives access to a :py:class:`pyrocko.squirrel.pile.Pile` object, which
    mimics most of the methods of the older :py:class:`pyrocko.pile.Pile`
    while relying on the fluffy power of the Squirrel under the hood. The
    object is created on first access and reused afterwards.

    It can serve as a drop-in replacement for piles used in existing
    scripts and programs for efficient waveform data access. The
    Squirrel-based pile scales better for large datasets. Newer scripts
    should use Squirrel's native methods to avoid the emulation overhead.
    '''
    from . import pile as pile_module

    if self._pile is not None:
        return self._pile

    self._pile = pile_module.Pile(self)
    return self._pile

2419 

def snuffle(self):
    '''
    Open the dataset in Snuffler (via the emulated :py:attr:`pile`).
    '''
    emulated_pile = self.pile
    emulated_pile.snuffle()

2425 

def _gather_codes_keys(self, kind, gather, selector):
    '''
    Collect the distinct keys derived from the codes of a content kind.

    :param kind:
        Content kind, passed on to :py:meth:`iter_codes`.

    :param gather:
        Callable mapping a codes object to the key to be collected.

    :param selector:
        Optional predicate; codes for which it returns a false value are
        skipped. ``None`` accepts all codes.

    :returns:
        :py:class:`set` of keys.
    '''
    keys = set()
    for codes in self.iter_codes(kind):
        if selector is None or selector(codes):
            keys.add(gather(codes))

    return keys

2431 

def __str__(self):
    '''
    Return the string form of the object obtained from ``get_stats()``.
    '''
    stats = self.get_stats()
    return str(stats)

2434 

def get_coverage(
        self, kind, tmin=None, tmax=None, codes_list=None, limit=None):

    '''
    Get coverage information.

    Get information about strips of gapless data coverage.

    :param kind:
        Content kind to be queried.
    :type kind:
        str

    :param tmin:
        Start time of query interval.
    :type tmin:
        timestamp

    :param tmax:
        End time of query interval.
    :type tmax:
        timestamp

    :param codes_list:
        If given, restrict query to given content codes patterns.
    :type codes_list:
        :py:class:`list` of :py:class:`~pyrocko.squirrel.model.Codes`
        objects appropriate for the queried content type, or anything which
        can be converted to such objects.

    :param limit:
        Limit query to return only up to a given maximum number of entries
        per matching time series (without setting this option, very gappy
        data could cause the query to execute for a very long time). If
        the number of entries read for a time series equals ``limit``, the
        list of changes for that time series is replaced by ``None``.
    :type limit:
        int

    :returns:
        Information about time spans covered by the requested time series
        data. Time series without any entry in the coverage table are
        omitted.
    :rtype:
        :py:class:`list` of :py:class:`Coverage` objects
    '''

    # The SQL conditions below compare against separate seconds / offset
    # columns, so the query bounds are split accordingly.
    tmin_seconds, tmin_offset = model.tsplit(tmin)
    tmax_seconds, tmax_offset = model.tsplit(tmax)
    kind_id = to_kind_id(kind)

    codes_info = list(self._iter_codes_info(kind=kind))

    # Work list of (pattern, kind_codes_id, codes, deltat) tuples. Without
    # codes_list, every available codes entry acts as its own pattern.
    kdata_all = []
    if codes_list is None:
        for _, codes, deltat, kind_codes_id, _ in codes_info:
            kdata_all.append((codes, kind_codes_id, codes, deltat))

    else:
        for pattern in codes_list:
            pattern = to_codes(kind_id, pattern)
            for _, codes, deltat, kind_codes_id, _ in codes_info:
                if model.match_codes(pattern, codes):
                    kdata_all.append(
                        (pattern, kind_codes_id, codes, deltat))

    kind_codes_ids = [x[1] for x in kdata_all]

    # Number of nuts returned by iter_nuts for the instant tmin, per
    # (codes, deltat). Used as the starting count of each time series.
    counts_at_tmin = {}
    if tmin is not None:
        for nut in self.iter_nuts(
                kind, tmin, tmin, kind_codes_ids=kind_codes_ids):

            k = nut.codes, nut.deltat
            if k not in counts_at_tmin:
                counts_at_tmin[k] = 0

            counts_at_tmin[k] += 1

    coverages = []
    for pattern, kind_codes_id, codes, deltat in kdata_all:
        # Layout: [pattern, codes, deltat, first time, last time, changes]
        entry = [pattern, codes, deltat, None, None, []]

        # Look up the earliest (ASC) and latest (DESC) coverage table
        # entry of this time series.
        for i, order in [(0, 'ASC'), (1, 'DESC')]:
            sql = self._sql('''
                SELECT
                    time_seconds,
                    time_offset
                FROM %(db)s.%(coverage)s
                WHERE
                    kind_codes_id == ?
                ORDER BY
                    kind_codes_id ''' + order + ''',
                    time_seconds ''' + order + ''',
                    time_offset ''' + order + '''
                LIMIT 1
            ''')

            for row in self._conn.execute(sql, [kind_codes_id]):
                entry[3+i] = model.tjoin(row[0], row[1])

        # No coverage table entries for this time series: skip it.
        if None in entry[3:5]:
            continue

        # Positional SQL arguments; their order must match the order of
        # the '?' placeholders in the statement assembled below.
        args = [kind_codes_id]

        sql_time = ''
        if tmin is not None:
            # intentionally < because (== tmin) is queried from nuts
            sql_time += ' AND ( ? < time_seconds ' \
                'OR ( ? == time_seconds AND ? < time_offset ) ) '
            args.extend([tmin_seconds, tmin_seconds, tmin_offset])

        if tmax is not None:
            sql_time += ' AND ( time_seconds < ? ' \
                'OR ( ? == time_seconds AND time_offset <= ? ) ) '
            args.extend([tmax_seconds, tmax_seconds, tmax_offset])

        sql_limit = ''
        if limit is not None:
            sql_limit = ' LIMIT ?'
            args.append(limit)

        sql = self._sql('''
            SELECT
                time_seconds,
                time_offset,
                step
            FROM %(db)s.%(coverage)s
            WHERE
                kind_codes_id == ?
                ''' + sql_time + '''
            ORDER BY
                kind_codes_id,
                time_seconds,
                time_offset
        ''' + sql_limit)

        rows = list(self._conn.execute(sql, args))

        if limit is not None and len(rows) == limit:
            # Limit reached: the list of changes may be truncated, so it
            # is replaced by None.
            entry[-1] = None
        else:
            # Build (time, count) pairs by accumulating the step values,
            # starting from the count found at tmin.
            counts = counts_at_tmin.get((codes, deltat), 0)
            tlast = None
            if tmin is not None:
                entry[-1].append((tmin, counts))
                tlast = tmin

            for row in rows:
                t = model.tjoin(row[0], row[1])
                counts += row[2]
                entry[-1].append((t, counts))
                tlast = t

            # Close the sequence at tmax unless the last pair is already
            # located there.
            if tmax is not None and (tlast is None or tlast != tmax):
                entry[-1].append((tmax, counts))

        coverages.append(model.Coverage.from_values(entry + [kind_id]))

    return coverages

2592 

def add_operator(self, op):
    '''
    Append ``op`` to the list of attached operators.
    '''
    attached = self._operators
    attached.append(op)

2595 

def update_operator_mappings(self):
    '''
    Let every attached operator refresh its mappings.

    The codes obtained from ``get_codes(kind='channel')`` and the operator
    registry are handed to the ``update_mappings`` method of each
    operator, in the order the operators were added.
    '''
    available = self.get_codes(kind='channel')

    for op in self._operators:
        op.update_mappings(available, self._operator_registry)

2601 

def iter_operator_mappings(self):
    '''
    Iterate over the mappings of all attached operators.

    :yields:
        Tuples ``(operator, in_codes, out_codes)``, one for every pair
        produced by the ``iter_mappings`` method of each operator.
    '''
    for op in self._operators:
        for codes_in, codes_out in op.iter_mappings():
            mapping = (op, codes_in, codes_out)
            yield mapping

2606 

def get_operator_mappings(self):
    '''
    Get the results of :py:meth:`iter_operator_mappings` as a list.
    '''
    return [*self.iter_operator_mappings()]

2609 

def get_operator(self, codes):
    '''
    Look up the operator registered for ``codes``.

    :returns:
        First element of the operator registry entry for ``codes``, or
        ``None`` if the lookup raises :py:exc:`KeyError`.
    '''
    try:
        found = self._operator_registry[codes][0]
    except KeyError:
        found = None

    return found

2615 

def get_operator_group(self, codes):
    '''
    Look up the complete operator registry entry for ``codes``.

    :returns:
        The registry entry, or ``(None, (None, None, None))`` if the
        lookup raises :py:exc:`KeyError`.
    '''
    try:
        group = self._operator_registry[codes]
    except KeyError:
        group = (None, (None, None, None))

    return group

2621 

def iter_operator_codes(self):
    '''
    Iterate over the output codes of all operator mappings.

    :yields:
        Every element of the ``out_codes`` of each mapping produced by
        :py:meth:`iter_operator_mappings`.
    '''
    for _op, _codes_in, codes_out in self.iter_operator_mappings():
        yield from codes_out

2626 

def get_operator_codes(self):
    '''
    Get the results of :py:meth:`iter_operator_codes` as a list.
    '''
    collected = []
    collected.extend(self.iter_operator_codes())
    return collected

2629 

def print_tables(self, table_names=None, stream=None):
    '''
    Write raw database tables in textual form (debugging aid).

    :param table_names:
        Names of the tables to be dumped, a single table name, or ``None``
        to dump all.
    :type table_names:
        :py:class:`list` of :py:class:`str`

    :param stream:
        Open file or ``None`` to dump to standard output.
    '''

    # Public table name -> (template of) the actual table name.
    templates = {
        'selection_file_states': '%(db)s.%(file_states)s',
        'selection_nuts': '%(db)s.%(nuts)s',
        'selection_kind_codes_count': '%(db)s.%(kind_codes_count)s',
        'files': 'files',
        'nuts': 'nuts',
        'kind_codes': 'kind_codes',
        'kind_codes_count': 'kind_codes_count'}

    if stream is None:
        stream = sys.stdout

    if isinstance(table_names, str):
        table_names = [table_names]
    elif table_names is None:
        table_names = [
            'selection_file_states',
            'selection_nuts',
            'selection_kind_codes_count',
            'files', 'nuts', 'kind_codes', 'kind_codes_count']

    for table_name in table_names:
        qualified = templates[table_name] % self._names
        self._database.print_table(qualified, stream=stream)

2668 

2669 

class SquirrelStats(Object):
    '''
    Container to hold statistics about contents available from a Squirrel.

    See also :py:meth:`Squirrel.get_stats`.
    '''

    nfiles = Int.T(
        help='Number of files in selection.')
    nnuts = Int.T(
        help='Number of index nuts in selection.')
    codes = List.T(
        Tuple.T(content_t=String.T()),
        help='Available code sequences in selection, e.g. '
             '(agency, network, station, location) for stations nuts.')
    kinds = List.T(
        String.T(),
        help='Available content types in selection.')
    total_size = Int.T(
        help='Aggregated file size of files in selection.')
    counts = Dict.T(
        String.T(), Dict.T(Tuple.T(content_t=String.T()), Int.T()),
        help='Breakdown of how many nuts of any content type and code '
             'sequence are available in selection, ``counts[kind][codes]``.')
    time_spans = Dict.T(
        String.T(), Tuple.T(content_t=Timestamp.T()),
        help='Time spans by content type.')
    sources = List.T(
        String.T(),
        help='Descriptions of attached sources.')
    operators = List.T(
        String.T(),
        help='Descriptions of attached operators.')

    def __str__(self):
        '''
        Format the statistics as a multi-line, human-readable summary.

        The summary lists the number of files, their total size, the number
        of index nuts, a table with nut count and time span per content
        kind, the available codes (abbreviated), and the descriptions of
        attached sources and operators.
        '''
        # Total number of nuts per content kind.
        kind_counts = {
            kind: sum(self.counts[kind].values()) for kind in self.kinds}

        scodes = model.codes_to_str_abbreviated(self.codes)

        ssources = '<none>' if not self.sources else '\n' + '\n'.join(
            ' ' + s for s in self.sources)

        soperators = '<none>' if not self.operators else '\n' + '\n'.join(
            ' ' + s for s in self.operators)

        def stime(t):
            # Undefined times and the global time limits are shown as
            # '<none>'.
            return util.tts(t) if t is not None and t not in (
                model.g_tmin, model.g_tmax) else '<none>'

        def stable(rows):
            # Left-justify every column to the width of its widest cell.
            ns = [max(len(w) for w in col) for col in zip(*rows)]
            return '\n'.join(
                ' '.join(w.ljust(n) for n, w in zip(ns, row))
                for row in rows)

        def indent(s):
            return '\n'.join(' '+line for line in s.splitlines())

        stspans = '<none>' if not self.kinds else '\n' + indent(stable([(
            kind + ':',
            str(kind_counts[kind]),
            stime(self.time_spans[kind][0]),
            '-',
            stime(self.time_spans[kind][1])) for kind in sorted(self.kinds)]))

        s = '''
Number of files: %i
Total size of known files: %s
Number of index nuts: %i
Available content kinds: %s
Available codes: %s
Sources: %s
Operators: %s''' % (
            self.nfiles,
            util.human_bytesize(self.total_size),
            self.nnuts,
            stspans, scodes, ssources, soperators)

        return s.lstrip()

2750 

2751 

# Names exported by a star-import of this module.
__all__ = [
    'Squirrel',
    'SquirrelStats',
]