1# http://pyrocko.org - GPLv3 

2# 

3# The Pyrocko Developers, 21st Century 

4# ---|P------/S----------~Lg---------- 

5 

6from __future__ import absolute_import, print_function 

7 

8import sys 

9import os 

10 

11import math 

12import logging 

13import threading 

14import queue 

15from collections import defaultdict 

16 

17from pyrocko.guts import Object, Int, List, Tuple, String, Timestamp, Dict 

18from pyrocko import util, trace 

19from pyrocko.progress import progress 

20 

21from . import model, io, cache, dataset 

22 

23from .model import to_kind_id, WaveformOrder, to_kind, to_codes, \ 

24 STATION, CHANNEL, RESPONSE, EVENT, WAVEFORM, codes_patterns_list, \ 

25 codes_patterns_for_kind 

26from .client import fdsn, catalog 

27from .selection import Selection, filldocs 

28from .database import abspath 

29from . import client, environment, error 

30 

31logger = logging.getLogger('psq.base') 

32 

33guts_prefix = 'squirrel' 

34 

35 

def make_task(*args):
    '''
    Create a :py:mod:`pyrocko.progress` task bound to this module's logger.
    '''
    task = progress.task(*args, logger=logger)
    return task

38 

39 

def lpick(condition, seq):
    '''
    Partition *seq* by *condition*.

    Returns a tuple ``(rejected, accepted)`` of two lists, where ``accepted``
    holds the elements for which ``condition(element)`` is truthy and
    ``rejected`` the others. Original order is preserved in both lists.
    '''
    rejected, accepted = [], []
    for item in seq:
        target = accepted if condition(item) else rejected
        target.append(item)

    return rejected, accepted

46 

47 

def blocks(tmin, tmax, deltat, nsamples_block=100000):
    '''
    Yield regular time blocks of ``deltat * nsamples_block`` duration
    covering the span ``[tmin, tmax]``.

    Block boundaries are aligned to integer multiples of the block length,
    so the same spans are produced independently of the query window.
    '''
    tblock = util.to_time_float(deltat * nsamples_block)
    ifirst = int(math.floor(tmin / tblock))
    ilast = int(math.ceil(tmax / tblock))
    for i in range(ifirst, ilast):
        yield tblock * i, tblock * (i + 1)

54 

55 

def gaps(avail, tmin, tmax):
    '''
    Compute the sub-intervals of ``[tmin, tmax]`` not covered by *avail*.

    :param avail:
        Available (covered) time spans as ``(tmin_a, tmax_a)`` pairs.
    :returns:
        List of ``(tmin_g, tmax_g)`` gap intervals. Zero-length gaps are
        suppressed.

    Implemented as a sweep over interval start/end events: ``+1`` marks the
    start of coverage, ``-1`` its end. The query span itself contributes an
    inverted pair so that coverage level 0 inside ``[tmin, tmax]`` signals a
    gap.
    '''
    assert tmin < tmax

    events = [(tmax, 1), (tmin, -1)]
    for span_start, span_end in avail:
        assert span_start < span_end
        events.append((span_start, 1))
        events.append((span_end, -1))

    events.sort()

    result = []
    level = 1
    gap_start = None
    for t, step in events:
        if level == 1 and step == -1:
            # Coverage drops to zero: a gap may start here.
            gap_start = t
        elif level == 0 and step == 1 and gap_start is not None:
            # Coverage resumes: close the gap unless it is empty.
            if gap_start != t:
                result.append((gap_start, t))

        level += step

    return result

80 

81 

def order_key(order):
    '''
    Sort/group key for orders: ``(codes, tmin, tmax)``.
    '''
    return order.codes, order.tmin, order.tmax

84 

85 

class Batch(object):
    '''
    Batch of waveforms from window-wise data extraction.

    Encapsulates state and results yielded for each window in window-wise
    waveform extraction with the :py:meth:`Squirrel.chopper_waveforms` method.

    *Attributes:*

    .. py:attribute:: tmin

        Start of this time window.

    .. py:attribute:: tmax

        End of this time window.

    .. py:attribute:: i

        Index of this time window in sequence.

    .. py:attribute:: n

        Total number of time windows in sequence.

    .. py:attribute:: traces

        Extracted waveforms for this time window.
    '''

    def __init__(self, tmin, tmax, i, n, traces):
        # Plain data holder: store window bounds, position in sequence and
        # extracted traces as given.
        self.i = i
        self.n = n
        self.tmin = tmin
        self.tmax = tmax
        self.traces = traces

122 

123 

124class Squirrel(Selection): 

125 ''' 

126 Prompt, lazy, indexing, caching, dynamic seismological dataset access. 

127 

128 :param env: 

129 Squirrel environment instance or directory path to use as starting 

130 point for its detection. By default, the current directory is used as 

131 starting point. When searching for a usable environment the directory 

132 ``'.squirrel'`` or ``'squirrel'`` in the current (or starting point) 

133 directory is used if it exists, otherwise the parent directories are 

134 search upwards for the existence of such a directory. If no such 

135 directory is found, the user's global Squirrel environment 

136 ``'$HOME/.pyrocko/squirrel'`` is used. 

137 :type env: 

138 :py:class:`~pyrocko.squirrel.environment.Environment` or 

139 :py:class:`str` 

140 

141 :param database: 

142 Database instance or path to database. By default the 

143 database found in the detected Squirrel environment is used. 

144 :type database: 

145 :py:class:`~pyrocko.squirrel.database.Database` or :py:class:`str` 

146 

147 :param cache_path: 

148 Directory path to use for data caching. By default, the ``'cache'`` 

149 directory in the detected Squirrel environment is used. 

150 :type cache_path: 

151 :py:class:`str` 

152 

153 :param persistent: 

154 If given a name, create a persistent selection. 

155 :type persistent: 

156 :py:class:`str` 

157 

158 This is the central class of the Squirrel framework. It provides a unified 

159 interface to query and access seismic waveforms, station meta-data and 

160 event information from local file collections and remote data sources. For 

161 prompt responses, a profound database setup is used under the hood. To 

162 speed up assemblage of ad-hoc data selections, files are indexed on first 

163 use and the extracted meta-data is remembered in the database for 

164 subsequent accesses. Bulk data is lazily loaded from disk and remote 

165 sources, just when requested. Once loaded, data is cached in memory to 

166 expedite typical access patterns. Files and data sources can be dynamically 

167 added to and removed from the Squirrel selection at runtime. 

168 

169 Queries are restricted to the contents of the files currently added to the 

170 Squirrel selection (usually a subset of the file meta-information 

171 collection in the database). This list of files is referred to here as the 

172 "selection". By default, temporary tables are created in the attached 

173 database to hold the names of the files in the selection as well as various 

174 indices and counters. These tables are only visible inside the application 

175 which created them and are deleted when the database connection is closed 

176 or the application exits. To create a selection which is not deleted at 

177 exit, supply a name to the ``persistent`` argument of the Squirrel 

178 constructor. Persistent selections are shared among applications using the 

179 same database. 

180 

181 **Method summary** 

182 

183 Some of the methods are implemented in :py:class:`Squirrel`'s base class 

184 :py:class:`~pyrocko.squirrel.selection.Selection`. 

185 

186 .. autosummary:: 

187 

188 ~Squirrel.add 

189 ~Squirrel.add_source 

190 ~Squirrel.add_fdsn 

191 ~Squirrel.add_catalog 

192 ~Squirrel.add_dataset 

193 ~Squirrel.add_virtual 

194 ~Squirrel.update 

195 ~Squirrel.update_waveform_promises 

196 ~Squirrel.advance_accessor 

197 ~Squirrel.clear_accessor 

198 ~Squirrel.reload 

199 ~pyrocko.squirrel.selection.Selection.iter_paths 

200 ~Squirrel.iter_nuts 

201 ~Squirrel.iter_kinds 

202 ~Squirrel.iter_deltats 

203 ~Squirrel.iter_codes 

204 ~pyrocko.squirrel.selection.Selection.get_paths 

205 ~Squirrel.get_nuts 

206 ~Squirrel.get_kinds 

207 ~Squirrel.get_deltats 

208 ~Squirrel.get_codes 

209 ~Squirrel.get_counts 

210 ~Squirrel.get_time_span 

211 ~Squirrel.get_deltat_span 

212 ~Squirrel.get_nfiles 

213 ~Squirrel.get_nnuts 

214 ~Squirrel.get_total_size 

215 ~Squirrel.get_stats 

216 ~Squirrel.get_content 

217 ~Squirrel.get_stations 

218 ~Squirrel.get_channels 

219 ~Squirrel.get_responses 

220 ~Squirrel.get_events 

221 ~Squirrel.get_waveform_nuts 

222 ~Squirrel.get_waveforms 

223 ~Squirrel.chopper_waveforms 

224 ~Squirrel.get_coverage 

225 ~Squirrel.pile 

226 ~Squirrel.snuffle 

227 ~Squirrel.glob_codes 

228 ~pyrocko.squirrel.selection.Selection.get_database 

229 ~Squirrel.print_tables 

230 ''' 

231 

    def __init__(
            self, env=None, database=None, cache_path=None, persistent=None):

        # Resolve the environment first; database and cache locations
        # default to the ones configured there.
        if not isinstance(env, environment.Environment):
            env = environment.get_environment(env)

        if database is None:
            database = env.expand_path(env.database_path)

        if cache_path is None:
            cache_path = env.expand_path(env.cache_path)

        if persistent is None:
            persistent = env.persistent

        # Base class sets up the database connection and the file selection
        # tables (temporary or persistent).
        Selection.__init__(
            self, database=database, persistent=persistent)

        self.get_database().set_basepath(os.path.dirname(env.get_basepath()))

        # Separate in-memory content caches: waveforms have different
        # lifetime characteristics than meta-data content.
        self._content_caches = {
            'waveform': cache.ContentCache(),
            'default': cache.ContentCache()}

        self._cache_path = cache_path

        # Remote data sources and registered operators.
        self._sources = []
        self._operators = []
        self._operator_registry = {}

        self._pile = None
        self._n_choppers_active = 0

        # Selection-local table names, derived from the selection's name so
        # multiple selections can coexist in one database.
        self._names.update({
            'nuts': self.name + '_nuts',
            'kind_codes_count': self.name + '_kind_codes_count',
            'coverage': self.name + '_coverage'})

        with self.transaction('create tables') as cursor:
            self._create_tables_squirrel(cursor)

272 

    def _create_tables_squirrel(self, cursor):
        '''
        Create selection-local tables, indices and triggers (idempotent).
        '''

        # One row per indexed content element ("nut") of the files in this
        # selection, with its time span split into integer seconds plus
        # fractional offset and a time-scale bucket (kscale) for indexing.
        cursor.execute(self._register_table(self._sql(
            '''
                CREATE TABLE IF NOT EXISTS %(db)s.%(nuts)s (
                    nut_id integer PRIMARY KEY,
                    file_id integer,
                    file_segment integer,
                    file_element integer,
                    kind_id integer,
                    kind_codes_id integer,
                    tmin_seconds integer,
                    tmin_offset integer,
                    tmax_seconds integer,
                    tmax_offset integer,
                    kscale integer)
            ''')))

        # Nut counts per (kind, codes) combination, maintained by the
        # inc/dec triggers below.
        cursor.execute(self._register_table(self._sql(
            '''
                CREATE TABLE IF NOT EXISTS %(db)s.%(kind_codes_count)s (
                    kind_codes_id integer PRIMARY KEY,
                    count integer)
            ''')))

        # One nut per (file, segment, element) at most.
        cursor.execute(self._sql(
            '''
                CREATE UNIQUE INDEX IF NOT EXISTS %(db)s.%(nuts)s_file_element
                    ON %(nuts)s (file_id, file_segment, file_element)
            '''))

        cursor.execute(self._sql(
            '''
                CREATE INDEX IF NOT EXISTS %(db)s.%(nuts)s_index_file_id
                    ON %(nuts)s (file_id)
            '''))

        # Time-range lookup indices used by _timerange_sql.
        cursor.execute(self._sql(
            '''
                CREATE INDEX IF NOT EXISTS %(db)s.%(nuts)s_index_tmin_seconds
                    ON %(nuts)s (kind_id, tmin_seconds)
            '''))

        cursor.execute(self._sql(
            '''
                CREATE INDEX IF NOT EXISTS %(db)s.%(nuts)s_index_tmax_seconds
                    ON %(nuts)s (kind_id, tmax_seconds)
            '''))

        cursor.execute(self._sql(
            '''
                CREATE INDEX IF NOT EXISTS %(db)s.%(nuts)s_index_kscale
                    ON %(nuts)s (kind_id, kscale, tmin_seconds)
            '''))

        # Drop nuts when their file is deleted from the main database.
        cursor.execute(self._sql(
            '''
                CREATE TRIGGER IF NOT EXISTS %(db)s.%(nuts)s_delete_nuts
                    BEFORE DELETE ON main.files FOR EACH ROW
                    BEGIN
                        DELETE FROM %(nuts)s WHERE file_id == old.file_id;
                    END
            '''))

        # trigger only on size to make silent update of mtime possible
        cursor.execute(self._sql(
            '''
                CREATE TRIGGER IF NOT EXISTS %(db)s.%(nuts)s_delete_nuts2
                    BEFORE UPDATE OF size ON main.files FOR EACH ROW
                    BEGIN
                        DELETE FROM %(nuts)s WHERE file_id == old.file_id;
                    END
            '''))

        # Drop nuts when the file is removed from this selection.
        cursor.execute(self._sql(
            '''
                CREATE TRIGGER IF NOT EXISTS
                    %(db)s.%(file_states)s_delete_files
                    BEFORE DELETE ON %(db)s.%(file_states)s FOR EACH ROW
                    BEGIN
                        DELETE FROM %(nuts)s WHERE file_id == old.file_id;
                    END
            '''))

        # Maintain kind_codes_count: increment on nut insert ...
        cursor.execute(self._sql(
            '''
                CREATE TRIGGER IF NOT EXISTS %(db)s.%(nuts)s_inc_kind_codes
                    BEFORE INSERT ON %(nuts)s FOR EACH ROW
                    BEGIN
                        INSERT OR IGNORE INTO %(kind_codes_count)s VALUES
                        (new.kind_codes_id, 0);
                        UPDATE %(kind_codes_count)s
                        SET count = count + 1
                        WHERE new.kind_codes_id
                            == %(kind_codes_count)s.kind_codes_id;
                    END
            '''))

        # ... and decrement on nut delete.
        cursor.execute(self._sql(
            '''
                CREATE TRIGGER IF NOT EXISTS %(db)s.%(nuts)s_dec_kind_codes
                    BEFORE DELETE ON %(nuts)s FOR EACH ROW
                    BEGIN
                        UPDATE %(kind_codes_count)s
                        SET count = count - 1
                        WHERE old.kind_codes_id
                            == %(kind_codes_count)s.kind_codes_id;
                    END
            '''))

        # Change-point representation of time coverage per (kind, codes):
        # ``step`` is the net number of nut spans starting (+1) minus
        # ending (-1) at the given time.
        cursor.execute(self._register_table(self._sql(
            '''
                CREATE TABLE IF NOT EXISTS %(db)s.%(coverage)s (
                    kind_codes_id integer,
                    time_seconds integer,
                    time_offset integer,
                    step integer)
            ''')))

        cursor.execute(self._sql(
            '''
                CREATE UNIQUE INDEX IF NOT EXISTS %(db)s.%(coverage)s_time
                    ON %(coverage)s (kind_codes_id, time_seconds, time_offset)
            '''))

        # Maintain coverage change-points when nuts are inserted: bump the
        # step at tmin, drop it at tmax, and remove entries whose step
        # becomes zero.
        cursor.execute(self._sql(
            '''
                CREATE TRIGGER IF NOT EXISTS %(db)s.%(nuts)s_add_coverage
                    AFTER INSERT ON %(nuts)s FOR EACH ROW
                    BEGIN
                        INSERT OR IGNORE INTO %(coverage)s VALUES
                        (new.kind_codes_id, new.tmin_seconds, new.tmin_offset, 0)
                        ;
                        UPDATE %(coverage)s
                        SET step = step + 1
                        WHERE new.kind_codes_id == %(coverage)s.kind_codes_id
                            AND new.tmin_seconds == %(coverage)s.time_seconds
                            AND new.tmin_offset == %(coverage)s.time_offset
                        ;
                        INSERT OR IGNORE INTO %(coverage)s VALUES
                        (new.kind_codes_id, new.tmax_seconds, new.tmax_offset, 0)
                        ;
                        UPDATE %(coverage)s
                        SET step = step - 1
                        WHERE new.kind_codes_id == %(coverage)s.kind_codes_id
                            AND new.tmax_seconds == %(coverage)s.time_seconds
                            AND new.tmax_offset == %(coverage)s.time_offset
                        ;
                        DELETE FROM %(coverage)s
                        WHERE new.kind_codes_id == %(coverage)s.kind_codes_id
                            AND new.tmin_seconds == %(coverage)s.time_seconds
                            AND new.tmin_offset == %(coverage)s.time_offset
                            AND step == 0
                        ;
                        DELETE FROM %(coverage)s
                        WHERE new.kind_codes_id == %(coverage)s.kind_codes_id
                            AND new.tmax_seconds == %(coverage)s.time_seconds
                            AND new.tmax_offset == %(coverage)s.time_offset
                            AND step == 0
                        ;
                    END
            '''))

        # Inverse bookkeeping when nuts are deleted.
        cursor.execute(self._sql(
            '''
                CREATE TRIGGER IF NOT EXISTS %(db)s.%(nuts)s_remove_coverage
                    BEFORE DELETE ON %(nuts)s FOR EACH ROW
                    BEGIN
                        INSERT OR IGNORE INTO %(coverage)s VALUES
                        (old.kind_codes_id, old.tmin_seconds, old.tmin_offset, 0)
                        ;
                        UPDATE %(coverage)s
                        SET step = step - 1
                        WHERE old.kind_codes_id == %(coverage)s.kind_codes_id
                            AND old.tmin_seconds == %(coverage)s.time_seconds
                            AND old.tmin_offset == %(coverage)s.time_offset
                        ;
                        INSERT OR IGNORE INTO %(coverage)s VALUES
                        (old.kind_codes_id, old.tmax_seconds, old.tmax_offset, 0)
                        ;
                        UPDATE %(coverage)s
                        SET step = step + 1
                        WHERE old.kind_codes_id == %(coverage)s.kind_codes_id
                            AND old.tmax_seconds == %(coverage)s.time_seconds
                            AND old.tmax_offset == %(coverage)s.time_offset
                        ;
                        DELETE FROM %(coverage)s
                        WHERE old.kind_codes_id == %(coverage)s.kind_codes_id
                            AND old.tmin_seconds == %(coverage)s.time_seconds
                            AND old.tmin_offset == %(coverage)s.time_offset
                            AND step == 0
                        ;
                        DELETE FROM %(coverage)s
                        WHERE old.kind_codes_id == %(coverage)s.kind_codes_id
                            AND old.tmax_seconds == %(coverage)s.time_seconds
                            AND old.tmax_offset == %(coverage)s.time_offset
                            AND step == 0
                        ;
                    END
            '''))

473 

    def _delete(self):
        '''Delete database tables associated with this Squirrel.'''

        # Drop triggers before the tables they reference. The coverage
        # objects use IF EXISTS, presumably for selections created before
        # coverage tracking existed -- TODO confirm.
        with self.transaction('delete tables') as cursor:
            for s in '''
                    DROP TRIGGER %(db)s.%(nuts)s_delete_nuts;
                    DROP TRIGGER %(db)s.%(nuts)s_delete_nuts2;
                    DROP TRIGGER %(db)s.%(file_states)s_delete_files;
                    DROP TRIGGER %(db)s.%(nuts)s_inc_kind_codes;
                    DROP TRIGGER %(db)s.%(nuts)s_dec_kind_codes;
                    DROP TABLE %(db)s.%(nuts)s;
                    DROP TABLE %(db)s.%(kind_codes_count)s;
                    DROP TRIGGER IF EXISTS %(db)s.%(nuts)s_add_coverage;
                    DROP TRIGGER IF EXISTS %(db)s.%(nuts)s_remove_coverage;
                    DROP TABLE IF EXISTS %(db)s.%(coverage)s;
                '''.strip().splitlines():

                cursor.execute(self._sql(s))

        # Let the base class remove its own selection tables.
        Selection._delete(self)

494 

    @filldocs
    def add(self,
            paths,
            kinds=None,
            format='detect',
            include=None,
            exclude=None,
            check=True):

        '''
        Add files to the selection.

        :param paths:
            Iterator yielding paths to files or directories to be added to the
            selection. Recurses into directories. If given a ``str``, it
            is treated as a single path to be added.
        :type paths:
            :py:class:`list` of :py:class:`str`

        :param kinds:
            Content types to be made available through the Squirrel selection.
            By default, all known content types are accepted.
        :type kinds:
            :py:class:`list` of :py:class:`str`

        :param format:
            File format identifier or ``'detect'`` to enable auto-detection
            (available: %(file_formats)s).
        :type format:
            str

        :param include:
            If not ``None``, files are only included if their paths match the
            given regular expression pattern.
        :type include:
            str

        :param exclude:
            If not ``None``, files are only included if their paths do not
            match the given regular expression pattern.
        :type exclude:
            str

        :param check:
            If ``True``, all file modification times are checked to see if
            cached information has to be updated (slow). If ``False``, only
            previously unknown files are indexed and cached information is used
            for known files, regardless of file state (fast, corresponds to
            Squirrel's ``--optimistic`` mode). File deletions will go
            undetected in the latter case.
        :type check:
            bool

        :Complexity:
            O(log N)
        '''

        # Allow single strings where lists are expected.
        if isinstance(kinds, str):
            kinds = (kinds,)

        if isinstance(paths, str):
            paths = [paths]

        kind_mask = model.to_kind_mask(kinds)

        # Expand directories and filters, but let 'virtual:' paths pass
        # through unresolved -- they do not exist on disk.
        with progress.view():
            Selection.add(
                self, util.iter_select_files(
                    paths,
                    show_progress=False,
                    include=include,
                    exclude=exclude,
                    pass_through=lambda path: path.startswith('virtual:')
                ), kind_mask, format)

        # Index new/changed files and refresh the selection's nut tables.
        self._load(check)
        self._update_nuts()

572 

    def reload(self):
        '''
        Check for modifications and reindex modified files.

        Based on file modification times.
        '''

        # Mark all files for re-checking, then reindex and rebuild the
        # selection's nut tables.
        self._set_file_states_force_check()
        self._load(check=True)
        self._update_nuts()

583 

584 def add_virtual(self, nuts, virtual_paths=None): 

585 ''' 

586 Add content which is not backed by files. 

587 

588 :param nuts: 

589 Content pieces to be added. 

590 :type nuts: 

591 iterator yielding :py:class:`~pyrocko.squirrel.model.Nut` objects 

592 

593 :param virtual_paths: 

594 List of virtual paths to prevent creating a temporary list of the 

595 nuts while aggregating the file paths for the selection. 

596 :type virtual_paths: 

597 :py:class:`list` of :py:class:`str` 

598 

599 Stores to the main database and the selection. 

600 ''' 

601 

602 if isinstance(virtual_paths, str): 

603 virtual_paths = [virtual_paths] 

604 

605 if virtual_paths is None: 

606 if not isinstance(nuts, list): 

607 nuts = list(nuts) 

608 virtual_paths = set(nut.file_path for nut in nuts) 

609 

610 Selection.add(self, virtual_paths) 

611 self.get_database().dig(nuts) 

612 self._update_nuts() 

613 

    def add_volatile(self, nuts):
        # Volatile content is held only in memory: register the nuts with
        # the virtual IO backend, add them to the selection, and remember
        # their paths so they can be cleaned up later.
        if not isinstance(nuts, list):
            nuts = list(nuts)

        paths = list(set(nut.file_path for nut in nuts))
        io.backends.virtual.add_nuts(nuts)
        self.add_virtual(nuts, paths)
        self._volatile_paths.extend(paths)

622 

623 def add_volatile_waveforms(self, traces): 

624 ''' 

625 Add in-memory waveforms which will be removed when the app closes. 

626 ''' 

627 

628 name = model.random_name() 

629 

630 path = 'virtual:volatile:%s' % name 

631 

632 nuts = [] 

633 for itr, tr in enumerate(traces): 

634 assert tr.tmin <= tr.tmax 

635 tmin_seconds, tmin_offset = model.tsplit(tr.tmin) 

636 tmax_seconds, tmax_offset = model.tsplit( 

637 tr.tmin + tr.data_len()*tr.deltat) 

638 

639 nuts.append(model.Nut( 

640 file_path=path, 

641 file_format='virtual', 

642 file_segment=itr, 

643 file_element=0, 

644 file_mtime=0, 

645 codes=tr.codes, 

646 tmin_seconds=tmin_seconds, 

647 tmin_offset=tmin_offset, 

648 tmax_seconds=tmax_seconds, 

649 tmax_offset=tmax_offset, 

650 deltat=tr.deltat, 

651 kind_id=to_kind_id('waveform'), 

652 content=tr)) 

653 

654 self.add_volatile(nuts) 

655 return path 

656 

657 def _load(self, check): 

658 for _ in io.iload( 

659 self, 

660 content=[], 

661 skip_unchanged=True, 

662 check=check): 

663 pass 

664 

    def _update_nuts(self, transaction=None):
        '''
        Copy nuts of newly added files from the main database into the
        selection-local nuts table.
        '''
        transaction = transaction or self.transaction('update nuts')
        with make_task('Aggregating selection') as task, \
                transaction as cursor:

            # Report SQLite progress ticks to the task display.
            self._conn.set_progress_handler(task.update, 100000)
            # Copy nuts for files not yet in state "known" (file_state != 2),
            # restricted to the content kinds enabled in each file's
            # kind_mask. NULL lets SQLite assign fresh nut_ids.
            nrows = cursor.execute(self._sql(
                '''
                    INSERT INTO %(db)s.%(nuts)s
                    SELECT NULL,
                        nuts.file_id, nuts.file_segment, nuts.file_element,
                        nuts.kind_id, nuts.kind_codes_id,
                        nuts.tmin_seconds, nuts.tmin_offset,
                        nuts.tmax_seconds, nuts.tmax_offset,
                        nuts.kscale
                    FROM %(db)s.%(file_states)s
                    INNER JOIN nuts
                        ON %(db)s.%(file_states)s.file_id == nuts.file_id
                    INNER JOIN kind_codes
                        ON nuts.kind_codes_id ==
                           kind_codes.kind_codes_id
                    WHERE %(db)s.%(file_states)s.file_state != 2
                        AND (((1 << kind_codes.kind_id)
                            & %(db)s.%(file_states)s.kind_mask) != 0)
                ''')).rowcount

            task.update(nrows)
            self._set_file_states_known(transaction)
            self._conn.set_progress_handler(None, 0)

694 

    def add_source(self, source, check=True):
        '''
        Add remote resource.

        :param source:
            Remote data access client instance.
        :type source:
            subclass of :py:class:`~pyrocko.squirrel.client.base.Source`

        :param check:
            Passed through to the source's ``setup``. Presumably controls
            whether locally cached files are checked for modifications --
            TODO confirm against ``Source.setup``.
        :type check:
            bool
        '''

        self._sources.append(source)
        source.setup(self, check=check)

707 

708 def add_fdsn(self, *args, **kwargs): 

709 ''' 

710 Add FDSN site for transparent remote data access. 

711 

712 Arguments are passed to 

713 :py:class:`~pyrocko.squirrel.client.fdsn.FDSNSource`. 

714 ''' 

715 

716 self.add_source(fdsn.FDSNSource(*args, **kwargs)) 

717 

718 def add_catalog(self, *args, **kwargs): 

719 ''' 

720 Add online catalog for transparent event data access. 

721 

722 Arguments are passed to 

723 :py:class:`~pyrocko.squirrel.client.catalog.CatalogSource`. 

724 ''' 

725 

726 self.add_source(catalog.CatalogSource(*args, **kwargs)) 

727 

    def add_dataset(self, ds, check=True, warn_persistent=True):
        '''
        Read dataset description from file and add its contents.

        :param ds:
            Path to dataset description file or dataset description object.
            See :py:mod:`~pyrocko.squirrel.dataset`.
        :type ds:
            :py:class:`str` or :py:class:`~pyrocko.squirrel.dataset.Dataset`

        :param check:
            If ``True``, all file modification times are checked to see if
            cached information has to be updated (slow). If ``False``, only
            previously unknown files are indexed and cached information is used
            for known files, regardless of file state (fast, corresponds to
            Squirrel's ``--optimistic`` mode). File deletions will go
            undetected in the latter case.
        :type check:
            bool
        '''
        if isinstance(ds, str):
            ds = dataset.read_dataset(ds)
            path = ds
        else:
            path = None

        # A dataset's persistent flag only takes effect when the Squirrel
        # itself was created with it; warn when it would be silently ignored.
        if warn_persistent and ds.persistent and (
                not self._persistent or (self._persistent != ds.persistent)):

            logger.warning(
                'Dataset `persistent` flag ignored. Can not be set on already '
                'existing Squirrel instance.%s' % (
                    ' Dataset: %s' % path if path else ''))

        ds.setup(self, check=check)

763 

764 def _get_selection_args( 

765 self, kind_id, 

766 obj=None, tmin=None, tmax=None, time=None, codes=None): 

767 

768 if codes is not None: 

769 codes = codes_patterns_for_kind(kind_id, codes) 

770 

771 if time is not None: 

772 tmin = time 

773 tmax = time 

774 

775 if obj is not None: 

776 tmin = tmin if tmin is not None else obj.tmin 

777 tmax = tmax if tmax is not None else obj.tmax 

778 codes = codes if codes is not None else codes_patterns_for_kind( 

779 kind_id, obj.codes) 

780 

781 return tmin, tmax, codes 

782 

783 def _get_selection_args_str(self, *args, **kwargs): 

784 

785 tmin, tmax, codes = self._get_selection_args(*args, **kwargs) 

786 return 'tmin: %s, tmax: %s, codes: %s' % ( 

787 util.time_to_str(tmin) if tmin is not None else 'none', 

788 util.time_to_str(tmax) if tmin is not None else 'none', 

789 ','.join(str(entry) for entry in codes)) 

790 

791 def _selection_args_to_kwargs( 

792 self, obj=None, tmin=None, tmax=None, time=None, codes=None): 

793 

794 return dict(obj=obj, tmin=tmin, tmax=tmax, time=time, codes=codes) 

795 

    def _timerange_sql(self, tmin, tmax, kind, cond, args, naiv):
        '''
        Append SQL conditions and bind arguments selecting nuts intersecting
        ``[tmin, tmax]`` to *cond* and *args* (modified in place).
        '''

        tmin_seconds, tmin_offset = model.tsplit(tmin)
        tmax_seconds, tmax_offset = model.tsplit(tmax)
        if naiv:
            # Simple variant bypassing the kscale index (slow, for testing).
            cond.append('%(db)s.%(nuts)s.tmin_seconds <= ?')
            args.append(tmax_seconds)
        else:
            # Per time-scale bucket (kscale), a nut's duration is bounded by
            # tscale, so tmin_seconds can be range-restricted and served from
            # the (kind_id, kscale, tmin_seconds) index.
            tscale_edges = model.tscale_edges
            tmin_cond = []
            for kscale in range(tscale_edges.size + 1):
                if kscale != tscale_edges.size:
                    tscale = int(tscale_edges[kscale])
                    tmin_cond.append('''
                        (%(db)s.%(nuts)s.kind_id = ?
                         AND %(db)s.%(nuts)s.kscale == ?
                         AND %(db)s.%(nuts)s.tmin_seconds BETWEEN ? AND ?)
                    ''')
                    # -1 / +1 margins absorb the fractional offsets.
                    args.extend(
                        (to_kind_id(kind), kscale,
                         tmin_seconds - tscale - 1, tmax_seconds + 1))

                else:
                    # Last bucket: duration unbounded, no lower limit usable.
                    tmin_cond.append('''
                        (%(db)s.%(nuts)s.kind_id == ?
                         AND %(db)s.%(nuts)s.kscale == ?
                         AND %(db)s.%(nuts)s.tmin_seconds <= ?)
                    ''')

                    args.extend(
                        (to_kind_id(kind), kscale, tmax_seconds + 1))
            if tmin_cond:
                cond.append(' ( ' + ' OR '.join(tmin_cond) + ' ) ')

        cond.append('%(db)s.%(nuts)s.tmax_seconds >= ?')
        args.append(tmin_seconds)

832 

    def iter_nuts(
            self, kind=None, tmin=None, tmax=None, codes=None, naiv=False,
            kind_codes_ids=None, path=None):

        '''
        Iterate over content entities matching given constraints.

        :param kind:
            Content kind (or kinds) to extract.
        :type kind:
            :py:class:`str`, :py:class:`list` of :py:class:`str`

        :param tmin:
            Start time of query interval.
        :type tmin:
            timestamp

        :param tmax:
            End time of query interval.
        :type tmax:
            timestamp

        :param codes:
            List of code patterns to query.
        :type codes:
            :py:class:`list` of :py:class:`~pyrocko.squirrel.model.Codes`
            objects appropriate for the queried content type, or anything which
            can be converted to such objects.

        :param naiv:
            Bypass time span lookup through indices (slow, for testing).
        :type naiv:
            :py:class:`bool`

        :param kind_codes_ids:
            Kind-codes IDs of contents to be retrieved (internal use).
        :type kind_codes_ids:
            :py:class:`list` of :py:class:`int`

        :yields:
            :py:class:`~pyrocko.squirrel.model.Nut` objects representing the
            intersecting content.

        :complexity:
            O(log N) for the time selection part due to heavy use of database
            indices.

        Query time span is treated as a half-open interval ``[tmin, tmax)``.
        However, if ``tmin`` equals ``tmax``, the edge logics are modified to
        closed-interval so that content intersecting with the time instant ``t
        = tmin = tmax`` is returned (otherwise nothing would be returned as
        ``[t, t)`` never matches anything).

        Time spans of content entities to be matched are also treated as half
        open intervals, e.g. content span ``[0, 1)`` is matched by query span
        ``[0, 1)`` but not by ``[-1, 0)`` or ``[1, 2)``. Also here, logics are
        modified to closed-interval when the content time span is an empty
        interval, i.e. to indicate a time instant. E.g. time instant 0 is
        matched by ``[0, 1)`` but not by ``[-1, 0)`` or ``[1, 2)``.
        '''

        # Multiple kinds (or all kinds): recurse once per kind.
        if not isinstance(kind, str):
            if kind is None:
                kind = model.g_content_kinds
            for kind_ in kind:
                for nut in self.iter_nuts(kind_, tmin, tmax, codes):
                    yield nut

            return

        kind_id = to_kind_id(kind)

        # Build WHERE conditions and bind arguments incrementally.
        cond = []
        args = []
        if tmin is not None or tmax is not None:
            assert kind is not None
            # Open-ended queries are bounded by the overall available span.
            if tmin is None:
                tmin = self.get_time_span()[0]
            if tmax is None:
                tmax = self.get_time_span()[1] + 1.0

            self._timerange_sql(tmin, tmax, kind, cond, args, naiv)

        cond.append('kind_codes.kind_id == ?')
        args.append(kind_id)

        if codes is not None:
            pats = codes_patterns_for_kind(kind_id, codes)

            if pats:
                # could optimize this by using IN for non-patterns
                cond.append(
                    ' ( %s ) ' % ' OR '.join(
                        ('kind_codes.codes GLOB ?',) * len(pats)))
                args.extend(pat.safe_str for pat in pats)

        if kind_codes_ids is not None:
            cond.append(
                ' ( kind_codes.kind_codes_id IN ( %s ) ) ' % ', '.join(
                    '?'*len(kind_codes_ids)))

            args.extend(kind_codes_ids)

        db = self.get_database()
        if path is not None:
            # Paths are stored relative to the database's base path.
            cond.append('files.path == ?')
            args.append(db.relpath(abspath(path)))

        sql = ('''
            SELECT
                files.path,
                files.format,
                files.mtime,
                files.size,
                %(db)s.%(nuts)s.file_segment,
                %(db)s.%(nuts)s.file_element,
                kind_codes.kind_id,
                kind_codes.codes,
                %(db)s.%(nuts)s.tmin_seconds,
                %(db)s.%(nuts)s.tmin_offset,
                %(db)s.%(nuts)s.tmax_seconds,
                %(db)s.%(nuts)s.tmax_offset,
                kind_codes.deltat
            FROM files
            INNER JOIN %(db)s.%(nuts)s
                ON files.file_id == %(db)s.%(nuts)s.file_id
            INNER JOIN kind_codes
                ON %(db)s.%(nuts)s.kind_codes_id == kind_codes.kind_codes_id
            ''')

        if cond:
            sql += ''' WHERE ''' + ' AND '.join(cond)

        sql = self._sql(sql)
        if tmin is None and tmax is None:
            # No time constraint: every row matches.
            for row in self._conn.execute(sql, args):
                row = (db.abspath(row[0]),) + row[1:]
                nut = model.Nut(values_nocheck=row)
                yield nut
        else:
            # The SQL time selection is coarse (integer seconds); apply the
            # exact half-open/instant interval logics in Python.
            assert tmin is not None and tmax is not None
            if tmin == tmax:
                # Time-instant query: closed-interval edge handling.
                for row in self._conn.execute(sql, args):
                    row = (db.abspath(row[0]),) + row[1:]
                    nut = model.Nut(values_nocheck=row)
                    if (nut.tmin <= tmin < nut.tmax) \
                            or (nut.tmin == nut.tmax and tmin == nut.tmin):

                        yield nut
            else:
                for row in self._conn.execute(sql, args):
                    row = (db.abspath(row[0]),) + row[1:]
                    nut = model.Nut(values_nocheck=row)
                    if (tmin < nut.tmax and nut.tmin < tmax) \
                            or (nut.tmin == nut.tmax
                                and tmin <= nut.tmin < tmax):

                        yield nut

991 

992 def get_nuts(self, *args, **kwargs): 

993 ''' 

994 Get content entities matching given constraints. 

995 

996 Like :py:meth:`iter_nuts` but returns results as a list. 

997 ''' 

998 

999 return list(self.iter_nuts(*args, **kwargs)) 

1000 

    def _split_nuts(
            self, kind, tmin=None, tmax=None, codes=None, path=None):
        '''
        Cut the time span ``[tmin, tmax)`` out of matching index entries.

        :param kind:
            Content kind of the entries to modify.
        :param tmin:
            Start of the span to remove.
        :param tmax:
            End of the span to remove.
        :param codes:
            If given, restrict to entries matching these code patterns.
        :param path:
            If given, restrict to entries indexed from this file path.

        Entries overlapping the span are deleted; any part of a deleted
        entry sticking out before ``tmin`` or after ``tmax`` is re-inserted
        as a new, shortened entry. The operation is applied both to the live
        selection's nut table and to the main database's nut table, inside a
        single transaction.
        '''

        kind_id = to_kind_id(kind)
        # Times are stored split into integer seconds plus a float offset.
        tmin_seconds, tmin_offset = model.tsplit(tmin)
        tmax_seconds, tmax_offset = model.tsplit(tmax)

        # Table name substitutions targeting the main database ('main.nuts')
        # instead of the selection's own nut table.
        names_main_nuts = dict(self._names)
        names_main_nuts.update(db='main', nuts='nuts')

        db = self.get_database()

        def main_nuts(s):
            return s % names_main_nuts

        with self.transaction('split nuts') as cursor:
            # modify selection and main
            for sql_subst in [
                    self._sql, main_nuts]:

                cond = []
                args = []

                self._timerange_sql(tmin, tmax, kind, cond, args, False)

                if codes is not None:
                    pats = codes_patterns_for_kind(kind_id, codes)
                    if pats:
                        # One GLOB clause per pattern, OR-ed together.
                        cond.append(
                            ' ( %s ) ' % ' OR '.join(
                                ('kind_codes.codes GLOB ?',) * len(pats)))
                        args.extend(pat.safe_str for pat in pats)

                if path is not None:
                    # Paths are stored relative to the database location.
                    cond.append('files.path == ?')
                    args.append(db.relpath(abspath(path)))

                sql = sql_subst('''
                    SELECT
                        %(db)s.%(nuts)s.nut_id,
                        %(db)s.%(nuts)s.tmin_seconds,
                        %(db)s.%(nuts)s.tmin_offset,
                        %(db)s.%(nuts)s.tmax_seconds,
                        %(db)s.%(nuts)s.tmax_offset,
                        kind_codes.deltat
                    FROM files
                    INNER JOIN %(db)s.%(nuts)s
                        ON files.file_id == %(db)s.%(nuts)s.file_id
                    INNER JOIN kind_codes
                        ON %(db)s.%(nuts)s.kind_codes_id == kind_codes.kind_codes_id
                    WHERE ''' + ' AND '.join(cond))  # noqa

                insert = []
                delete = []
                for row in cursor.execute(sql, args):
                    # NOTE: nut_deltat is selected but not used below.
                    nut_id, nut_tmin_seconds, nut_tmin_offset, \
                        nut_tmax_seconds, nut_tmax_offset, nut_deltat = row

                    nut_tmin = model.tjoin(
                        nut_tmin_seconds, nut_tmin_offset)
                    nut_tmax = model.tjoin(
                        nut_tmax_seconds, nut_tmax_offset)

                    # Only entries truly overlapping [tmin, tmax) are split.
                    if nut_tmin < tmax and tmin < nut_tmax:
                        if nut_tmin < tmin:
                            # Keep the leading remainder [nut_tmin, tmin).
                            insert.append((
                                nut_tmin_seconds, nut_tmin_offset,
                                tmin_seconds, tmin_offset,
                                model.tscale_to_kscale(
                                    tmin_seconds - nut_tmin_seconds),
                                nut_id))

                        if tmax < nut_tmax:
                            # Keep the trailing remainder [tmax, nut_tmax).
                            insert.append((
                                tmax_seconds, tmax_offset,
                                nut_tmax_seconds, nut_tmax_offset,
                                model.tscale_to_kscale(
                                    nut_tmax_seconds - tmax_seconds),
                                nut_id))

                        delete.append((nut_id,))

                # Re-insert remainders, copying all non-time columns from
                # the original entry (looked up by nut_id).
                sql_add = '''
                    INSERT INTO %(db)s.%(nuts)s (
                            file_id, file_segment, file_element, kind_id,
                            kind_codes_id, tmin_seconds, tmin_offset,
                            tmax_seconds, tmax_offset, kscale )
                    SELECT
                            file_id, file_segment, file_element,
                            kind_id, kind_codes_id, ?, ?, ?, ?, ?
                    FROM %(db)s.%(nuts)s
                    WHERE nut_id == ?
                '''
                cursor.executemany(sql_subst(sql_add), insert)

                sql_delete = '''
                    DELETE FROM %(db)s.%(nuts)s WHERE nut_id == ?
                '''
                cursor.executemany(sql_subst(sql_delete), delete)

1100 

    def get_time_span(self, kinds=None):
        '''
        Get time interval over all content in selection.

        :param kinds:
            If not ``None``, restrict query to given content kinds.
        :type kinds:
            list of str

        :complexity:
            O(1), independent of the number of nuts.

        :returns:
            ``(tmin, tmax)``, combined time interval of queried content kinds.
        '''

        # Times are stored split into integer seconds plus a float offset.
        # The inner SELECT finds the extreme seconds value; the outer query
        # then picks the extreme offset among rows sharing that value.
        sql_min = self._sql('''
            SELECT MIN(tmin_seconds), MIN(tmin_offset)
            FROM %(db)s.%(nuts)s
            WHERE kind_id == ?
                AND tmin_seconds == (
                    SELECT MIN(tmin_seconds)
                    FROM %(db)s.%(nuts)s
                    WHERE kind_id == ?)
        ''')

        sql_max = self._sql('''
            SELECT MAX(tmax_seconds), MAX(tmax_offset)
            FROM %(db)s.%(nuts)s
            WHERE kind_id == ?
                AND tmax_seconds == (
                    SELECT MAX(tmax_seconds)
                    FROM %(db)s.%(nuts)s
                    WHERE kind_id == ?)
        ''')

        gtmin = None
        gtmax = None

        # Convenience: a single kind may be given as a plain string.
        if isinstance(kinds, str):
            kinds = [kinds]

        if kinds is None:
            kind_ids = model.g_content_kind_ids
        else:
            kind_ids = model.to_kind_ids(kinds)

        # Aggregate the extreme times over all queried content kinds.
        for kind_id in kind_ids:
            for tmin_seconds, tmin_offset in self._conn.execute(
                    sql_min, (kind_id, kind_id)):
                tmin = model.tjoin(tmin_seconds, tmin_offset)
                if tmin is not None and (gtmin is None or tmin < gtmin):
                    gtmin = tmin

            for (tmax_seconds, tmax_offset) in self._conn.execute(
                    sql_max, (kind_id, kind_id)):
                tmax = model.tjoin(tmax_seconds, tmax_offset)
                if tmax is not None and (gtmax is None or tmax > gtmax):
                    gtmax = tmax

        return gtmin, gtmax

1162 

1163 def has(self, kinds): 

1164 ''' 

1165 Check availability of given content kinds. 

1166 

1167 :param kinds: 

1168 Content kinds to query. 

1169 :type kind: 

1170 list of str 

1171 

1172 :returns: 

1173 ``True`` if any of the queried content kinds is available 

1174 in the selection. 

1175 ''' 

1176 self_tmin, self_tmax = self.get_time_span(kinds) 

1177 

1178 return None not in (self_tmin, self_tmax) 

1179 

1180 def get_deltat_span(self, kind): 

1181 ''' 

1182 Get min and max sampling interval of all content of given kind. 

1183 

1184 :param kind: 

1185 Content kind 

1186 :type kind: 

1187 str 

1188 

1189 :returns: ``(deltat_min, deltat_max)`` 

1190 ''' 

1191 

1192 deltats = [ 

1193 deltat for deltat in self.get_deltats(kind) 

1194 if deltat is not None] 

1195 

1196 if deltats: 

1197 return min(deltats), max(deltats) 

1198 else: 

1199 return None, None 

1200 

1201 def iter_kinds(self, codes=None): 

1202 ''' 

1203 Iterate over content types available in selection. 

1204 

1205 :param codes: 

1206 If given, get kinds only for selected codes identifier. 

1207 Only a single identifier may be given here and no pattern matching 

1208 is done, currently. 

1209 :type codes: 

1210 :py:class:`~pyrocko.squirrel.model.Codes` 

1211 

1212 :yields: 

1213 Available content kinds as :py:class:`str`. 

1214 

1215 :complexity: 

1216 O(1), independent of number of nuts. 

1217 ''' 

1218 

1219 return self._database._iter_kinds( 

1220 codes=codes, 

1221 kind_codes_count='%(db)s.%(kind_codes_count)s' % self._names) 

1222 

1223 def iter_deltats(self, kind=None): 

1224 ''' 

1225 Iterate over sampling intervals available in selection. 

1226 

1227 :param kind: 

1228 If given, get sampling intervals only for a given content type. 

1229 :type kind: 

1230 str 

1231 

1232 :yields: 

1233 :py:class:`float` values. 

1234 

1235 :complexity: 

1236 O(1), independent of number of nuts. 

1237 ''' 

1238 return self._database._iter_deltats( 

1239 kind=kind, 

1240 kind_codes_count='%(db)s.%(kind_codes_count)s' % self._names) 

1241 

1242 def iter_codes(self, kind=None): 

1243 ''' 

1244 Iterate over content identifier code sequences available in selection. 

1245 

1246 :param kind: 

1247 If given, get codes only for a given content type. 

1248 :type kind: 

1249 str 

1250 

1251 :yields: 

1252 :py:class:`tuple` of :py:class:`str` 

1253 

1254 :complexity: 

1255 O(1), independent of number of nuts. 

1256 ''' 

1257 return self._database._iter_codes( 

1258 kind=kind, 

1259 kind_codes_count='%(db)s.%(kind_codes_count)s' % self._names) 

1260 

1261 def _iter_codes_info(self, kind=None, codes=None): 

1262 ''' 

1263 Iterate over number of occurrences of any (kind, codes) combination. 

1264 

1265 :param kind: 

1266 If given, get counts only for selected content type. 

1267 :type kind: 

1268 str 

1269 

1270 :yields: 

1271 Tuples of the form ``(kind, codes, deltat, kind_codes_id, count)``. 

1272 

1273 :complexity: 

1274 O(1), independent of number of nuts. 

1275 ''' 

1276 return self._database._iter_codes_info( 

1277 kind=kind, 

1278 codes=codes, 

1279 kind_codes_count='%(db)s.%(kind_codes_count)s' % self._names) 

1280 

1281 def get_kinds(self, codes=None): 

1282 ''' 

1283 Get content types available in selection. 

1284 

1285 :param codes: 

1286 If given, get kinds only for selected codes identifier. 

1287 Only a single identifier may be given here and no pattern matching 

1288 is done, currently. 

1289 :type codes: 

1290 :py:class:`~pyrocko.squirrel.model.Codes` 

1291 

1292 :returns: 

1293 Sorted list of available content types. 

1294 :rtype: 

1295 py:class:`list` of :py:class:`str` 

1296 

1297 :complexity: 

1298 O(1), independent of number of nuts. 

1299 

1300 ''' 

1301 return sorted(list(self.iter_kinds(codes=codes))) 

1302 

1303 def get_deltats(self, kind=None): 

1304 ''' 

1305 Get sampling intervals available in selection. 

1306 

1307 :param kind: 

1308 If given, get sampling intervals only for selected content type. 

1309 :type kind: 

1310 str 

1311 

1312 :complexity: 

1313 O(1), independent of number of nuts. 

1314 

1315 :returns: Sorted list of available sampling intervals. 

1316 ''' 

1317 return sorted(list(self.iter_deltats(kind=kind))) 

1318 

1319 def get_codes(self, kind=None): 

1320 ''' 

1321 Get identifier code sequences available in selection. 

1322 

1323 :param kind: 

1324 If given, get codes only for selected content type. 

1325 :type kind: 

1326 str 

1327 

1328 :complexity: 

1329 O(1), independent of number of nuts. 

1330 

1331 :returns: Sorted list of available codes as tuples of strings. 

1332 ''' 

1333 return sorted(list(self.iter_codes(kind=kind))) 

1334 

1335 def get_counts(self, kind=None): 

1336 ''' 

1337 Get number of occurrences of any (kind, codes) combination. 

1338 

1339 :param kind: 

1340 If given, get codes only for selected content type. 

1341 :type kind: 

1342 str 

1343 

1344 :complexity: 

1345 O(1), independent of number of nuts. 

1346 

1347 :returns: ``dict`` with ``counts[kind][codes]`` or ``counts[codes]`` 

1348 if kind is not ``None`` 

1349 ''' 

1350 d = {} 

1351 for kind_id, codes, _, _, count in self._iter_codes_info(kind=kind): 

1352 if kind_id not in d: 

1353 v = d[kind_id] = {} 

1354 else: 

1355 v = d[kind_id] 

1356 

1357 if codes not in v: 

1358 v[codes] = 0 

1359 

1360 v[codes] += count 

1361 

1362 if kind is not None: 

1363 return d[to_kind_id(kind)] 

1364 else: 

1365 return dict((to_kind(kind_id), v) for (kind_id, v) in d.items()) 

1366 

    def glob_codes(self, kind, codes):
        '''
        Find codes matching given patterns.

        :param kind:
            Content kind to be queried.
        :type kind:
            str

        :param codes:
            List of code patterns to query.
        :type codes:
            :py:class:`list` of :py:class:`~pyrocko.squirrel.model.Codes`
            objects appropriate for the queried content type, or anything which
            can be converted to such objects.

        :returns:
            List of matches of the form ``[kind_codes_id, codes, deltat]``.
        '''

        kind_id = to_kind_id(kind)
        args = [kind_id]
        pats = codes_patterns_for_kind(kind_id, codes)

        if pats:
            # One GLOB clause per pattern, OR-ed together.
            codes_cond = 'AND ( %s ) ' % ' OR '.join(
                ('kind_codes.codes GLOB ?',) * len(pats))

            args.extend(pat.safe_str for pat in pats)
        else:
            # No usable patterns: match on content kind only.
            codes_cond = ''

        sql = self._sql('''
            SELECT kind_codes_id, codes, deltat FROM kind_codes
            WHERE
                kind_id == ? ''' + codes_cond)

        return list(map(list, self._conn.execute(sql, args)))

1405 

1406 def update(self, constraint=None, **kwargs): 

1407 ''' 

1408 Update or partially update channel and event inventories. 

1409 

1410 :param constraint: 

1411 Selection of times or areas to be brought up to date. 

1412 :type constraint: 

1413 :py:class:`~pyrocko.squirrel.client.base.Constraint` 

1414 

1415 :param \\*\\*kwargs: 

1416 Shortcut for setting ``constraint=Constraint(**kwargs)``. 

1417 

1418 This function triggers all attached remote sources, to check for 

1419 updates in the meta-data. The sources will only submit queries when 

1420 their expiration date has passed, or if the selection spans into 

1421 previously unseen times or areas. 

1422 ''' 

1423 

1424 if constraint is None: 

1425 constraint = client.Constraint(**kwargs) 

1426 

1427 for source in self._sources: 

1428 source.update_channel_inventory(self, constraint) 

1429 source.update_event_inventory(self, constraint) 

1430 

1431 def update_waveform_promises(self, constraint=None, **kwargs): 

1432 ''' 

1433 Permit downloading of remote waveforms. 

1434 

1435 :param constraint: 

1436 Remote waveforms compatible with the given constraint are enabled 

1437 for download. 

1438 :type constraint: 

1439 :py:class:`~pyrocko.squirrel.client.base.Constraint` 

1440 

1441 :param \\*\\*kwargs: 

1442 Shortcut for setting ``constraint=Constraint(**kwargs)``. 

1443 

1444 Calling this method permits Squirrel to download waveforms from remote 

1445 sources when processing subsequent waveform requests. This works by 

1446 inserting so called waveform promises into the database. It will look 

1447 into the available channels for each remote source and create a promise 

1448 for each channel compatible with the given constraint. If the promise 

1449 then matches in a waveform request, Squirrel tries to download the 

1450 waveform. If the download is successful, the downloaded waveform is 

1451 added to the Squirrel and the promise is deleted. If the download 

1452 fails, the promise is kept if the reason of failure looks like being 

1453 temporary, e.g. because of a network failure. If the cause of failure 

1454 however seems to be permanent, the promise is deleted so that no 

1455 further attempts are made to download a waveform which might not be 

1456 available from that server at all. To force re-scheduling after a 

1457 permanent failure, call :py:meth:`update_waveform_promises` 

1458 yet another time. 

1459 ''' 

1460 

1461 if constraint is None: 

1462 constraint = client.Constraint(**kwargs) 

1463 

1464 for source in self._sources: 

1465 source.update_waveform_promises(self, constraint) 

1466 

1467 def remove_waveform_promises(self, from_database='selection'): 

1468 ''' 

1469 Remove waveform promises from live selection or global database. 

1470 

1471 Calling this function removes all waveform promises provided by the 

1472 attached sources. 

1473 

1474 :param from_database: 

1475 Remove from live selection ``'selection'`` or global database 

1476 ``'global'``. 

1477 ''' 

1478 for source in self._sources: 

1479 source.remove_waveform_promises(self, from_database=from_database) 

1480 

1481 def update_responses(self, constraint=None, **kwargs): 

1482 if constraint is None: 

1483 constraint = client.Constraint(**kwargs) 

1484 

1485 for source in self._sources: 

1486 source.update_response_inventory(self, constraint) 

1487 

1488 def get_nfiles(self): 

1489 ''' 

1490 Get number of files in selection. 

1491 ''' 

1492 

1493 sql = self._sql('''SELECT COUNT(*) FROM %(db)s.%(file_states)s''') 

1494 for row in self._conn.execute(sql): 

1495 return row[0] 

1496 

1497 def get_nnuts(self): 

1498 ''' 

1499 Get number of nuts in selection. 

1500 ''' 

1501 

1502 sql = self._sql('''SELECT COUNT(*) FROM %(db)s.%(nuts)s''') 

1503 for row in self._conn.execute(sql): 

1504 return row[0] 

1505 

1506 def get_total_size(self): 

1507 ''' 

1508 Get aggregated file size available in selection. 

1509 ''' 

1510 

1511 sql = self._sql(''' 

1512 SELECT SUM(files.size) FROM %(db)s.%(file_states)s 

1513 INNER JOIN files 

1514 ON %(db)s.%(file_states)s.file_id = files.file_id 

1515 ''') 

1516 

1517 for row in self._conn.execute(sql): 

1518 return row[0] or 0 

1519 

1520 def get_stats(self): 

1521 ''' 

1522 Get statistics on contents available through this selection. 

1523 ''' 

1524 

1525 kinds = self.get_kinds() 

1526 time_spans = {} 

1527 for kind in kinds: 

1528 time_spans[kind] = self.get_time_span([kind]) 

1529 

1530 return SquirrelStats( 

1531 nfiles=self.get_nfiles(), 

1532 nnuts=self.get_nnuts(), 

1533 kinds=kinds, 

1534 codes=self.get_codes(), 

1535 total_size=self.get_total_size(), 

1536 counts=self.get_counts(), 

1537 time_spans=time_spans, 

1538 sources=[s.describe() for s in self._sources], 

1539 operators=[op.describe() for op in self._operators]) 

1540 

    def get_content(
            self,
            nut,
            cache_id='default',
            accessor_id='default',
            show_progress=False,
            model='squirrel'):

        '''
        Get and possibly load full content for a given index entry from file.

        :param nut:
            Index entry of the content to retrieve.
        :param cache_id:
            Name of the content cache to use.
        :param accessor_id:
            Name of the consumer on whose behalf the content is accessed
            (used for cache book-keeping).
        :param show_progress:
            If ``True``, show progress while loading from file.
        :param model:
            Object model of the returned content (passed through to the
            cache's ``get``).

        Loads the actual content objects (channel, station, waveform, ...) from
        file. For efficiency, sibling content (all stuff in the same file
        segment) will also be loaded as a side effect. The loaded contents are
        cached in the Squirrel object.

        :raises:
            :py:exc:`~pyrocko.squirrel.error.NotAvailable` if the content
            cannot be retrieved even after (re)loading the file segment.
        '''

        content_cache = self._content_caches[cache_id]
        if not content_cache.has(nut):

            # Load the whole file segment; all entries found in it are put
            # into the cache, so siblings come along for free.
            for nut_loaded in io.iload(
                    nut.file_path,
                    segment=nut.file_segment,
                    format=nut.file_format,
                    database=self._database,
                    update_selection=self,
                    show_progress=show_progress):

                content_cache.put(nut_loaded)

        try:
            return content_cache.get(nut, accessor_id, model)
        except KeyError:
            # Entry vanished between indexing and loading (e.g. file
            # changed on disk).
            raise error.NotAvailable(
                'Unable to retrieve content: %s, %s, %s, %s' % nut.key)

1576 

1577 def advance_accessor(self, accessor_id='default', cache_id=None): 

1578 ''' 

1579 Notify memory caches about consumer moving to a new data batch. 

1580 

1581 :param accessor_id: 

1582 Name of accessing consumer to be advanced. 

1583 :type accessor_id: 

1584 str 

1585 

1586 :param cache_id: 

1587 Name of cache to for which the accessor should be advanced. By 

1588 default the named accessor is advanced in all registered caches. 

1589 By default, two caches named ``'default'`` and ``'waveform'`` are 

1590 available. 

1591 :type cache_id: 

1592 str 

1593 

1594 See :py:class:`~pyrocko.squirrel.cache.ContentCache` for details on how 

1595 Squirrel's memory caching works and can be tuned. Default behaviour is 

1596 to release data when it has not been used in the latest data 

1597 window/batch. If the accessor is never advanced, data is cached 

1598 indefinitely - which is often desired e.g. for station meta-data. 

1599 Methods for consecutive data traversal, like 

1600 :py:meth:`chopper_waveforms` automatically advance and clear 

1601 their accessor. 

1602 ''' 

1603 for cache_ in ( 

1604 self._content_caches.keys() 

1605 if cache_id is None 

1606 else [cache_id]): 

1607 

1608 self._content_caches[cache_].advance_accessor(accessor_id) 

1609 

1610 def clear_accessor(self, accessor_id, cache_id=None): 

1611 ''' 

1612 Notify memory caches about a consumer having finished. 

1613 

1614 :param accessor_id: 

1615 Name of accessor to be cleared. 

1616 :type accessor_id: 

1617 str 

1618 

1619 :param cache_id: 

1620 Name of cache for which the accessor should be cleared. By default 

1621 the named accessor is cleared from all registered caches. By 

1622 default, two caches named ``'default'`` and ``'waveform'`` are 

1623 available. 

1624 :type cache_id: 

1625 str 

1626 

1627 Calling this method clears all references to cache entries held by the 

1628 named accessor. Cache entries are then freed if not referenced by any 

1629 other accessor. 

1630 ''' 

1631 

1632 for cache_ in ( 

1633 self._content_caches.keys() 

1634 if cache_id is None 

1635 else [cache_id]): 

1636 

1637 self._content_caches[cache_].clear_accessor(accessor_id) 

1638 

    def get_cache_stats(self, cache_id):
        '''
        Get statistics for a named content cache.

        :param cache_id:
            Name of the cache to query, e.g. ``'default'`` or ``'waveform'``.

        :returns:
            Result of the cache's ``get_stats()``.
        '''
        return self._content_caches[cache_id].get_stats()

1641 

1642 def _check_duplicates(self, nuts): 

1643 d = defaultdict(list) 

1644 for nut in nuts: 

1645 d[nut.codes].append(nut) 

1646 

1647 for codes, group in d.items(): 

1648 if len(group) > 1: 

1649 logger.warning( 

1650 'Multiple entries matching codes: %s' % str(codes)) 

1651 

1652 @filldocs 

1653 def get_stations( 

1654 self, obj=None, tmin=None, tmax=None, time=None, codes=None, 

1655 model='squirrel'): 

1656 

1657 ''' 

1658 Get stations matching given constraints. 

1659 

1660 %(query_args)s 

1661 

1662 :param model: 

1663 Select object model for returned values: ``'squirrel'`` to get 

1664 Squirrel station objects or ``'pyrocko'`` to get Pyrocko station 

1665 objects with channel information attached. 

1666 :type model: 

1667 str 

1668 

1669 :returns: 

1670 List of :py:class:`pyrocko.squirrel.Station 

1671 <pyrocko.squirrel.model.Station>` objects by default or list of 

1672 :py:class:`pyrocko.model.Station <pyrocko.model.station.Station>` 

1673 objects if ``model='pyrocko'`` is requested. 

1674 

1675 See :py:meth:`iter_nuts` for details on time span matching. 

1676 ''' 

1677 

1678 if model == 'pyrocko': 

1679 return self._get_pyrocko_stations(obj, tmin, tmax, time, codes) 

1680 elif model in ('squirrel', 'stationxml'): 

1681 args = self._get_selection_args( 

1682 STATION, obj, tmin, tmax, time, codes) 

1683 

1684 nuts = sorted( 

1685 self.iter_nuts('station', *args), key=lambda nut: nut.dkey) 

1686 self._check_duplicates(nuts) 

1687 return [self.get_content(nut, model=model) for nut in nuts] 

1688 else: 

1689 raise ValueError('Invalid station model: %s' % model) 

1690 

1691 @filldocs 

1692 def get_channels( 

1693 self, obj=None, tmin=None, tmax=None, time=None, codes=None, 

1694 model='squirrel'): 

1695 

1696 ''' 

1697 Get channels matching given constraints. 

1698 

1699 %(query_args)s 

1700 

1701 :returns: 

1702 List of :py:class:`~pyrocko.squirrel.model.Channel` objects. 

1703 

1704 See :py:meth:`iter_nuts` for details on time span matching. 

1705 ''' 

1706 

1707 args = self._get_selection_args( 

1708 CHANNEL, obj, tmin, tmax, time, codes) 

1709 

1710 nuts = sorted( 

1711 self.iter_nuts('channel', *args), key=lambda nut: nut.dkey) 

1712 self._check_duplicates(nuts) 

1713 return [self.get_content(nut, model=model) for nut in nuts] 

1714 

    @filldocs
    def get_sensors(
            self, obj=None, tmin=None, tmax=None, time=None, codes=None):

        '''
        Get sensors matching given constraints.

        %(query_args)s

        :returns:
            List of :py:class:`~pyrocko.squirrel.model.Sensor` objects.

        See :py:meth:`iter_nuts` for details on time span matching.
        '''

        tmin, tmax, codes = self._get_selection_args(
            CHANNEL, obj, tmin, tmax, time, codes)

        if codes is not None:
            # Widen each channel code pattern by replacing its last character
            # with '?' so that all matching channels are picked up together
            # (presumably the component letter, so channels of one sensor
            # group — TODO confirm).
            codes = codes_patterns_list(
                (entry.replace(channel=entry.channel[:-1] + '?')
                 if entry != '*' else entry)
                for entry in codes)

        nuts = sorted(
            self.iter_nuts(
                'channel', tmin, tmax, codes), key=lambda nut: nut.dkey)
        self._check_duplicates(nuts)
        # Group the loaded channel objects into Sensor objects.
        return model.Sensor.from_channels(
            self.get_content(nut) for nut in nuts)

1745 

1746 @filldocs 

1747 def get_responses( 

1748 self, obj=None, tmin=None, tmax=None, time=None, codes=None, 

1749 model='squirrel'): 

1750 

1751 ''' 

1752 Get instrument responses matching given constraints. 

1753 

1754 %(query_args)s 

1755 

1756 :returns: 

1757 List of :py:class:`~pyrocko.squirrel.model.Response` objects. 

1758 

1759 See :py:meth:`iter_nuts` for details on time span matching. 

1760 ''' 

1761 

1762 args = self._get_selection_args( 

1763 RESPONSE, obj, tmin, tmax, time, codes) 

1764 

1765 nuts = sorted( 

1766 self.iter_nuts('response', *args), key=lambda nut: nut.dkey) 

1767 self._check_duplicates(nuts) 

1768 return [self.get_content(nut, model=model) for nut in nuts] 

1769 

1770 @filldocs 

1771 def get_response( 

1772 self, obj=None, tmin=None, tmax=None, time=None, codes=None, 

1773 model='squirrel'): 

1774 

1775 ''' 

1776 Get instrument response matching given constraints. 

1777 

1778 %(query_args)s 

1779 

1780 :returns: 

1781 :py:class:`~pyrocko.squirrel.model.Response` object. 

1782 

1783 Same as :py:meth:`get_responses` but returning exactly one response. 

1784 Raises :py:exc:`~pyrocko.squirrel.error.NotAvailable` if zero or more 

1785 than one is available. 

1786 

1787 See :py:meth:`iter_nuts` for details on time span matching. 

1788 ''' 

1789 

1790 responses = self.get_responses( 

1791 obj, tmin, tmax, time, codes, model=model) 

1792 if len(responses) == 0: 

1793 raise error.NotAvailable( 

1794 'No instrument response available (%s).' 

1795 % self._get_selection_args_str( 

1796 RESPONSE, obj, tmin, tmax, time, codes)) 

1797 

1798 elif len(responses) > 1: 

1799 if model == 'squirrel': 

1800 rinfo = ':\n' + '\n'.join( 

1801 ' ' + resp.summary for resp in responses) 

1802 else: 

1803 rinfo = '.' 

1804 

1805 raise error.NotAvailable( 

1806 'Multiple instrument responses matching given constraints ' 

1807 '(%s)%s' % ( 

1808 self._get_selection_args_str( 

1809 RESPONSE, obj, tmin, tmax, time, codes), rinfo)) 

1810 

1811 return responses[0] 

1812 

1813 @filldocs 

1814 def get_events( 

1815 self, obj=None, tmin=None, tmax=None, time=None, codes=None): 

1816 

1817 ''' 

1818 Get events matching given constraints. 

1819 

1820 %(query_args)s 

1821 

1822 :returns: 

1823 List of :py:class:`~pyrocko.model.event.Event` objects. 

1824 

1825 See :py:meth:`iter_nuts` for details on time span matching. 

1826 ''' 

1827 

1828 args = self._get_selection_args(EVENT, obj, tmin, tmax, time, codes) 

1829 nuts = sorted( 

1830 self.iter_nuts('event', *args), key=lambda nut: nut.dkey) 

1831 self._check_duplicates(nuts) 

1832 return [self.get_content(nut) for nut in nuts] 

1833 

1834 def _redeem_promises(self, *args): 

1835 

1836 tmin, tmax, _ = args 

1837 

1838 waveforms = list(self.iter_nuts('waveform', *args)) 

1839 promises = list(self.iter_nuts('waveform_promise', *args)) 

1840 

1841 codes_to_avail = defaultdict(list) 

1842 for nut in waveforms: 

1843 codes_to_avail[nut.codes].append((nut.tmin, nut.tmax)) 

1844 

1845 def tts(x): 

1846 if isinstance(x, tuple): 

1847 return tuple(tts(e) for e in x) 

1848 elif isinstance(x, list): 

1849 return list(tts(e) for e in x) 

1850 else: 

1851 return util.time_to_str(x) 

1852 

1853 orders = [] 

1854 for promise in promises: 

1855 waveforms_avail = codes_to_avail[promise.codes] 

1856 for block_tmin, block_tmax in blocks( 

1857 max(tmin, promise.tmin), 

1858 min(tmax, promise.tmax), 

1859 promise.deltat): 

1860 

1861 orders.append( 

1862 WaveformOrder( 

1863 source_id=promise.file_path, 

1864 codes=promise.codes, 

1865 tmin=block_tmin, 

1866 tmax=block_tmax, 

1867 deltat=promise.deltat, 

1868 gaps=gaps(waveforms_avail, block_tmin, block_tmax))) 

1869 

1870 orders_noop, orders = lpick(lambda order: order.gaps, orders) 

1871 

1872 order_keys_noop = set(order_key(order) for order in orders_noop) 

1873 if len(order_keys_noop) != 0 or len(orders_noop) != 0: 

1874 logger.info( 

1875 'Waveform orders already satisified with cached/local data: ' 

1876 '%i (%i)' % (len(order_keys_noop), len(orders_noop))) 

1877 

1878 source_ids = [] 

1879 sources = {} 

1880 for source in self._sources: 

1881 if isinstance(source, fdsn.FDSNSource): 

1882 source_ids.append(source._source_id) 

1883 sources[source._source_id] = source 

1884 

1885 source_priority = dict( 

1886 (source_id, i) for (i, source_id) in enumerate(source_ids)) 

1887 

1888 order_groups = defaultdict(list) 

1889 for order in orders: 

1890 order_groups[order_key(order)].append(order) 

1891 

1892 for k, order_group in order_groups.items(): 

1893 order_group.sort( 

1894 key=lambda order: source_priority[order.source_id]) 

1895 

1896 n_order_groups = len(order_groups) 

1897 

1898 if len(order_groups) != 0 or len(orders) != 0: 

1899 logger.info( 

1900 'Waveform orders standing for download: %i (%i)' 

1901 % (len(order_groups), len(orders))) 

1902 

1903 task = make_task('Waveform orders processed', n_order_groups) 

1904 else: 

1905 task = None 

1906 

1907 def split_promise(order): 

1908 self._split_nuts( 

1909 'waveform_promise', 

1910 order.tmin, order.tmax, 

1911 codes=order.codes, 

1912 path=order.source_id) 

1913 

1914 def release_order_group(order): 

1915 okey = order_key(order) 

1916 for followup in order_groups[okey]: 

1917 split_promise(followup) 

1918 

1919 del order_groups[okey] 

1920 

1921 if task: 

1922 task.update(n_order_groups - len(order_groups)) 

1923 

1924 def noop(order): 

1925 pass 

1926 

1927 def success(order): 

1928 release_order_group(order) 

1929 split_promise(order) 

1930 

1931 def batch_add(paths): 

1932 self.add(paths) 

1933 

1934 calls = queue.Queue() 

1935 

1936 def enqueue(f): 

1937 def wrapper(*args): 

1938 calls.put((f, args)) 

1939 

1940 return wrapper 

1941 

1942 for order in orders_noop: 

1943 split_promise(order) 

1944 

1945 while order_groups: 

1946 

1947 orders_now = [] 

1948 empty = [] 

1949 for k, order_group in order_groups.items(): 

1950 try: 

1951 orders_now.append(order_group.pop(0)) 

1952 except IndexError: 

1953 empty.append(k) 

1954 

1955 for k in empty: 

1956 del order_groups[k] 

1957 

1958 by_source_id = defaultdict(list) 

1959 for order in orders_now: 

1960 by_source_id[order.source_id].append(order) 

1961 

1962 threads = [] 

1963 for source_id in by_source_id: 

1964 def download(): 

1965 try: 

1966 sources[source_id].download_waveforms( 

1967 by_source_id[source_id], 

1968 success=enqueue(success), 

1969 error_permanent=enqueue(split_promise), 

1970 error_temporary=noop, 

1971 batch_add=enqueue(batch_add)) 

1972 

1973 finally: 

1974 calls.put(None) 

1975 

1976 thread = threading.Thread(target=download) 

1977 thread.start() 

1978 threads.append(thread) 

1979 

1980 ndone = 0 

1981 while ndone < len(threads): 

1982 ret = calls.get() 

1983 if ret is None: 

1984 ndone += 1 

1985 else: 

1986 ret[0](*ret[1]) 

1987 

1988 for thread in threads: 

1989 thread.join() 

1990 

1991 if task: 

1992 task.update(n_order_groups - len(order_groups)) 

1993 

1994 if task: 

1995 task.done() 

1996 

1997 @filldocs 

1998 def get_waveform_nuts( 

1999 self, obj=None, tmin=None, tmax=None, time=None, codes=None): 

2000 

2001 ''' 

2002 Get waveform content entities matching given constraints. 

2003 

2004 %(query_args)s 

2005 

2006 Like :py:meth:`get_nuts` with ``kind='waveform'`` but additionally 

2007 resolves matching waveform promises (downloads waveforms from remote 

2008 sources). 

2009 

2010 See :py:meth:`iter_nuts` for details on time span matching. 

2011 ''' 

2012 

2013 args = self._get_selection_args(WAVEFORM, obj, tmin, tmax, time, codes) 

2014 self._redeem_promises(*args) 

2015 return sorted( 

2016 self.iter_nuts('waveform', *args), key=lambda nut: nut.dkey) 

2017 

2018 @filldocs 

2019 def get_waveforms( 

2020 self, obj=None, tmin=None, tmax=None, time=None, codes=None, 

2021 uncut=False, want_incomplete=True, degap=True, maxgap=5, 

2022 maxlap=None, snap=None, include_last=False, load_data=True, 

2023 accessor_id='default', operator_params=None): 

2024 

2025 ''' 

2026 Get waveforms matching given constraints. 

2027 

2028 %(query_args)s 

2029 

2030 :param uncut: 

2031 Set to ``True``, to disable cutting traces to [``tmin``, ``tmax``] 

2032 and to disable degapping/deoverlapping. Returns untouched traces as 

2033 they are read from file segment. File segments are always read in 

2034 their entirety. 

2035 :type uncut: 

2036 bool 

2037 

2038 :param want_incomplete: 

2039 If ``True``, gappy/incomplete traces are included in the result. 

2040 :type want_incomplete: 

2041 bool 

2042 

2043 :param degap: 

2044 If ``True``, connect traces and remove gaps and overlaps. 

2045 :type degap: 

2046 bool 

2047 

2048 :param maxgap: 

2049 Maximum gap size in samples which is filled with interpolated 

2050 samples when ``degap`` is ``True``. 

2051 :type maxgap: 

2052 int 

2053 

2054 :param maxlap: 

2055 Maximum overlap size in samples which is removed when ``degap`` is 

2056 ``True``. 

2057 :type maxlap: 

2058 int 

2059 

2060 :param snap: 

2061 Rounding functions used when computing sample index from time 

2062 instance, for trace start and trace end, respectively. By default, 

2063 ``(round, round)`` is used. 

2064 :type snap: 

2065 tuple of 2 callables 

2066 

2067 :param include_last: 

2068 If ``True``, add one more sample to the returned traces (the sample 

2069 which would be the first sample of a query with ``tmin`` set to the 

2070 current value of ``tmax``). 

2071 :type include_last: 

2072 bool 

2073 

2074 :param load_data: 

2075 If ``True``, waveform data samples are read from files (or cache). 

2076 If ``False``, meta-information-only traces are returned (dummy 

2077 traces with no data samples). 

2078 :type load_data: 

2079 bool 

2080 

2081 :param accessor_id: 

2082 Name of consumer on who's behalf data is accessed. Used in cache 

2083 management (see :py:mod:`~pyrocko.squirrel.cache`). Used as a key 

2084 to distinguish different points of extraction for the decision of 

2085 when to release cached waveform data. Should be used when data is 

2086 alternately extracted from more than one region / selection. 

2087 :type accessor_id: 

2088 str 

2089 

2090 See :py:meth:`iter_nuts` for details on time span matching. 

2091 

2092 Loaded data is kept in memory (at least) until 

2093 :py:meth:`clear_accessor` has been called or 

2094 :py:meth:`advance_accessor` has been called two consecutive times 

2095 without data being accessed between the two calls (by this accessor). 

2096 Data may still be further kept in the memory cache if held alive by 

2097 consumers with a different ``accessor_id``. 

2098 ''' 

2099 

2100 tmin, tmax, codes = self._get_selection_args( 

2101 WAVEFORM, obj, tmin, tmax, time, codes) 

2102 

2103 self_tmin, self_tmax = self.get_time_span( 

2104 ['waveform', 'waveform_promise']) 

2105 

2106 if None in (self_tmin, self_tmax): 

2107 logger.warning( 

2108 'No waveforms available.') 

2109 return [] 

2110 

2111 tmin = tmin if tmin is not None else self_tmin 

2112 tmax = tmax if tmax is not None else self_tmax 

2113 

2114 if codes is not None and len(codes) == 1: 

2115 # TODO: fix for multiple / mixed codes 

2116 operator = self.get_operator(codes[0]) 

2117 if operator is not None: 

2118 return operator.get_waveforms( 

2119 self, codes[0], 

2120 tmin=tmin, tmax=tmax, 

2121 uncut=uncut, want_incomplete=want_incomplete, degap=degap, 

2122 maxgap=maxgap, maxlap=maxlap, snap=snap, 

2123 include_last=include_last, load_data=load_data, 

2124 accessor_id=accessor_id, params=operator_params) 

2125 

2126 nuts = self.get_waveform_nuts(obj, tmin, tmax, time, codes) 

2127 

2128 if load_data: 

2129 traces = [ 

2130 self.get_content(nut, 'waveform', accessor_id) for nut in nuts] 

2131 

2132 else: 

2133 traces = [ 

2134 trace.Trace(**nut.trace_kwargs) for nut in nuts] 

2135 

2136 if uncut: 

2137 return traces 

2138 

2139 if snap is None: 

2140 snap = (round, round) 

2141 

2142 chopped = [] 

2143 for tr in traces: 

2144 if not load_data and tr.ydata is not None: 

2145 tr = tr.copy(data=False) 

2146 tr.ydata = None 

2147 

2148 try: 

2149 chopped.append(tr.chop( 

2150 tmin, tmax, 

2151 inplace=False, 

2152 snap=snap, 

2153 include_last=include_last)) 

2154 

2155 except trace.NoData: 

2156 pass 

2157 

2158 processed = self._process_chopped( 

2159 chopped, degap, maxgap, maxlap, want_incomplete, tmin, tmax) 

2160 

2161 return processed 

2162 

2163 @filldocs 

2164 def chopper_waveforms( 

2165 self, obj=None, tmin=None, tmax=None, time=None, codes=None, 

2166 tinc=None, tpad=0., 

2167 want_incomplete=True, snap_window=False, 

2168 degap=True, maxgap=5, maxlap=None, 

2169 snap=None, include_last=False, load_data=True, 

2170 accessor_id=None, clear_accessor=True, operator_params=None): 

2171 

2172 ''' 

2173 Iterate window-wise over waveform archive. 

2174 

2175 %(query_args)s 

2176 

2177 :param tinc: 

2178 Time increment (window shift time) (default uses ``tmax-tmin``). 

2179 :type tinc: 

2180 timestamp 

2181 

2182 :param tpad: 

2183 Padding time appended on either side of the data window (window 

2184 overlap is ``2*tpad``). 

2185 :type tpad: 

2186 timestamp 

2187 

2188 :param want_incomplete: 

2189 If ``True``, gappy/incomplete traces are included in the result. 

2190 :type want_incomplete: 

2191 bool 

2192 

2193 :param snap_window: 

2194 If ``True``, start time windows at multiples of tinc with respect 

2195 to system time zero. 

2196 :type snap_window: 

2197 bool 

2198 

2199 :param degap: 

2200 If ``True``, connect traces and remove gaps and overlaps. 

2201 :type degap: 

2202 bool 

2203 

2204 :param maxgap: 

2205 Maximum gap size in samples which is filled with interpolated 

2206 samples when ``degap`` is ``True``. 

2207 :type maxgap: 

2208 int 

2209 

2210 :param maxlap: 

2211 Maximum overlap size in samples which is removed when ``degap`` is 

2212 ``True``. 

2213 :type maxlap: 

2214 int 

2215 

2216 :param snap: 

2217 Rounding functions used when computing sample index from time 

2218 instance, for trace start and trace end, respectively. By default, 

2219 ``(round, round)`` is used. 

2220 :type snap: 

2221 tuple of 2 callables 

2222 

2223 :param include_last: 

2224 If ``True``, add one more sample to the returned traces (the sample 

2225 which would be the first sample of a query with ``tmin`` set to the 

2226 current value of ``tmax``). 

2227 :type include_last: 

2228 bool 

2229 

2230 :param load_data: 

2231 If ``True``, waveform data samples are read from files (or cache). 

2232 If ``False``, meta-information-only traces are returned (dummy 

2233 traces with no data samples). 

2234 :type load_data: 

2235 bool 

2236 

2237 :param accessor_id: 

2238 Name of consumer on who's behalf data is accessed. Used in cache 

2239 management (see :py:mod:`~pyrocko.squirrel.cache`). Used as a key 

2240 to distinguish different points of extraction for the decision of 

2241 when to release cached waveform data. Should be used when data is 

2242 alternately extracted from more than one region / selection. 

2243 :type accessor_id: 

2244 str 

2245 

2246 :param clear_accessor: 

2247 If ``True`` (default), :py:meth:`clear_accessor` is called when the 

2248 chopper finishes. Set to ``False`` to keep loaded waveforms in 

2249 memory when the generator returns. 

2250 :type clear_accessor: 

2251 bool 

2252 

2253 :yields: 

2254 A list of :py:class:`~pyrocko.trace.Trace` objects for every 

2255 extracted time window. 

2256 

2257 See :py:meth:`iter_nuts` for details on time span matching. 

2258 ''' 

2259 

2260 tmin, tmax, codes = self._get_selection_args( 

2261 WAVEFORM, obj, tmin, tmax, time, codes) 

2262 

2263 self_tmin, self_tmax = self.get_time_span( 

2264 ['waveform', 'waveform_promise']) 

2265 

2266 if None in (self_tmin, self_tmax): 

2267 logger.warning( 

2268 'Content has undefined time span. No waveforms and no ' 

2269 'waveform promises?') 

2270 return 

2271 

2272 if snap_window and tinc is not None: 

2273 tmin = tmin if tmin is not None else self_tmin 

2274 tmax = tmax if tmax is not None else self_tmax 

2275 tmin = math.floor(tmin / tinc) * tinc 

2276 tmax = math.ceil(tmax / tinc) * tinc 

2277 else: 

2278 tmin = tmin if tmin is not None else self_tmin + tpad 

2279 tmax = tmax if tmax is not None else self_tmax - tpad 

2280 

2281 tinc = tinc if tinc is not None else tmax - tmin 

2282 

2283 try: 

2284 if accessor_id is None: 

2285 accessor_id = 'chopper%i' % self._n_choppers_active 

2286 

2287 self._n_choppers_active += 1 

2288 

2289 eps = tinc * 1e-6 

2290 if tinc != 0.0: 

2291 nwin = int(((tmax - eps) - tmin) / tinc) + 1 

2292 else: 

2293 nwin = 1 

2294 

2295 for iwin in range(nwin): 

2296 wmin, wmax = tmin+iwin*tinc, min(tmin+(iwin+1)*tinc, tmax) 

2297 

2298 chopped = self.get_waveforms( 

2299 tmin=wmin-tpad, 

2300 tmax=wmax+tpad, 

2301 codes=codes, 

2302 snap=snap, 

2303 include_last=include_last, 

2304 load_data=load_data, 

2305 want_incomplete=want_incomplete, 

2306 degap=degap, 

2307 maxgap=maxgap, 

2308 maxlap=maxlap, 

2309 accessor_id=accessor_id, 

2310 operator_params=operator_params) 

2311 

2312 self.advance_accessor(accessor_id) 

2313 

2314 yield Batch( 

2315 tmin=wmin, 

2316 tmax=wmax, 

2317 i=iwin, 

2318 n=nwin, 

2319 traces=chopped) 

2320 

2321 iwin += 1 

2322 

2323 finally: 

2324 self._n_choppers_active -= 1 

2325 if clear_accessor: 

2326 self.clear_accessor(accessor_id, 'waveform') 

2327 

2328 def _process_chopped( 

2329 self, chopped, degap, maxgap, maxlap, want_incomplete, tmin, tmax): 

2330 

2331 chopped.sort(key=lambda a: a.full_id) 

2332 if degap: 

2333 chopped = trace.degapper(chopped, maxgap=maxgap, maxlap=maxlap) 

2334 

2335 if not want_incomplete: 

2336 chopped_weeded = [] 

2337 for tr in chopped: 

2338 emin = tr.tmin - tmin 

2339 emax = tr.tmax + tr.deltat - tmax 

2340 if (abs(emin) <= 0.5*tr.deltat and abs(emax) <= 0.5*tr.deltat): 

2341 chopped_weeded.append(tr) 

2342 

2343 elif degap: 

2344 if (0. < emin <= 5. * tr.deltat 

2345 and -5. * tr.deltat <= emax < 0.): 

2346 

2347 tr.extend(tmin, tmax-tr.deltat, fillmethod='repeat') 

2348 chopped_weeded.append(tr) 

2349 

2350 chopped = chopped_weeded 

2351 

2352 return chopped 

2353 

2354 def _get_pyrocko_stations( 

2355 self, obj=None, tmin=None, tmax=None, time=None, codes=None): 

2356 

2357 from pyrocko import model as pmodel 

2358 

2359 by_nsl = defaultdict(lambda: (list(), list())) 

2360 for station in self.get_stations(obj, tmin, tmax, time, codes): 

2361 sargs = station._get_pyrocko_station_args() 

2362 by_nsl[station.codes.nsl][0].append(sargs) 

2363 

2364 for channel in self.get_channels(obj, tmin, tmax, time, codes): 

2365 sargs = channel._get_pyrocko_station_args() 

2366 sargs_list, channels_list = by_nsl[channel.codes.nsl] 

2367 sargs_list.append(sargs) 

2368 channels_list.append(channel) 

2369 

2370 pstations = [] 

2371 nsls = list(by_nsl.keys()) 

2372 nsls.sort() 

2373 for nsl in nsls: 

2374 sargs_list, channels_list = by_nsl[nsl] 

2375 sargs = util.consistency_merge( 

2376 [('',) + x for x in sargs_list]) 

2377 

2378 by_c = defaultdict(list) 

2379 for ch in channels_list: 

2380 by_c[ch.codes.channel].append(ch._get_pyrocko_channel_args()) 

2381 

2382 chas = list(by_c.keys()) 

2383 chas.sort() 

2384 pchannels = [] 

2385 for cha in chas: 

2386 list_of_cargs = by_c[cha] 

2387 cargs = util.consistency_merge( 

2388 [('',) + x for x in list_of_cargs]) 

2389 pchannels.append(pmodel.Channel(*cargs)) 

2390 

2391 pstations.append( 

2392 pmodel.Station(*sargs, channels=pchannels)) 

2393 

2394 return pstations 

2395 

2396 @property 

2397 def pile(self): 

2398 

2399 ''' 

2400 Emulates the older :py:class:`pyrocko.pile.Pile` interface. 

2401 

2402 This property exposes a :py:class:`pyrocko.squirrel.pile.Pile` object, 

2403 which emulates most of the older :py:class:`pyrocko.pile.Pile` methods 

2404 but uses the fluffy power of the Squirrel under the hood. 

2405 

2406 This interface can be used as a drop-in replacement for piles which are 

2407 used in existing scripts and programs for efficient waveform data 

2408 access. The Squirrel-based pile scales better for large datasets. Newer 

2409 scripts should use Squirrel's native methods to avoid the emulation 

2410 overhead. 

2411 ''' 

2412 from . import pile 

2413 

2414 if self._pile is None: 

2415 self._pile = pile.Pile(self) 

2416 

2417 return self._pile 

2418 

2419 def snuffle(self): 

2420 ''' 

2421 Look at dataset in Snuffler. 

2422 ''' 

2423 self.pile.snuffle() 

2424 

2425 def _gather_codes_keys(self, kind, gather, selector): 

2426 return set( 

2427 gather(codes) 

2428 for codes in self.iter_codes(kind) 

2429 if selector is None or selector(codes)) 

2430 

2431 def __str__(self): 

2432 return str(self.get_stats()) 

2433 

    def get_coverage(
            self, kind, tmin=None, tmax=None, codes=None, limit=None):

        '''
        Get coverage information.

        Get information about strips of gapless data coverage.

        :param kind:
            Content kind to be queried.
        :type kind:
            str

        :param tmin:
            Start time of query interval.
        :type tmin:
            timestamp

        :param tmax:
            End time of query interval.
        :type tmax:
            timestamp

        :param codes:
            If given, restrict query to given content codes patterns.
        :type codes:
            :py:class:`list` of :py:class:`~pyrocko.squirrel.model.Codes`
            objects appropriate for the queried content type, or anything
            which can be converted to such objects.

        :param limit:
            Limit query to return only up to a given maximum number of
            entries per matching time series (without setting this option,
            very gappy data could cause the query to execute for a very long
            time).
        :type limit:
            int

        :returns:
            Information about time spans covered by the requested time
            series data.
        :rtype:
            :py:class:`list` of :py:class:`Coverage` objects
        '''

        # Split query times into the (seconds, offset) representation used
        # by the coverage table's time columns.
        tmin_seconds, tmin_offset = model.tsplit(tmin)
        tmax_seconds, tmax_offset = model.tsplit(tmax)
        kind_id = to_kind_id(kind)

        codes_info = list(self._iter_codes_info(kind=kind))

        # Collect (pattern, kind_codes_id, codes, deltat) for every time
        # series matching the query.
        kdata_all = []
        if codes is None:
            for _, codes_entry, deltat, kind_codes_id, _ in codes_info:
                kdata_all.append(
                    (codes_entry, kind_codes_id, codes_entry, deltat))

        else:
            for codes_entry in codes:
                pattern = to_codes(kind_id, codes_entry)
                for _, codes_entry, deltat, kind_codes_id, _ in codes_info:
                    if model.match_codes(pattern, codes_entry):
                        kdata_all.append(
                            (pattern, kind_codes_id, codes_entry, deltat))

        kind_codes_ids = [x[1] for x in kdata_all]

        # Number of nuts alive exactly at tmin, per (codes, deltat); used
        # to seed the running counts below (the coverage query below
        # intentionally excludes changes at exactly tmin).
        counts_at_tmin = {}
        if tmin is not None:
            for nut in self.iter_nuts(
                    kind, tmin, tmin, kind_codes_ids=kind_codes_ids):

                k = nut.codes, nut.deltat
                if k not in counts_at_tmin:
                    counts_at_tmin[k] = 0

                counts_at_tmin[k] += 1

        coverages = []
        for pattern, kind_codes_id, codes_entry, deltat in kdata_all:
            # entry: [pattern, codes, deltat, tmin, tmax, changes]
            entry = [pattern, codes_entry, deltat, None, None, []]

            # Fetch earliest (ASC) and latest (DESC) coverage change time
            # for this time series into entry[3] and entry[4].
            for i, order in [(0, 'ASC'), (1, 'DESC')]:
                sql = self._sql('''
                    SELECT
                        time_seconds,
                        time_offset
                    FROM %(db)s.%(coverage)s
                    WHERE
                        kind_codes_id == ?
                    ORDER BY
                        kind_codes_id ''' + order + ''',
                        time_seconds ''' + order + ''',
                        time_offset ''' + order + '''
                    LIMIT 1
                ''')

                for row in self._conn.execute(sql, [kind_codes_id]):
                    entry[3+i] = model.tjoin(row[0], row[1])

            # No coverage rows at all for this series: skip it.
            if None in entry[3:5]:
                continue

            args = [kind_codes_id]

            sql_time = ''
            if tmin is not None:
                # intentionally < because (== tmin) is queried from nuts
                sql_time += ' AND ( ? < time_seconds ' \
                    'OR ( ? == time_seconds AND ? < time_offset ) ) '
                args.extend([tmin_seconds, tmin_seconds, tmin_offset])

            if tmax is not None:
                sql_time += ' AND ( time_seconds < ? ' \
                    'OR ( ? == time_seconds AND time_offset <= ? ) ) '
                args.extend([tmax_seconds, tmax_seconds, tmax_offset])

            sql_limit = ''
            if limit is not None:
                sql_limit = ' LIMIT ?'
                args.append(limit)

            sql = self._sql('''
                SELECT
                    time_seconds,
                    time_offset,
                    step
                FROM %(db)s.%(coverage)s
                WHERE
                    kind_codes_id == ?
                    ''' + sql_time + '''
                ORDER BY
                    kind_codes_id,
                    time_seconds,
                    time_offset
            ''' + sql_limit)

            rows = list(self._conn.execute(sql, args))

            if limit is not None and len(rows) == limit:
                # Hit the limit: change list would be incomplete, flag it
                # with None instead of returning truncated data.
                entry[-1] = None
            else:
                # Accumulate per-row step changes into absolute counts
                # along time, starting from the count at tmin.
                counts = counts_at_tmin.get((codes_entry, deltat), 0)
                tlast = None
                if tmin is not None:
                    entry[-1].append((tmin, counts))
                    tlast = tmin

                for row in rows:
                    t = model.tjoin(row[0], row[1])
                    counts += row[2]
                    entry[-1].append((t, counts))
                    tlast = t

                if tmax is not None and (tlast is None or tlast != tmax):
                    entry[-1].append((tmax, counts))

            coverages.append(model.Coverage.from_values(entry + [kind_id]))

        return coverages

2592 

    def add_operator(self, op):
        '''
        Register an operator with this Squirrel instance.
        '''
        self._operators.append(op)

2595 

2596 def update_operator_mappings(self): 

2597 available = self.get_codes(kind=('channel')) 

2598 

2599 for operator in self._operators: 

2600 operator.update_mappings(available, self._operator_registry) 

2601 

2602 def iter_operator_mappings(self): 

2603 for operator in self._operators: 

2604 for in_codes, out_codes in operator.iter_mappings(): 

2605 yield operator, in_codes, out_codes 

2606 

    def get_operator_mappings(self):
        '''
        Get all operator mappings as a list of
        ``(operator, in_codes, out_codes)`` triples.
        '''
        return list(self.iter_operator_mappings())

2609 

2610 def get_operator(self, codes): 

2611 try: 

2612 return self._operator_registry[codes][0] 

2613 except KeyError: 

2614 return None 

2615 

2616 def get_operator_group(self, codes): 

2617 try: 

2618 return self._operator_registry[codes] 

2619 except KeyError: 

2620 return None, (None, None, None) 

2621 

2622 def iter_operator_codes(self): 

2623 for _, _, out_codes in self.iter_operator_mappings(): 

2624 for codes in out_codes: 

2625 yield codes 

2626 

    def get_operator_codes(self):
        '''
        Get list of all output codes of the registered operators.
        '''
        return list(self.iter_operator_codes())

2629 

2630 def print_tables(self, table_names=None, stream=None): 

2631 ''' 

2632 Dump raw database tables in textual form (for debugging purposes). 

2633 

2634 :param table_names: 

2635 Names of tables to be dumped or ``None`` to dump all. 

2636 :type table_names: 

2637 :py:class:`list` of :py:class:`str` 

2638 

2639 :param stream: 

2640 Open file or ``None`` to dump to standard output. 

2641 ''' 

2642 

2643 if stream is None: 

2644 stream = sys.stdout 

2645 

2646 if isinstance(table_names, str): 

2647 table_names = [table_names] 

2648 

2649 if table_names is None: 

2650 table_names = [ 

2651 'selection_file_states', 

2652 'selection_nuts', 

2653 'selection_kind_codes_count', 

2654 'files', 'nuts', 'kind_codes', 'kind_codes_count'] 

2655 

2656 m = { 

2657 'selection_file_states': '%(db)s.%(file_states)s', 

2658 'selection_nuts': '%(db)s.%(nuts)s', 

2659 'selection_kind_codes_count': '%(db)s.%(kind_codes_count)s', 

2660 'files': 'files', 

2661 'nuts': 'nuts', 

2662 'kind_codes': 'kind_codes', 

2663 'kind_codes_count': 'kind_codes_count'} 

2664 

2665 for table_name in table_names: 

2666 self._database.print_table( 

2667 m[table_name] % self._names, stream=stream) 

2668 

2669 

class SquirrelStats(Object):
    '''
    Container to hold statistics about contents available from a Squirrel.

    See also :py:meth:`Squirrel.get_stats`.
    '''

    # Number of files in selection.
    nfiles = Int.T(
        help='Number of files in selection.')
    # Number of index nuts in selection.
    nnuts = Int.T(
        help='Number of index nuts in selection.')
    codes = List.T(
        Tuple.T(content_t=String.T()),
        help='Available code sequences in selection, e.g. '
             '(agency, network, station, location) for stations nuts.')
    kinds = List.T(
        String.T(),
        help='Available content types in selection.')
    total_size = Int.T(
        help='Aggregated file size of files is selection.')
    counts = Dict.T(
        String.T(), Dict.T(Tuple.T(content_t=String.T()), Int.T()),
        help='Breakdown of how many nuts of any content type and code '
             'sequence are available in selection, ``counts[kind][codes]``.')
    time_spans = Dict.T(
        String.T(), Tuple.T(content_t=Timestamp.T()),
        help='Time spans by content type.')
    sources = List.T(
        String.T(),
        help='Descriptions of attached sources.')
    operators = List.T(
        String.T(),
        help='Descriptions of attached operators.')

    def __str__(self):
        # Total nut count per content kind, summed over all codes.
        kind_counts = dict(
            (kind, sum(self.counts[kind].values())) for kind in self.kinds)

        scodes = model.codes_to_str_abbreviated(self.codes)

        ssources = '<none>' if not self.sources else '\n' + '\n'.join(
            '  ' + s for s in self.sources)

        soperators = '<none>' if not self.operators else '\n' + '\n'.join(
            '  ' + s for s in self.operators)

        def stime(t):
            # Suppress the global sentinel min/max times in the output.
            return util.tts(t) if t is not None and t not in (
                model.g_tmin, model.g_tmax) else '<none>'

        def stable(rows):
            # Render rows as a left-justified, column-aligned table.
            ns = [max(len(w) for w in col) for col in zip(*rows)]
            return '\n'.join(
                ' '.join(w.ljust(n) for n, w in zip(ns, row))
                for row in rows)

        def indent(s):
            return '\n'.join('  '+line for line in s.splitlines())

        stspans = '<none>' if not self.kinds else '\n' + indent(stable([(
            kind + ':',
            str(kind_counts[kind]),
            stime(self.time_spans[kind][0]),
            '-',
            stime(self.time_spans[kind][1])) for kind in sorted(self.kinds)]))

        s = '''
Number of files:               %i
Total size of known files:     %s
Number of index nuts:          %i
Available content kinds:       %s
Available codes:               %s
Sources:                       %s
Operators:                     %s''' % (
            self.nfiles,
            util.human_bytesize(self.total_size),
            self.nnuts,
            stspans, scodes, ssources, soperators)

        return s.lstrip()

2750 

2751 

# Public API of this module.
__all__ = [
    'Squirrel',
    'SquirrelStats',
]