1# http://pyrocko.org - GPLv3 

2# 

3# The Pyrocko Developers, 21st Century 

4# ---|P------/S----------~Lg---------- 

5 

6from __future__ import absolute_import, print_function 

7 

8import sys 

9import os 

10 

11import math 

12import logging 

13import threading 

14import queue 

15from collections import defaultdict 

16 

17from pyrocko.guts import Object, Int, List, Tuple, String, Timestamp, Dict 

18from pyrocko import util, trace 

19from pyrocko.progress import progress 

20 

21from . import model, io, cache, dataset 

22 

23from .model import to_kind_id, WaveformOrder, to_kind, to_codes, \ 

24 STATION, CHANNEL, RESPONSE, EVENT, WAVEFORM 

25from .client import fdsn, catalog 

26from .selection import Selection, filldocs 

27from .database import abspath 

28from . import client, environment, error 

29 

# Module logger, registered under the abbreviated 'psq.base' channel name.
logger = logging.getLogger('psq.base')

# Serialization prefix for guts-derived classes defined in this module
# (presumably consumed by the guts framework's type registry).
guts_prefix = 'squirrel'

33 

34 

def make_task(*args):
    '''
    Create a progress task which reports through this module's logger.
    '''
    return progress.task(*args, logger=logger)

37 

38 

def lpick(condition, seq):
    '''
    Partition ``seq`` by predicate ``condition``.

    :returns:
        Tuple ``(nomatch, match)`` of two lists: elements for which
        ``condition`` evaluates falsy, and elements for which it evaluates
        truthy, each in original order.
    '''
    nomatch = []
    match = []
    for element in seq:
        if condition(element):
            match.append(element)
        else:
            nomatch.append(element)

    return nomatch, match

45 

46 

def codes_patterns_for_kind(kind_id, codes):
    '''
    Expand codes (or a list of codes) into match patterns for a content kind.
    '''
    if isinstance(codes, list):
        # Flatten recursively so mixed lists expand element by element.
        patterns = []
        for entry in codes:
            patterns.extend(codes_patterns_for_kind(kind_id, entry))

        return patterns

    codes = to_codes(kind_id, codes)

    if kind_id != model.STATION:
        return [codes]

    # Station queries additionally match entries with any location code.
    return [codes, codes.replace(location='[*]')]

61 

62 

def blocks(tmin, tmax, deltat, nsamples_block=100000):
    '''
    Yield consecutive, aligned time block spans covering ``[tmin, tmax]``.

    Block length is ``deltat * nsamples_block``; spans are anchored at
    integer multiples of the block length.
    '''
    tblock = util.to_time_float(deltat * nsamples_block)
    kmin = int(math.floor(tmin / tblock))
    kmax = int(math.ceil(tmax / tblock))
    for k in range(kmin, kmax):
        yield k * tblock, (k + 1) * tblock

69 

70 

def gaps(avail, tmin, tmax):
    '''
    Find time spans within ``[tmin, tmax]`` not covered by ``avail``.

    :param avail:
        Available spans as ``(tmin_a, tmax_a)`` pairs, each non-empty.
    :returns:
        List of ``(tmin_g, tmax_g)`` gap spans, in time order.
    '''
    assert tmin < tmax

    # Sweep line over coverage-change events: +1 opens coverage, -1 closes
    # it. The query window itself contributes a close at tmin and an open at
    # tmax, so coverage level dropping to 0 inside the window marks a gap.
    events = [(tmax, 1), (tmin, -1)]
    for (tmin_a, tmax_a) in avail:
        assert tmin_a < tmax_a
        events.append((tmin_a, 1))
        events.append((tmax_a, -1))

    events.sort()

    found = []
    level = 1
    t_gap_start = None
    for t, delta in events:
        if level == 1 and delta == -1:
            t_gap_start = t
        elif level == 0 and delta == 1 and t_gap_start is not None:
            # Suppress empty (zero-length) gaps.
            if t_gap_start != t:
                found.append((t_gap_start, t))

        level += delta

    return found

95 

96 

def order_key(order):
    '''
    Sort/group key for waveform orders: by codes, then time span.
    '''
    return order.codes, order.tmin, order.tmax

99 

100 

class Batch(object):
    '''
    Container for the result of one window in window-wise waveform extraction.

    One instance per time window is yielded by the
    :py:meth:`Squirrel.chopper_waveforms` method, bundling the window bounds,
    the window's position in the sequence and the extracted traces.

    *Attributes:*

    .. py:attribute:: tmin

        Start of this time window.

    .. py:attribute:: tmax

        End of this time window.

    .. py:attribute:: i

        Index of this time window in sequence.

    .. py:attribute:: n

        Total number of time windows in sequence.

    .. py:attribute:: traces

        Extracted waveforms for this time window.
    '''

    def __init__(self, tmin, tmax, i, n, traces):
        self.traces = traces
        self.n = n
        self.i = i
        self.tmax = tmax
        self.tmin = tmin

137 

138 

139class Squirrel(Selection): 

140 ''' 

141 Prompt, lazy, indexing, caching, dynamic seismological dataset access. 

142 

143 :param env: 

144 Squirrel environment instance or directory path to use as starting 

145 point for its detection. By default, the current directory is used as 

146 starting point. When searching for a usable environment the directory 

147 ``'.squirrel'`` or ``'squirrel'`` in the current (or starting point) 

148 directory is used if it exists, otherwise the parent directories are 

    searched upwards for the existence of such a directory. If no such

150 directory is found, the user's global Squirrel environment 

151 ``'$HOME/.pyrocko/squirrel'`` is used. 

152 :type env: 

153 :py:class:`~pyrocko.squirrel.environment.Environment` or 

154 :py:class:`str` 

155 

156 :param database: 

157 Database instance or path to database. By default the 

158 database found in the detected Squirrel environment is used. 

159 :type database: 

160 :py:class:`~pyrocko.squirrel.database.Database` or :py:class:`str` 

161 

162 :param cache_path: 

163 Directory path to use for data caching. By default, the ``'cache'`` 

164 directory in the detected Squirrel environment is used. 

165 :type cache_path: 

166 :py:class:`str` 

167 

168 :param persistent: 

169 If given a name, create a persistent selection. 

170 :type persistent: 

171 :py:class:`str` 

172 

173 This is the central class of the Squirrel framework. It provides a unified 

174 interface to query and access seismic waveforms, station meta-data and 

175 event information from local file collections and remote data sources. For 

176 prompt responses, a profound database setup is used under the hood. To 

177 speed up assemblage of ad-hoc data selections, files are indexed on first 

178 use and the extracted meta-data is remembered in the database for 

179 subsequent accesses. Bulk data is lazily loaded from disk and remote 

180 sources, just when requested. Once loaded, data is cached in memory to 

181 expedite typical access patterns. Files and data sources can be dynamically 

182 added to and removed from the Squirrel selection at runtime. 

183 

184 Queries are restricted to the contents of the files currently added to the 

185 Squirrel selection (usually a subset of the file meta-information 

186 collection in the database). This list of files is referred to here as the 

187 "selection". By default, temporary tables are created in the attached 

188 database to hold the names of the files in the selection as well as various 

189 indices and counters. These tables are only visible inside the application 

190 which created them and are deleted when the database connection is closed 

191 or the application exits. To create a selection which is not deleted at 

192 exit, supply a name to the ``persistent`` argument of the Squirrel 

193 constructor. Persistent selections are shared among applications using the 

194 same database. 

195 

196 **Method summary** 

197 

198 Some of the methods are implemented in :py:class:`Squirrel`'s base class 

199 :py:class:`~pyrocko.squirrel.selection.Selection`. 

200 

201 .. autosummary:: 

202 

203 ~Squirrel.add 

204 ~Squirrel.add_source 

205 ~Squirrel.add_fdsn 

206 ~Squirrel.add_catalog 

207 ~Squirrel.add_dataset 

208 ~Squirrel.add_virtual 

209 ~Squirrel.update 

210 ~Squirrel.update_waveform_promises 

211 ~Squirrel.advance_accessor 

212 ~Squirrel.clear_accessor 

213 ~Squirrel.reload 

214 ~pyrocko.squirrel.selection.Selection.iter_paths 

215 ~Squirrel.iter_nuts 

216 ~Squirrel.iter_kinds 

217 ~Squirrel.iter_deltats 

218 ~Squirrel.iter_codes 

219 ~pyrocko.squirrel.selection.Selection.get_paths 

220 ~Squirrel.get_nuts 

221 ~Squirrel.get_kinds 

222 ~Squirrel.get_deltats 

223 ~Squirrel.get_codes 

224 ~Squirrel.get_counts 

225 ~Squirrel.get_time_span 

226 ~Squirrel.get_deltat_span 

227 ~Squirrel.get_nfiles 

228 ~Squirrel.get_nnuts 

229 ~Squirrel.get_total_size 

230 ~Squirrel.get_stats 

231 ~Squirrel.get_content 

232 ~Squirrel.get_stations 

233 ~Squirrel.get_channels 

234 ~Squirrel.get_responses 

235 ~Squirrel.get_events 

236 ~Squirrel.get_waveform_nuts 

237 ~Squirrel.get_waveforms 

238 ~Squirrel.chopper_waveforms 

239 ~Squirrel.get_coverage 

240 ~Squirrel.pile 

241 ~Squirrel.snuffle 

242 ~Squirrel.glob_codes 

243 ~pyrocko.squirrel.selection.Selection.get_database 

244 ~Squirrel.print_tables 

245 ''' 

246 

    def __init__(
            self, env=None, database=None, cache_path=None, persistent=None):

        # Resolve the environment first; explicit arguments override the
        # values it provides.
        if not isinstance(env, environment.Environment):
            env = environment.get_environment(env)

        if database is None:
            database = env.expand_path(env.database_path)

        if cache_path is None:
            cache_path = env.expand_path(env.cache_path)

        if persistent is None:
            persistent = env.persistent

        # Base class sets up the file selection tables in the database.
        Selection.__init__(
            self, database=database, persistent=persistent)

        # Relative paths in the database are resolved against the parent of
        # the environment's base path.
        self.get_database().set_basepath(os.path.dirname(env.get_basepath()))

        # Separate in-memory content caches: waveforms have different access
        # patterns than the other (meta-data) content kinds.
        self._content_caches = {
            'waveform': cache.ContentCache(),
            'default': cache.ContentCache()}

        self._cache_path = cache_path

        # Remote data sources and content operators registered at runtime.
        self._sources = []
        self._operators = []
        self._operator_registry = {}

        self._pile = None
        self._n_choppers_active = 0

        # Names of this selection's private tables, derived from the
        # selection name so multiple selections can coexist in one database.
        self._names.update({
            'nuts': self.name + '_nuts',
            'kind_codes_count': self.name + '_kind_codes_count',
            'coverage': self.name + '_coverage'})

        with self.transaction('create tables') as cursor:
            self._create_tables_squirrel(cursor)

287 

    def _create_tables_squirrel(self, cursor):
        '''
        Create this selection's tables, indices and triggers.

        Sets up the ``nuts``, ``kind_codes_count`` and ``coverage`` tables
        together with triggers which keep the two aggregate tables consistent
        with insertions into and deletions from ``nuts``.
        '''

        # Main index table: one row per content snippet ("nut") of the files
        # in this selection. Times are stored as split integer
        # seconds/offset pairs.
        cursor.execute(self._register_table(self._sql(
            '''
                CREATE TABLE IF NOT EXISTS %(db)s.%(nuts)s (
                    nut_id integer PRIMARY KEY,
                    file_id integer,
                    file_segment integer,
                    file_element integer,
                    kind_id integer,
                    kind_codes_id integer,
                    tmin_seconds integer,
                    tmin_offset integer,
                    tmax_seconds integer,
                    tmax_offset integer,
                    kscale integer)
            ''')))

        # Per (kind, codes) combination: number of nuts currently present,
        # maintained by the _inc/_dec triggers below.
        cursor.execute(self._register_table(self._sql(
            '''
                CREATE TABLE IF NOT EXISTS %(db)s.%(kind_codes_count)s (
                    kind_codes_id integer PRIMARY KEY,
                    count integer)
            ''')))

        # Uniqueness: at most one nut per (file, segment, element).
        cursor.execute(self._sql(
            '''
                CREATE UNIQUE INDEX IF NOT EXISTS %(db)s.%(nuts)s_file_element
                    ON %(nuts)s (file_id, file_segment, file_element)
            '''))

        cursor.execute(self._sql(
            '''
                CREATE INDEX IF NOT EXISTS %(db)s.%(nuts)s_index_file_id
                ON %(nuts)s (file_id)
            '''))

        # Time-range lookup indices, used by _timerange_sql.
        cursor.execute(self._sql(
            '''
                CREATE INDEX IF NOT EXISTS %(db)s.%(nuts)s_index_tmin_seconds
                ON %(nuts)s (kind_id, tmin_seconds)
            '''))

        cursor.execute(self._sql(
            '''
                CREATE INDEX IF NOT EXISTS %(db)s.%(nuts)s_index_tmax_seconds
                ON %(nuts)s (kind_id, tmax_seconds)
            '''))

        cursor.execute(self._sql(
            '''
                CREATE INDEX IF NOT EXISTS %(db)s.%(nuts)s_index_kscale
                ON %(nuts)s (kind_id, kscale, tmin_seconds)
            '''))

        # Cascade: dropping a file from the global file table removes its
        # nuts from this selection.
        cursor.execute(self._sql(
            '''
                CREATE TRIGGER IF NOT EXISTS %(db)s.%(nuts)s_delete_nuts
                BEFORE DELETE ON main.files FOR EACH ROW
                BEGIN
                  DELETE FROM %(nuts)s WHERE file_id == old.file_id;
                END
            '''))

        # trigger only on size to make silent update of mtime possible
        cursor.execute(self._sql(
            '''
                CREATE TRIGGER IF NOT EXISTS %(db)s.%(nuts)s_delete_nuts2
                BEFORE UPDATE OF size ON main.files FOR EACH ROW
                BEGIN
                  DELETE FROM %(nuts)s WHERE file_id == old.file_id;
                END
            '''))

        # Cascade: removing a file from this selection removes its nuts.
        cursor.execute(self._sql(
            '''
                CREATE TRIGGER IF NOT EXISTS
                    %(db)s.%(file_states)s_delete_files
                BEFORE DELETE ON %(db)s.%(file_states)s FOR EACH ROW
                BEGIN
                  DELETE FROM %(nuts)s WHERE file_id == old.file_id;
                END
            '''))

        # Keep kind_codes_count in sync: +1 on insert ...
        cursor.execute(self._sql(
            '''
                CREATE TRIGGER IF NOT EXISTS %(db)s.%(nuts)s_inc_kind_codes
                BEFORE INSERT ON %(nuts)s FOR EACH ROW
                BEGIN
                    INSERT OR IGNORE INTO %(kind_codes_count)s VALUES
                    (new.kind_codes_id, 0);
                    UPDATE %(kind_codes_count)s
                    SET count = count + 1
                    WHERE new.kind_codes_id
                        == %(kind_codes_count)s.kind_codes_id;
                END
            '''))

        # ... and -1 on delete.
        cursor.execute(self._sql(
            '''
                CREATE TRIGGER IF NOT EXISTS %(db)s.%(nuts)s_dec_kind_codes
                BEFORE DELETE ON %(nuts)s FOR EACH ROW
                BEGIN
                    UPDATE %(kind_codes_count)s
                    SET count = count - 1
                    WHERE old.kind_codes_id
                        == %(kind_codes_count)s.kind_codes_id;
                END
            '''))

        # Sparse coverage representation: per (kind, codes) combination, the
        # time points where the number of overlapping nuts changes, with the
        # signed change stored in `step`.
        cursor.execute(self._register_table(self._sql(
            '''
                CREATE TABLE IF NOT EXISTS %(db)s.%(coverage)s (
                    kind_codes_id integer,
                    time_seconds integer,
                    time_offset integer,
                    step integer)
            ''')))

        cursor.execute(self._sql(
            '''
                CREATE UNIQUE INDEX IF NOT EXISTS %(db)s.%(coverage)s_time
                    ON %(coverage)s (kind_codes_id, time_seconds, time_offset)
            '''))

        # Coverage maintenance on nut insert: +1 step at the nut's begin,
        # -1 at its end; entries whose step becomes 0 are removed.
        cursor.execute(self._sql(
            '''
                CREATE TRIGGER IF NOT EXISTS %(db)s.%(nuts)s_add_coverage
                AFTER INSERT ON %(nuts)s FOR EACH ROW
                BEGIN
                    INSERT OR IGNORE INTO %(coverage)s VALUES
                    (new.kind_codes_id, new.tmin_seconds, new.tmin_offset, 0)
                    ;
                    UPDATE %(coverage)s
                    SET step = step + 1
                    WHERE new.kind_codes_id == %(coverage)s.kind_codes_id
                        AND new.tmin_seconds == %(coverage)s.time_seconds
                        AND new.tmin_offset == %(coverage)s.time_offset
                    ;
                    INSERT OR IGNORE INTO %(coverage)s VALUES
                    (new.kind_codes_id, new.tmax_seconds, new.tmax_offset, 0)
                    ;
                    UPDATE %(coverage)s
                    SET step = step - 1
                    WHERE new.kind_codes_id == %(coverage)s.kind_codes_id
                        AND new.tmax_seconds == %(coverage)s.time_seconds
                        AND new.tmax_offset == %(coverage)s.time_offset
                    ;
                    DELETE FROM %(coverage)s
                    WHERE new.kind_codes_id == %(coverage)s.kind_codes_id
                        AND new.tmin_seconds == %(coverage)s.time_seconds
                        AND new.tmin_offset == %(coverage)s.time_offset
                        AND step == 0
                    ;
                    DELETE FROM %(coverage)s
                    WHERE new.kind_codes_id == %(coverage)s.kind_codes_id
                        AND new.tmax_seconds == %(coverage)s.time_seconds
                        AND new.tmax_offset == %(coverage)s.time_offset
                        AND step == 0
                    ;
                END
            '''))

        # Inverse bookkeeping on nut delete.
        cursor.execute(self._sql(
            '''
                CREATE TRIGGER IF NOT EXISTS %(db)s.%(nuts)s_remove_coverage
                BEFORE DELETE ON %(nuts)s FOR EACH ROW
                BEGIN
                    INSERT OR IGNORE INTO %(coverage)s VALUES
                    (old.kind_codes_id, old.tmin_seconds, old.tmin_offset, 0)
                    ;
                    UPDATE %(coverage)s
                    SET step = step - 1
                    WHERE old.kind_codes_id == %(coverage)s.kind_codes_id
                        AND old.tmin_seconds == %(coverage)s.time_seconds
                        AND old.tmin_offset == %(coverage)s.time_offset
                    ;
                    INSERT OR IGNORE INTO %(coverage)s VALUES
                    (old.kind_codes_id, old.tmax_seconds, old.tmax_offset, 0)
                    ;
                    UPDATE %(coverage)s
                    SET step = step + 1
                    WHERE old.kind_codes_id == %(coverage)s.kind_codes_id
                        AND old.tmax_seconds == %(coverage)s.time_seconds
                        AND old.tmax_offset == %(coverage)s.time_offset
                    ;
                    DELETE FROM %(coverage)s
                    WHERE old.kind_codes_id == %(coverage)s.kind_codes_id
                        AND old.tmin_seconds == %(coverage)s.time_seconds
                        AND old.tmin_offset == %(coverage)s.time_offset
                        AND step == 0
                    ;
                    DELETE FROM %(coverage)s
                    WHERE old.kind_codes_id == %(coverage)s.kind_codes_id
                        AND old.tmax_seconds == %(coverage)s.time_seconds
                        AND old.tmax_offset == %(coverage)s.time_offset
                        AND step == 0
                    ;
                END
            '''))

488 

    def _delete(self):
        '''Delete database tables associated with this Squirrel.'''

        # Drop triggers first, then the tables they act on. The coverage
        # objects are dropped with IF EXISTS, so their absence is tolerated;
        # the others are expected to exist.
        with self.transaction('delete tables') as cursor:
            for s in '''
                    DROP TRIGGER %(db)s.%(nuts)s_delete_nuts;
                    DROP TRIGGER %(db)s.%(nuts)s_delete_nuts2;
                    DROP TRIGGER %(db)s.%(file_states)s_delete_files;
                    DROP TRIGGER %(db)s.%(nuts)s_inc_kind_codes;
                    DROP TRIGGER %(db)s.%(nuts)s_dec_kind_codes;
                    DROP TABLE %(db)s.%(nuts)s;
                    DROP TABLE %(db)s.%(kind_codes_count)s;
                    DROP TRIGGER IF EXISTS %(db)s.%(nuts)s_add_coverage;
                    DROP TRIGGER IF EXISTS %(db)s.%(nuts)s_remove_coverage;
                    DROP TABLE IF EXISTS %(db)s.%(coverage)s;
            '''.strip().splitlines():

                cursor.execute(self._sql(s))

        # Let the base class remove the selection's own tables.
        Selection._delete(self)

509 

    @filldocs
    def add(self,
            paths,
            kinds=None,
            format='detect',
            include=None,
            exclude=None,
            check=True):

        '''
        Add files to the selection.

        :param paths:
            Iterator yielding paths to files or directories to be added to the
            selection. Recurses into directories. If given a ``str``, it
            is treated as a single path to be added.
        :type paths:
            :py:class:`list` of :py:class:`str`

        :param kinds:
            Content types to be made available through the Squirrel selection.
            By default, all known content types are accepted.
        :type kinds:
            :py:class:`list` of :py:class:`str`

        :param format:
            File format identifier or ``'detect'`` to enable auto-detection
            (available: %(file_formats)s).
        :type format:
            str

        :param include:
            If not ``None``, files are only included if their paths match the
            given regular expression pattern.
        :type include:
            str

        :param exclude:
            If not ``None``, files are only included if their paths do not
            match the given regular expression pattern.
        :type exclude:
            str

        :param check:
            If ``True``, all file modification times are checked to see if
            cached information has to be updated (slow). If ``False``, only
            previously unknown files are indexed and cached information is used
            for known files, regardless of file state (fast, corresponds to
            Squirrel's ``--optimistic`` mode). File deletions will go
            undetected in the latter case.
        :type check:
            bool

        :Complexity:
            O(log N)
        '''

        # Normalize scalar arguments to collections.
        if isinstance(kinds, str):
            kinds = (kinds,)

        if isinstance(paths, str):
            paths = [paths]

        kind_mask = model.to_kind_mask(kinds)

        with progress.view():
            # Virtual paths bypass file-system expansion (pass_through).
            Selection.add(
                self, util.iter_select_files(
                    paths,
                    show_progress=False,
                    include=include,
                    exclude=exclude,
                    pass_through=lambda path: path.startswith('virtual:')
                ), kind_mask, format)

            # Index new/changed files, then refresh the selection's nuts.
            self._load(check)
            self._update_nuts()

587 

    def reload(self):
        '''
        Check for modifications and reindex modified files.

        Based on file modification times.
        '''

        # Force the next load pass to re-check every file's state, then
        # reindex and refresh the selection's nut table.
        self._set_file_states_force_check()
        self._load(check=True)
        self._update_nuts()

598 

599 def add_virtual(self, nuts, virtual_paths=None): 

600 ''' 

601 Add content which is not backed by files. 

602 

603 :param nuts: 

604 Content pieces to be added. 

605 :type nuts: 

606 iterator yielding :py:class:`~pyrocko.squirrel.model.Nut` objects 

607 

608 :param virtual_paths: 

609 List of virtual paths to prevent creating a temporary list of the 

610 nuts while aggregating the file paths for the selection. 

611 :type virtual_paths: 

612 :py:class:`list` of :py:class:`str` 

613 

614 Stores to the main database and the selection. 

615 ''' 

616 

617 if isinstance(virtual_paths, str): 

618 virtual_paths = [virtual_paths] 

619 

620 if virtual_paths is None: 

621 if not isinstance(nuts, list): 

622 nuts = list(nuts) 

623 virtual_paths = set(nut.file_path for nut in nuts) 

624 

625 Selection.add(self, virtual_paths) 

626 self.get_database().dig(nuts) 

627 self._update_nuts() 

628 

629 def add_volatile(self, nuts): 

630 if not isinstance(nuts, list): 

631 nuts = list(nuts) 

632 

633 paths = list(set(nut.file_path for nut in nuts)) 

634 io.backends.virtual.add_nuts(nuts) 

635 self.add_virtual(nuts, paths) 

636 self._volatile_paths.extend(paths) 

637 

638 def add_volatile_waveforms(self, traces): 

639 ''' 

640 Add in-memory waveforms which will be removed when the app closes. 

641 ''' 

642 

643 name = model.random_name() 

644 

645 path = 'virtual:volatile:%s' % name 

646 

647 nuts = [] 

648 for itr, tr in enumerate(traces): 

649 assert tr.tmin <= tr.tmax 

650 tmin_seconds, tmin_offset = model.tsplit(tr.tmin) 

651 tmax_seconds, tmax_offset = model.tsplit( 

652 tr.tmin + tr.data_len()*tr.deltat) 

653 

654 nuts.append(model.Nut( 

655 file_path=path, 

656 file_format='virtual', 

657 file_segment=itr, 

658 file_element=0, 

659 file_mtime=0, 

660 codes=tr.codes, 

661 tmin_seconds=tmin_seconds, 

662 tmin_offset=tmin_offset, 

663 tmax_seconds=tmax_seconds, 

664 tmax_offset=tmax_offset, 

665 deltat=tr.deltat, 

666 kind_id=to_kind_id('waveform'), 

667 content=tr)) 

668 

669 self.add_volatile(nuts) 

670 return path 

671 

    def _load(self, check):
        # Run the indexing machinery over all files in the selection without
        # requesting any content (content=[]); skip_unchanged=True presumably
        # limits the work to new or modified files. The loop only exhausts
        # the iterator for its side effects.
        for _ in io.iload(
                self,
                content=[],
                skip_unchanged=True,
                check=check):

            pass

679 

    def _update_nuts(self, transaction=None):
        # Copy index entries ("nuts") of this selection's files from the
        # global `nuts` table into the selection's private nut table. Files
        # with file_state == 2 are skipped (presumably already transferred —
        # see _set_file_states_known), and only content kinds enabled in the
        # file's kind_mask are taken over.
        transaction = transaction or self.transaction('update nuts')
        with make_task('Aggregating selection') as task, \
                transaction as cursor:

            # Report SQLite progress ticks to the task display.
            self._conn.set_progress_handler(task.update, 100000)
            nrows = cursor.execute(self._sql(
                '''
                    INSERT INTO %(db)s.%(nuts)s
                    SELECT NULL,
                        nuts.file_id, nuts.file_segment, nuts.file_element,
                        nuts.kind_id, nuts.kind_codes_id,
                        nuts.tmin_seconds, nuts.tmin_offset,
                        nuts.tmax_seconds, nuts.tmax_offset,
                        nuts.kscale
                    FROM %(db)s.%(file_states)s
                    INNER JOIN nuts
                        ON %(db)s.%(file_states)s.file_id == nuts.file_id
                    INNER JOIN kind_codes
                        ON nuts.kind_codes_id ==
                           kind_codes.kind_codes_id
                    WHERE %(db)s.%(file_states)s.file_state != 2
                        AND (((1 << kind_codes.kind_id)
                            & %(db)s.%(file_states)s.kind_mask) != 0)
                ''')).rowcount

            task.update(nrows)
            self._set_file_states_known(transaction)
            self._conn.set_progress_handler(None, 0)

709 

    def add_source(self, source, check=True):
        '''
        Add remote resource.

        :param source:
            Remote data access client instance.
        :type source:
            subclass of :py:class:`~pyrocko.squirrel.client.base.Source`
        '''

        # Register the source, then let it attach itself to this Squirrel.
        self._sources.append(source)
        source.setup(self, check=check)

722 

    def add_fdsn(self, *args, **kwargs):
        '''
        Add FDSN site for transparent remote data access.

        Arguments are passed to
        :py:class:`~pyrocko.squirrel.client.fdsn.FDSNSource`.
        '''

        # Convenience wrapper around add_source.
        self.add_source(fdsn.FDSNSource(*args, **kwargs))

732 

    def add_catalog(self, *args, **kwargs):
        '''
        Add online catalog for transparent event data access.

        Arguments are passed to
        :py:class:`~pyrocko.squirrel.client.catalog.CatalogSource`.
        '''

        # Convenience wrapper around add_source.
        self.add_source(catalog.CatalogSource(*args, **kwargs))

742 

743 def add_dataset(self, ds, check=True, warn_persistent=True): 

744 ''' 

745 Read dataset description from file and add its contents. 

746 

747 :param ds: 

748 Path to dataset description file or dataset description object 

749 . See :py:mod:`~pyrocko.squirrel.dataset`. 

750 :type ds: 

751 :py:class:`str` or :py:class:`~pyrocko.squirrel.dataset.Dataset` 

752 

753 :param check: 

754 If ``True``, all file modification times are checked to see if 

755 cached information has to be updated (slow). If ``False``, only 

756 previously unknown files are indexed and cached information is used 

757 for known files, regardless of file state (fast, corrresponds to 

758 Squirrel's ``--optimistic`` mode). File deletions will go 

759 undetected in the latter case. 

760 :type check: 

761 bool 

762 ''' 

763 if isinstance(ds, str): 

764 ds = dataset.read_dataset(ds) 

765 path = ds 

766 else: 

767 path = None 

768 

769 if warn_persistent and ds.persistent and ( 

770 not self._persistent or (self._persistent != ds.persistent)): 

771 

772 logger.warning( 

773 'Dataset `persistent` flag ignored. Can not be set on already ' 

774 'existing Squirrel instance.%s' % ( 

775 ' Dataset: %s' % path if path else '')) 

776 

777 ds.setup(self, check=check) 

778 

779 def _get_selection_args( 

780 self, kind_id, 

781 obj=None, tmin=None, tmax=None, time=None, codes=None): 

782 

783 if codes is not None: 

784 codes = to_codes(kind_id, codes) 

785 

786 if time is not None: 

787 tmin = time 

788 tmax = time 

789 

790 if obj is not None: 

791 tmin = tmin if tmin is not None else obj.tmin 

792 tmax = tmax if tmax is not None else obj.tmax 

793 codes = codes if codes is not None else obj.codes 

794 

795 return tmin, tmax, codes 

796 

797 def _selection_args_to_kwargs( 

798 self, obj=None, tmin=None, tmax=None, time=None, codes=None): 

799 

800 return dict(obj=obj, tmin=tmin, tmax=tmax, time=time, codes=codes) 

801 

    def _timerange_sql(self, tmin, tmax, kind, cond, args, naiv):
        # Append SQL condition fragments (and their bind arguments) to
        # `cond`/`args`, restricting nuts of `kind` to those possibly
        # intersecting [tmin, tmax].

        tmin_seconds, tmin_offset = model.tsplit(tmin)
        tmax_seconds, tmax_offset = model.tsplit(tmax)
        if naiv:
            # Unindexed variant: simple bound on tmin (slow, for testing).
            cond.append('%(db)s.%(nuts)s.tmin_seconds <= ?')
            args.append(tmax_seconds)
        else:
            # Bucketed variant: nuts are classified by duration scale
            # (kscale), so for each scale only a bounded tmin window needs to
            # be scanned via the (kind_id, kscale, tmin_seconds) index.
            tscale_edges = model.tscale_edges
            tmin_cond = []
            for kscale in range(tscale_edges.size + 1):
                if kscale != tscale_edges.size:
                    tscale = int(tscale_edges[kscale])
                    # Note: single '=' below is equivalent to '==' in SQLite.
                    tmin_cond.append('''
                        (%(db)s.%(nuts)s.kind_id = ?
                         AND %(db)s.%(nuts)s.kscale == ?
                         AND %(db)s.%(nuts)s.tmin_seconds BETWEEN ? AND ?)
                    ''')
                    args.extend(
                        (to_kind_id(kind), kscale,
                         tmin_seconds - tscale - 1, tmax_seconds + 1))

                else:
                    # Last bucket (unbounded duration): no lower tmin limit.
                    tmin_cond.append('''
                        (%(db)s.%(nuts)s.kind_id == ?
                         AND %(db)s.%(nuts)s.kscale == ?
                         AND %(db)s.%(nuts)s.tmin_seconds <= ?)
                    ''')

                    args.extend(
                        (to_kind_id(kind), kscale, tmax_seconds + 1))
            if tmin_cond:
                cond.append(' ( ' + ' OR '.join(tmin_cond) + ' ) ')

        # In both variants, the nut must not end before the query starts.
        cond.append('%(db)s.%(nuts)s.tmax_seconds >= ?')
        args.append(tmin_seconds)

838 

    def iter_nuts(
            self, kind=None, tmin=None, tmax=None, codes=None, naiv=False,
            kind_codes_ids=None, path=None):

        '''
        Iterate over content entities matching given constraints.

        :param kind:
            Content kind (or kinds) to extract.
        :type kind:
            :py:class:`str`, :py:class:`list` of :py:class:`str`

        :param tmin:
            Start time of query interval.
        :type tmin:
            timestamp

        :param tmax:
            End time of query interval.
        :type tmax:
            timestamp

        :param codes:
            Pattern of content codes to query.
        :type codes:
            :py:class:`tuple` of :py:class:`str`

        :param naiv:
            Bypass time span lookup through indices (slow, for testing).
        :type naiv:
            :py:class:`bool`

        :param kind_codes_ids:
            Kind-codes IDs of contents to be retrieved (internal use).
        :type kind_codes_ids:
            :py:class:`list` of :py:class:`int`

        :param path:
            If given, restrict matches to content from this file path.
        :type path:
            :py:class:`str`

        :yields:
            :py:class:`~pyrocko.squirrel.model.Nut` objects representing the
            intersecting content.

        :complexity:
            O(log N) for the time selection part due to heavy use of database
            indices.

        Query time span is treated as a half-open interval ``[tmin, tmax)``.
        However, if ``tmin`` equals ``tmax``, the edge logics are modified to
        closed-interval so that content intersecting with the time instant ``t
        = tmin = tmax`` is returned (otherwise nothing would be returned as
        ``[t, t)`` never matches anything).

        Time spans of content entities to be matched are also treated as half
        open intervals, e.g. content span ``[0, 1)`` is matched by query span
        ``[0, 1)`` but not by ``[-1, 0)`` or ``[1, 2)``. Also here, logics are
        modified to closed-interval when the content time span is an empty
        interval, i.e. to indicate a time instant. E.g. time instant 0 is
        matched by ``[0, 1)`` but not by ``[-1, 0)`` or ``[1, 2)``.
        '''

        # Multiple kinds (or all kinds): recurse per kind and chain.
        if not isinstance(kind, str):
            if kind is None:
                kind = model.g_content_kinds
            for kind_ in kind:
                for nut in self.iter_nuts(kind_, tmin, tmax, codes):
                    yield nut

            return

        kind_id = to_kind_id(kind)

        # Assemble the WHERE clause incrementally as condition fragments and
        # bind arguments.
        cond = []
        args = []
        if tmin is not None or tmax is not None:
            assert kind is not None
            # Open-ended queries fall back to the selection's full time span.
            if tmin is None:
                tmin = self.get_time_span()[0]
            if tmax is None:
                tmax = self.get_time_span()[1] + 1.0

            self._timerange_sql(tmin, tmax, kind, cond, args, naiv)

        cond.append('kind_codes.kind_id == ?')
        args.append(kind_id)

        if codes is not None:
            pats = codes_patterns_for_kind(kind_id, codes)
            if pats:
                cond.append(
                    ' ( %s ) ' % ' OR '.join(
                        ('kind_codes.codes GLOB ?',) * len(pats)))
                args.extend(pat.safe_str for pat in pats)

        if kind_codes_ids is not None:
            cond.append(
                ' ( kind_codes.kind_codes_id IN ( %s ) ) ' % ', '.join(
                    '?'*len(kind_codes_ids)))

            args.extend(kind_codes_ids)

        db = self.get_database()
        if path is not None:
            # Paths are stored relative to the database's base path.
            cond.append('files.path == ?')
            args.append(db.relpath(abspath(path)))

        sql = ('''
            SELECT
                files.path,
                files.format,
                files.mtime,
                files.size,
                %(db)s.%(nuts)s.file_segment,
                %(db)s.%(nuts)s.file_element,
                kind_codes.kind_id,
                kind_codes.codes,
                %(db)s.%(nuts)s.tmin_seconds,
                %(db)s.%(nuts)s.tmin_offset,
                %(db)s.%(nuts)s.tmax_seconds,
                %(db)s.%(nuts)s.tmax_offset,
                kind_codes.deltat
            FROM files
            INNER JOIN %(db)s.%(nuts)s
                ON files.file_id == %(db)s.%(nuts)s.file_id
            INNER JOIN kind_codes
                ON %(db)s.%(nuts)s.kind_codes_id == kind_codes.kind_codes_id
            ''')

        if cond:
            sql += ''' WHERE ''' + ' AND '.join(cond)

        sql = self._sql(sql)
        if tmin is None and tmax is None:
            # No time constraint: yield everything the SQL query matched.
            for row in self._conn.execute(sql, args):
                row = (db.abspath(row[0]),) + row[1:]
                nut = model.Nut(values_nocheck=row)
                yield nut
        else:
            assert tmin is not None and tmax is not None
            # The SQL time lookup is only approximate (integer-second
            # resolution); re-check exact intersection in Python, applying
            # the closed-interval edge rules described in the docstring.
            if tmin == tmax:
                for row in self._conn.execute(sql, args):
                    row = (db.abspath(row[0]),) + row[1:]
                    nut = model.Nut(values_nocheck=row)
                    if (nut.tmin <= tmin < nut.tmax) \
                            or (nut.tmin == nut.tmax and tmin == nut.tmin):

                        yield nut
            else:
                for row in self._conn.execute(sql, args):
                    row = (db.abspath(row[0]),) + row[1:]
                    nut = model.Nut(values_nocheck=row)
                    if (tmin < nut.tmax and nut.tmin < tmax) \
                            or (nut.tmin == nut.tmax
                                and tmin <= nut.tmin < tmax):

                        yield nut

993 

994 def get_nuts(self, *args, **kwargs): 

995 ''' 

996 Get content entities matching given constraints. 

997 

998 Like :py:meth:`iter_nuts` but returns results as a list. 

999 ''' 

1000 

1001 return list(self.iter_nuts(*args, **kwargs)) 

1002 

    def _split_nuts(
            self, kind, tmin=None, tmax=None, codes=None, path=None):

        # Cut all matching nuts of `kind` at the edges of [tmin, tmax]:
        # entries overlapping the interval are replaced by their remainders
        # outside of it (if any) and the originals are deleted. Applied to
        # both the selection's nut table and the global `nuts` table.

        kind_id = to_kind_id(kind)
        tmin_seconds, tmin_offset = model.tsplit(tmin)
        tmax_seconds, tmax_offset = model.tsplit(tmax)

        # Name substitution targeting the global table in the main database.
        names_main_nuts = dict(self._names)
        names_main_nuts.update(db='main', nuts='nuts')

        db = self.get_database()

        def main_nuts(s):
            return s % names_main_nuts

        with self.transaction('split nuts') as cursor:
            # modify selection and main
            for sql_subst in [
                    self._sql, main_nuts]:

                cond = []
                args = []

                self._timerange_sql(tmin, tmax, kind, cond, args, False)

                if codes is not None:
                    pats = codes_patterns_for_kind(kind_id, codes)
                    if pats:
                        cond.append(
                            ' ( %s ) ' % ' OR '.join(
                                ('kind_codes.codes GLOB ?',) * len(pats)))
                        args.extend(pat.safe_str for pat in pats)

                if path is not None:
                    cond.append('files.path == ?')
                    args.append(db.relpath(abspath(path)))

                # Fetch candidate nuts possibly intersecting the interval.
                sql = sql_subst('''
                    SELECT
                        %(db)s.%(nuts)s.nut_id,
                        %(db)s.%(nuts)s.tmin_seconds,
                        %(db)s.%(nuts)s.tmin_offset,
                        %(db)s.%(nuts)s.tmax_seconds,
                        %(db)s.%(nuts)s.tmax_offset,
                        kind_codes.deltat
                    FROM files
                    INNER JOIN %(db)s.%(nuts)s
                        ON files.file_id == %(db)s.%(nuts)s.file_id
                    INNER JOIN kind_codes
                        ON %(db)s.%(nuts)s.kind_codes_id == kind_codes.kind_codes_id
                    WHERE ''' + ' AND '.join(cond))  # noqa

                insert = []
                delete = []
                for row in cursor.execute(sql, args):
                    nut_id, nut_tmin_seconds, nut_tmin_offset, \
                        nut_tmax_seconds, nut_tmax_offset, nut_deltat = row

                    nut_tmin = model.tjoin(
                        nut_tmin_seconds, nut_tmin_offset)
                    nut_tmax = model.tjoin(
                        nut_tmax_seconds, nut_tmax_offset)

                    # Exact intersection check in Python (the SQL lookup is
                    # only approximate at integer-second resolution).
                    if nut_tmin < tmax and tmin < nut_tmax:
                        # Remainder sticking out on the lower side.
                        if nut_tmin < tmin:
                            insert.append((
                                nut_tmin_seconds, nut_tmin_offset,
                                tmin_seconds, tmin_offset,
                                model.tscale_to_kscale(
                                    tmin_seconds - nut_tmin_seconds),
                                nut_id))

                        # Remainder sticking out on the upper side.
                        if tmax < nut_tmax:
                            insert.append((
                                tmax_seconds, tmax_offset,
                                nut_tmax_seconds, nut_tmax_offset,
                                model.tscale_to_kscale(
                                    nut_tmax_seconds - tmax_seconds),
                                nut_id))

                        delete.append((nut_id,))

                # Clone each split nut's file/kind columns, substituting the
                # new time span and kscale.
                sql_add = '''
                    INSERT INTO %(db)s.%(nuts)s (
                            file_id, file_segment, file_element, kind_id,
                            kind_codes_id, tmin_seconds, tmin_offset,
                            tmax_seconds, tmax_offset, kscale )
                        SELECT
                            file_id, file_segment, file_element,
                            kind_id, kind_codes_id, ?, ?, ?, ?, ?
                        FROM %(db)s.%(nuts)s
                        WHERE nut_id == ?
                '''
                cursor.executemany(sql_subst(sql_add), insert)

                sql_delete = '''
                    DELETE FROM %(db)s.%(nuts)s WHERE nut_id == ?
                '''
                cursor.executemany(sql_subst(sql_delete), delete)

1102 

    def get_time_span(self, kinds=None):
        '''
        Get time interval over all content in selection.

        :param kinds:
            If not ``None``, restrict query to given content kinds.
        :type kinds:
            list of str

        :complexity:
            O(1), independent of the number of nuts.

        :returns:
            ``(tmin, tmax)``, combined time interval of queried content kinds.
            Either value may be ``None`` if no matching content is available.
        '''

        # Times are stored as (integer seconds, float offset) pairs. The
        # inner SELECT finds the extreme integer second; the outer MIN/MAX
        # then resolves the sub-second offset among entries sharing it.
        sql_min = self._sql('''
            SELECT MIN(tmin_seconds), MIN(tmin_offset)
            FROM %(db)s.%(nuts)s
            WHERE kind_id == ?
                AND tmin_seconds == (
                    SELECT MIN(tmin_seconds)
                    FROM %(db)s.%(nuts)s
                    WHERE kind_id == ?)
        ''')

        sql_max = self._sql('''
            SELECT MAX(tmax_seconds), MAX(tmax_offset)
            FROM %(db)s.%(nuts)s
            WHERE kind_id == ?
                AND tmax_seconds == (
                    SELECT MAX(tmax_seconds)
                    FROM %(db)s.%(nuts)s
                    WHERE kind_id == ?)
        ''')

        gtmin = None
        gtmax = None

        # Accept a single kind given as a plain string.
        if isinstance(kinds, str):
            kinds = [kinds]

        if kinds is None:
            kind_ids = model.g_content_kind_ids
        else:
            kind_ids = model.to_kind_ids(kinds)

        for kind_id in kind_ids:
            for tmin_seconds, tmin_offset in self._conn.execute(
                    sql_min, (kind_id, kind_id)):
                tmin = model.tjoin(tmin_seconds, tmin_offset)
                if tmin is not None and (gtmin is None or tmin < gtmin):
                    gtmin = tmin

            for (tmax_seconds, tmax_offset) in self._conn.execute(
                    sql_max, (kind_id, kind_id)):
                tmax = model.tjoin(tmax_seconds, tmax_offset)
                if tmax is not None and (gtmax is None or tmax > gtmax):
                    gtmax = tmax

        return gtmin, gtmax

1165 def has(self, kinds): 

1166 ''' 

1167 Check availability of given content kinds. 

1168 

1169 :param kinds: 

1170 Content kinds to query. 

1171 :type kind: 

1172 list of str 

1173 

1174 :returns: 

1175 ``True`` if any of the queried content kinds is available 

1176 in the selection. 

1177 ''' 

1178 self_tmin, self_tmax = self.get_time_span(kinds) 

1179 

1180 return None not in (self_tmin, self_tmax) 

1181 

1182 def get_deltat_span(self, kind): 

1183 ''' 

1184 Get min and max sampling interval of all content of given kind. 

1185 

1186 :param kind: 

1187 Content kind 

1188 :type kind: 

1189 str 

1190 

1191 :returns: ``(deltat_min, deltat_max)`` 

1192 ''' 

1193 

1194 deltats = [ 

1195 deltat for deltat in self.get_deltats(kind) 

1196 if deltat is not None] 

1197 

1198 if deltats: 

1199 return min(deltats), max(deltats) 

1200 else: 

1201 return None, None 

1202 

1203 def iter_kinds(self, codes=None): 

1204 ''' 

1205 Iterate over content types available in selection. 

1206 

1207 :param codes: 

1208 If given, get kinds only for selected codes identifier. 

1209 :type codes: 

1210 :py:class:`tuple` of :py:class:`str` 

1211 

1212 :yields: 

1213 Available content kinds as :py:class:`str`. 

1214 

1215 :complexity: 

1216 O(1), independent of number of nuts. 

1217 ''' 

1218 

1219 return self._database._iter_kinds( 

1220 codes=codes, 

1221 kind_codes_count='%(db)s.%(kind_codes_count)s' % self._names) 

1222 

1223 def iter_deltats(self, kind=None): 

1224 ''' 

1225 Iterate over sampling intervals available in selection. 

1226 

1227 :param kind: 

1228 If given, get sampling intervals only for a given content type. 

1229 :type kind: 

1230 str 

1231 

1232 :yields: 

1233 :py:class:`float` values. 

1234 

1235 :complexity: 

1236 O(1), independent of number of nuts. 

1237 ''' 

1238 return self._database._iter_deltats( 

1239 kind=kind, 

1240 kind_codes_count='%(db)s.%(kind_codes_count)s' % self._names) 

1241 

1242 def iter_codes(self, kind=None): 

1243 ''' 

1244 Iterate over content identifier code sequences available in selection. 

1245 

1246 :param kind: 

1247 If given, get codes only for a given content type. 

1248 :type kind: 

1249 str 

1250 

1251 :yields: 

1252 :py:class:`tuple` of :py:class:`str` 

1253 

1254 :complexity: 

1255 O(1), independent of number of nuts. 

1256 ''' 

1257 return self._database._iter_codes( 

1258 kind=kind, 

1259 kind_codes_count='%(db)s.%(kind_codes_count)s' % self._names) 

1260 

1261 def _iter_codes_info(self, kind=None): 

1262 ''' 

1263 Iterate over number of occurrences of any (kind, codes) combination. 

1264 

1265 :param kind: 

1266 If given, get counts only for selected content type. 

1267 :type kind: 

1268 str 

1269 

1270 :yields: 

1271 Tuples of the form ``(kind, codes, deltat, kind_codes_id, count)``. 

1272 

1273 :complexity: 

1274 O(1), independent of number of nuts. 

1275 ''' 

1276 return self._database._iter_codes_info( 

1277 kind=kind, 

1278 kind_codes_count='%(db)s.%(kind_codes_count)s' % self._names) 

1279 

1280 def get_kinds(self, codes=None): 

1281 ''' 

1282 Get content types available in selection. 

1283 

1284 :param codes: 

1285 If given, get kinds only for selected codes identifier. 

1286 :type codes: 

1287 :py:class:`tuple` of :py:class:`str` 

1288 

1289 :returns: 

1290 Sorted list of available content types. 

1291 

1292 :complexity: 

1293 O(1), independent of number of nuts. 

1294 

1295 ''' 

1296 return sorted(list(self.iter_kinds(codes=codes))) 

1297 

1298 def get_deltats(self, kind=None): 

1299 ''' 

1300 Get sampling intervals available in selection. 

1301 

1302 :param kind: 

1303 If given, get sampling intervals only for selected content type. 

1304 :type kind: 

1305 str 

1306 

1307 :complexity: 

1308 O(1), independent of number of nuts. 

1309 

1310 :returns: Sorted list of available sampling intervals. 

1311 ''' 

1312 return sorted(list(self.iter_deltats(kind=kind))) 

1313 

1314 def get_codes(self, kind=None): 

1315 ''' 

1316 Get identifier code sequences available in selection. 

1317 

1318 :param kind: 

1319 If given, get codes only for selected content type. 

1320 :type kind: 

1321 str 

1322 

1323 :complexity: 

1324 O(1), independent of number of nuts. 

1325 

1326 :returns: Sorted list of available codes as tuples of strings. 

1327 ''' 

1328 return sorted(list(self.iter_codes(kind=kind))) 

1329 

1330 def get_counts(self, kind=None): 

1331 ''' 

1332 Get number of occurrences of any (kind, codes) combination. 

1333 

1334 :param kind: 

1335 If given, get codes only for selected content type. 

1336 :type kind: 

1337 str 

1338 

1339 :complexity: 

1340 O(1), independent of number of nuts. 

1341 

1342 :returns: ``dict`` with ``counts[kind][codes]`` or ``counts[codes]`` 

1343 if kind is not ``None`` 

1344 ''' 

1345 d = {} 

1346 for kind_id, codes, _, _, count in self._iter_codes_info(kind=kind): 

1347 if kind_id not in d: 

1348 v = d[kind_id] = {} 

1349 else: 

1350 v = d[kind_id] 

1351 

1352 if codes not in v: 

1353 v[codes] = 0 

1354 

1355 v[codes] += count 

1356 

1357 if kind is not None: 

1358 return d[to_kind_id(kind)] 

1359 else: 

1360 return dict((to_kind(kind_id), v) for (kind_id, v) in d.items()) 

1361 

1362 def glob_codes(self, kind, codes_list): 

1363 ''' 

1364 Find codes matching given patterns. 

1365 

1366 :param kind: 

1367 Content kind to be queried. 

1368 :type kind: 

1369 str 

1370 

1371 :param codes_list: 

1372 List of code patterns to query. If not given or empty, an empty 

1373 list is returned. 

1374 :type codes_list: 

1375 :py:class:`list` of :py:class:`tuple` of :py:class:`str` 

1376 

1377 :returns: 

1378 List of matches of the form ``[kind_codes_id, codes, deltat]``. 

1379 ''' 

1380 

1381 kind_id = to_kind_id(kind) 

1382 args = [kind_id] 

1383 pats = [] 

1384 for codes in codes_list: 

1385 pats.extend(codes_patterns_for_kind(kind_id, codes)) 

1386 

1387 if pats: 

1388 codes_cond = 'AND ( %s ) ' % ' OR '.join( 

1389 ('kind_codes.codes GLOB ?',) * len(pats)) 

1390 

1391 args.extend(pat.safe_str for pat in pats) 

1392 else: 

1393 codes_cond = '' 

1394 

1395 sql = self._sql(''' 

1396 SELECT kind_codes_id, codes, deltat FROM kind_codes 

1397 WHERE 

1398 kind_id == ? ''' + codes_cond) 

1399 

1400 return list(map(list, self._conn.execute(sql, args))) 

1401 

1402 def update(self, constraint=None, **kwargs): 

1403 ''' 

1404 Update or partially update channel and event inventories. 

1405 

1406 :param constraint: 

1407 Selection of times or areas to be brought up to date. 

1408 :type constraint: 

1409 :py:class:`~pyrocko.squirrel.client.base.Constraint` 

1410 

1411 :param \\*\\*kwargs: 

1412 Shortcut for setting ``constraint=Constraint(**kwargs)``. 

1413 

1414 This function triggers all attached remote sources, to check for 

1415 updates in the meta-data. The sources will only submit queries when 

1416 their expiration date has passed, or if the selection spans into 

1417 previously unseen times or areas. 

1418 ''' 

1419 

1420 if constraint is None: 

1421 constraint = client.Constraint(**kwargs) 

1422 

1423 for source in self._sources: 

1424 source.update_channel_inventory(self, constraint) 

1425 source.update_event_inventory(self, constraint) 

1426 

1427 def update_waveform_promises(self, constraint=None, **kwargs): 

1428 ''' 

1429 Permit downloading of remote waveforms. 

1430 

1431 :param constraint: 

1432 Remote waveforms compatible with the given constraint are enabled 

1433 for download. 

1434 :type constraint: 

1435 :py:class:`~pyrocko.squirrel.client.base.Constraint` 

1436 

1437 :param \\*\\*kwargs: 

1438 Shortcut for setting ``constraint=Constraint(**kwargs)``. 

1439 

1440 Calling this method permits Squirrel to download waveforms from remote 

1441 sources when processing subsequent waveform requests. This works by 

1442 inserting so called waveform promises into the database. It will look 

1443 into the available channels for each remote source and create a promise 

1444 for each channel compatible with the given constraint. If the promise 

1445 then matches in a waveform request, Squirrel tries to download the 

1446 waveform. If the download is successful, the downloaded waveform is 

1447 added to the Squirrel and the promise is deleted. If the download 

1448 fails, the promise is kept if the reason of failure looks like being 

1449 temporary, e.g. because of a network failure. If the cause of failure 

1450 however seems to be permanent, the promise is deleted so that no 

1451 further attempts are made to download a waveform which might not be 

1452 available from that server at all. To force re-scheduling after a 

1453 permanent failure, call :py:meth:`update_waveform_promises` 

1454 yet another time. 

1455 ''' 

1456 

1457 if constraint is None: 

1458 constraint = client.Constraint(**kwargs) 

1459 

1460 for source in self._sources: 

1461 source.update_waveform_promises(self, constraint) 

1462 

1463 def update_responses(self, constraint=None, **kwargs): 

1464 # TODO 

1465 if constraint is None: 

1466 constraint = client.Constraint(**kwargs) 

1467 

1468 print('contraint ignored atm') 

1469 for source in self._sources: 

1470 source.update_response_inventory(self, constraint) 

1471 

1472 def get_nfiles(self): 

1473 ''' 

1474 Get number of files in selection. 

1475 ''' 

1476 

1477 sql = self._sql('''SELECT COUNT(*) FROM %(db)s.%(file_states)s''') 

1478 for row in self._conn.execute(sql): 

1479 return row[0] 

1480 

1481 def get_nnuts(self): 

1482 ''' 

1483 Get number of nuts in selection. 

1484 ''' 

1485 

1486 sql = self._sql('''SELECT COUNT(*) FROM %(db)s.%(nuts)s''') 

1487 for row in self._conn.execute(sql): 

1488 return row[0] 

1489 

1490 def get_total_size(self): 

1491 ''' 

1492 Get aggregated file size available in selection. 

1493 ''' 

1494 

1495 sql = self._sql(''' 

1496 SELECT SUM(files.size) FROM %(db)s.%(file_states)s 

1497 INNER JOIN files 

1498 ON %(db)s.%(file_states)s.file_id = files.file_id 

1499 ''') 

1500 

1501 for row in self._conn.execute(sql): 

1502 return row[0] or 0 

1503 

1504 def get_stats(self): 

1505 ''' 

1506 Get statistics on contents available through this selection. 

1507 ''' 

1508 

1509 kinds = self.get_kinds() 

1510 time_spans = {} 

1511 for kind in kinds: 

1512 time_spans[kind] = self.get_time_span([kind]) 

1513 

1514 return SquirrelStats( 

1515 nfiles=self.get_nfiles(), 

1516 nnuts=self.get_nnuts(), 

1517 kinds=kinds, 

1518 codes=self.get_codes(), 

1519 total_size=self.get_total_size(), 

1520 counts=self.get_counts(), 

1521 time_spans=time_spans, 

1522 sources=[s.describe() for s in self._sources], 

1523 operators=[op.describe() for op in self._operators]) 

1524 

    def get_content(
            self,
            nut,
            cache_id='default',
            accessor_id='default',
            show_progress=False):

        '''
        Get and possibly load full content for a given index entry from file.

        Loads the actual content objects (channel, station, waveform, ...) from
        file. For efficiency, sibling content (all stuff in the same file
        segment) will also be loaded as a side effect. The loaded contents are
        cached in the Squirrel object.

        :param nut:
            Index entry for which the full content is requested.
        :param cache_id:
            Name of the memory cache to use.
        :param accessor_id:
            Name of the consumer accessing the data, used for cache
            management (see :py:mod:`~pyrocko.squirrel.cache`).
        :param show_progress:
            If ``True``, show progress while loading from file.

        :raises:
            :py:exc:`~pyrocko.squirrel.error.NotAvailable` if the content
            cannot be retrieved even after (re)loading the file.
        '''

        content_cache = self._content_caches[cache_id]
        if not content_cache.has(nut):

            # Cache miss: load the whole file segment containing the nut.
            # All sibling contents from that segment are cached alongside.
            for nut_loaded in io.iload(
                    nut.file_path,
                    segment=nut.file_segment,
                    format=nut.file_format,
                    database=self._database,
                    update_selection=self,
                    show_progress=show_progress):

                content_cache.put(nut_loaded)

        try:
            return content_cache.get(nut, accessor_id)
        except KeyError:
            # Can happen e.g. when the file changed on disk since indexing.
            raise error.NotAvailable(
                'Unable to retrieve content: %s, %s, %s, %s' % nut.key)

1560 def advance_accessor(self, accessor_id, cache_id=None): 

1561 ''' 

1562 Notify memory caches about consumer moving to a new data batch. 

1563 

1564 :param accessor_id: 

1565 Name of accessing consumer to be advanced. 

1566 :type accessor_id: 

1567 str 

1568 

1569 :param cache_id: 

1570 Name of cache to for which the accessor should be advanced. By 

1571 default the named accessor is advanced in all registered caches. 

1572 By default, two caches named ``'default'`` and ``'waveforms'`` are 

1573 available. 

1574 :type cache_id: 

1575 str 

1576 

1577 See :py:class:`~pyrocko.squirrel.cache.ContentCache` for details on how 

1578 Squirrel's memory caching works and can be tuned. Default behaviour is 

1579 to release data when it has not been used in the latest data 

1580 window/batch. If the accessor is never advanced, data is cached 

1581 indefinitely - which is often desired e.g. for station meta-data. 

1582 Methods for consecutive data traversal, like 

1583 :py:meth:`chopper_waveforms` automatically advance and clear 

1584 their accessor. 

1585 ''' 

1586 for cache_ in ( 

1587 self._content_caches.keys() 

1588 if cache_id is None 

1589 else [cache_id]): 

1590 

1591 self._content_caches[cache_].advance_accessor(accessor_id) 

1592 

1593 def clear_accessor(self, accessor_id, cache_id=None): 

1594 ''' 

1595 Notify memory caches about a consumer having finished. 

1596 

1597 :param accessor_id: 

1598 Name of accessor to be cleared. 

1599 :type accessor_id: 

1600 str 

1601 

1602 :param cache_id: 

1603 Name of cache for which the accessor should be cleared. By default 

1604 the named accessor is cleared from all registered caches. By 

1605 default, two caches named ``'default'`` and ``'waveforms'`` are 

1606 available. 

1607 :type cache_id: 

1608 str 

1609 

1610 Calling this method clears all references to cache entries held by the 

1611 named accessor. Cache entries are then freed if not referenced by any 

1612 other accessor. 

1613 ''' 

1614 

1615 for cache_ in ( 

1616 self._content_caches.keys() 

1617 if cache_id is None 

1618 else [cache_id]): 

1619 

1620 self._content_caches[cache_].clear_accessor(accessor_id) 

1621 

1622 def get_cache_stats(self, cache_id): 

1623 return self._content_caches[cache_id].get_stats() 

1624 

1625 def _check_duplicates(self, nuts): 

1626 d = defaultdict(list) 

1627 for nut in nuts: 

1628 d[nut.codes].append(nut) 

1629 

1630 for codes, group in d.items(): 

1631 if len(group) > 1: 

1632 logger.warning( 

1633 'Multiple entries matching codes: %s' % str(codes)) 

1634 

1635 @filldocs 

1636 def get_stations( 

1637 self, obj=None, tmin=None, tmax=None, time=None, codes=None, 

1638 model='squirrel'): 

1639 

1640 ''' 

1641 Get stations matching given constraints. 

1642 

1643 %(query_args)s 

1644 

1645 :param model: 

1646 Select object model for returned values: ``'squirrel'`` to get 

1647 Squirrel station objects or ``'pyrocko'`` to get Pyrocko station 

1648 objects with channel information attached. 

1649 :type model: 

1650 str 

1651 

1652 :returns: 

1653 List of :py:class:`pyrocko.squirrel.Station 

1654 <pyrocko.squirrel.model.Station>` objects by default or list of 

1655 :py:class:`pyrocko.model.Station <pyrocko.model.station.Station>` 

1656 objects if ``model='pyrocko'`` is requested. 

1657 

1658 See :py:meth:`iter_nuts` for details on time span matching. 

1659 ''' 

1660 

1661 if model == 'pyrocko': 

1662 return self._get_pyrocko_stations(obj, tmin, tmax, time, codes) 

1663 elif model == 'squirrel': 

1664 args = self._get_selection_args( 

1665 STATION, obj, tmin, tmax, time, codes) 

1666 

1667 nuts = sorted( 

1668 self.iter_nuts('station', *args), key=lambda nut: nut.dkey) 

1669 self._check_duplicates(nuts) 

1670 return [self.get_content(nut) for nut in nuts] 

1671 else: 

1672 raise ValueError('Invalid station model: %s' % model) 

1673 

1674 @filldocs 

1675 def get_channels( 

1676 self, obj=None, tmin=None, tmax=None, time=None, codes=None): 

1677 

1678 ''' 

1679 Get channels matching given constraints. 

1680 

1681 %(query_args)s 

1682 

1683 :returns: 

1684 List of :py:class:`~pyrocko.squirrel.model.Channel` objects. 

1685 

1686 See :py:meth:`iter_nuts` for details on time span matching. 

1687 ''' 

1688 

1689 args = self._get_selection_args( 

1690 CHANNEL, obj, tmin, tmax, time, codes) 

1691 

1692 nuts = sorted( 

1693 self.iter_nuts('channel', *args), key=lambda nut: nut.dkey) 

1694 self._check_duplicates(nuts) 

1695 return [self.get_content(nut) for nut in nuts] 

1696 

1697 @filldocs 

1698 def get_sensors( 

1699 self, obj=None, tmin=None, tmax=None, time=None, codes=None): 

1700 

1701 ''' 

1702 Get sensors matching given constraints. 

1703 

1704 %(query_args)s 

1705 

1706 :returns: 

1707 List of :py:class:`~pyrocko.squirrel.model.Sensor` objects. 

1708 

1709 See :py:meth:`iter_nuts` for details on time span matching. 

1710 ''' 

1711 

1712 tmin, tmax, codes = self._get_selection_args( 

1713 CHANNEL, obj, tmin, tmax, time, codes) 

1714 

1715 if codes is not None: 

1716 if codes.channel != '*': 

1717 codes = codes.replace(codes.channel[:-1] + '?') 

1718 

1719 nuts = sorted( 

1720 self.iter_nuts( 

1721 'channel', tmin, tmax, codes), key=lambda nut: nut.dkey) 

1722 self._check_duplicates(nuts) 

1723 return model.Sensor.from_channels( 

1724 self.get_content(nut) for nut in nuts) 

1725 

1726 @filldocs 

1727 def get_responses( 

1728 self, obj=None, tmin=None, tmax=None, time=None, codes=None): 

1729 

1730 ''' 

1731 Get instrument responses matching given constraints. 

1732 

1733 %(query_args)s 

1734 

1735 :returns: 

1736 List of :py:class:`~pyrocko.squirrel.model.Response` objects. 

1737 

1738 See :py:meth:`iter_nuts` for details on time span matching. 

1739 ''' 

1740 

1741 args = self._get_selection_args( 

1742 RESPONSE, obj, tmin, tmax, time, codes) 

1743 

1744 nuts = sorted( 

1745 self.iter_nuts('response', *args), key=lambda nut: nut.dkey) 

1746 self._check_duplicates(nuts) 

1747 return [self.get_content(nut) for nut in nuts] 

1748 

1749 @filldocs 

1750 def get_response( 

1751 self, obj=None, tmin=None, tmax=None, time=None, codes=None): 

1752 

1753 ''' 

1754 Get instrument response matching given constraints. 

1755 

1756 %(query_args)s 

1757 

1758 :returns: 

1759 :py:class:`~pyrocko.squirrel.model.Response` object. 

1760 

1761 Same as :py:meth:`get_responses` but returning exactly one response. 

1762 Raises :py:exc:`~pyrocko.squirrel.error.NotAvailable` if zero or more 

1763 than one is available. 

1764 

1765 See :py:meth:`iter_nuts` for details on time span matching. 

1766 ''' 

1767 

1768 responses = self.get_responses(obj, tmin, tmax, time, codes) 

1769 if len(responses) == 0: 

1770 raise error.NotAvailable( 

1771 'No instrument response available.') 

1772 elif len(responses) > 1: 

1773 raise error.NotAvailable( 

1774 'Multiple instrument responses matching given constraints.') 

1775 

1776 return responses[0] 

1777 

1778 @filldocs 

1779 def get_events( 

1780 self, obj=None, tmin=None, tmax=None, time=None, codes=None): 

1781 

1782 ''' 

1783 Get events matching given constraints. 

1784 

1785 %(query_args)s 

1786 

1787 :returns: 

1788 List of :py:class:`~pyrocko.model.event.Event` objects. 

1789 

1790 See :py:meth:`iter_nuts` for details on time span matching. 

1791 ''' 

1792 

1793 args = self._get_selection_args(EVENT, obj, tmin, tmax, time, codes) 

1794 nuts = sorted( 

1795 self.iter_nuts('event', *args), key=lambda nut: nut.dkey) 

1796 self._check_duplicates(nuts) 

1797 return [self.get_content(nut) for nut in nuts] 

1798 

    def _redeem_promises(self, *args):
        '''
        Resolve matching waveform promises by downloading from remote sources.

        ``args`` are the selection arguments ``(tmin, tmax, codes)`` as
        produced by ``_get_selection_args``. Promises already satisfied by
        locally available waveforms are dropped; the remaining ones are
        grouped per (codes, block) and handed to the attached FDSN sources in
        priority order, with downloads running in one thread per source.
        '''

        tmin, tmax, _ = args

        waveforms = list(self.iter_nuts('waveform', *args))
        promises = list(self.iter_nuts('waveform_promise', *args))

        # Time spans of locally available waveforms, per codes.
        codes_to_avail = defaultdict(list)
        for nut in waveforms:
            codes_to_avail[nut.codes].append((nut.tmin, nut.tmax))

        # Debug helper: recursively convert timestamps to readable strings.
        # NOTE(review): currently unused in this method.
        def tts(x):
            if isinstance(x, tuple):
                return tuple(tts(e) for e in x)
            elif isinstance(x, list):
                return list(tts(e) for e in x)
            else:
                return util.time_to_str(x)

        # Create one download order per promise and per fixed-size time
        # block, attaching the gaps still missing locally.
        orders = []
        for promise in promises:
            waveforms_avail = codes_to_avail[promise.codes]
            for block_tmin, block_tmax in blocks(
                    max(tmin, promise.tmin),
                    min(tmax, promise.tmax),
                    promise.deltat):

                orders.append(
                    WaveformOrder(
                        source_id=promise.file_path,
                        codes=promise.codes,
                        tmin=block_tmin,
                        tmax=block_tmax,
                        deltat=promise.deltat,
                        gaps=gaps(waveforms_avail, block_tmin, block_tmax)))

        # Orders without gaps are already satisfied by local data.
        orders_noop, orders = lpick(lambda order: order.gaps, orders)

        order_keys_noop = set(order_key(order) for order in orders_noop)
        if len(order_keys_noop) != 0 or len(orders_noop) != 0:
            logger.info(
                'Waveform orders already satisified with cached/local data: '
                '%i (%i)' % (len(order_keys_noop), len(orders_noop)))

        # Collect FDSN sources; their list order defines download priority.
        source_ids = []
        sources = {}
        for source in self._sources:
            if isinstance(source, fdsn.FDSNSource):
                source_ids.append(source._source_id)
                sources[source._source_id] = source

        source_priority = dict(
            (source_id, i) for (i, source_id) in enumerate(source_ids))

        # Group equivalent orders; within a group, sources are tried in
        # priority order (followups serve as fallbacks).
        order_groups = defaultdict(list)
        for order in orders:
            order_groups[order_key(order)].append(order)

        for k, order_group in order_groups.items():
            order_group.sort(
                key=lambda order: source_priority[order.source_id])

        n_order_groups = len(order_groups)

        if len(order_groups) != 0 or len(orders) != 0:
            logger.info(
                'Waveform orders standing for download: %i (%i)'
                % (len(order_groups), len(orders)))

            task = make_task('Waveform orders processed', n_order_groups)
        else:
            task = None

        # Remove the promise's time span covered by this order.
        def split_promise(order):
            self._split_nuts(
                'waveform_promise',
                order.tmin, order.tmax,
                codes=order.codes,
                path=order.source_id)

        # A successful download satisfies the whole group: discard all
        # fallback orders and their promises.
        def release_order_group(order):
            okey = order_key(order)
            for followup in order_groups[okey]:
                split_promise(followup)

            del order_groups[okey]

            if task:
                task.update(n_order_groups - len(order_groups))

        def noop(order):
            pass

        def success(order):
            release_order_group(order)
            split_promise(order)

        def batch_add(paths):
            self.add(paths)

        # Callbacks from download threads are marshalled through this queue
        # and executed in the main thread, which owns the database.
        calls = queue.Queue()

        def enqueue(f):
            def wrapper(*args):
                calls.put((f, args))

            return wrapper

        for order in orders_noop:
            split_promise(order)

        while order_groups:

            # Take the highest-priority remaining order of each group.
            orders_now = []
            empty = []
            for k, order_group in order_groups.items():
                try:
                    orders_now.append(order_group.pop(0))
                except IndexError:
                    empty.append(k)

            for k in empty:
                del order_groups[k]

            by_source_id = defaultdict(list)
            for order in orders_now:
                by_source_id[order.source_id].append(order)

            # One download thread per source; each signals completion by
            # putting None on the queue (in the finally block, so the main
            # loop terminates even on error).
            threads = []
            for source_id in by_source_id:
                def download():
                    try:
                        sources[source_id].download_waveforms(
                            by_source_id[source_id],
                            success=enqueue(success),
                            error_permanent=enqueue(split_promise),
                            error_temporary=noop,
                            batch_add=enqueue(batch_add))

                    finally:
                        calls.put(None)

                thread = threading.Thread(target=download)
                thread.start()
                threads.append(thread)

            # Dispatch queued callbacks until all threads have signalled.
            ndone = 0
            while ndone < len(threads):
                ret = calls.get()
                if ret is None:
                    ndone += 1
                else:
                    ret[0](*ret[1])

            for thread in threads:
                thread.join()

            if task:
                task.update(n_order_groups - len(order_groups))

        if task:
            task.done()

1962 @filldocs 

1963 def get_waveform_nuts( 

1964 self, obj=None, tmin=None, tmax=None, time=None, codes=None): 

1965 

1966 ''' 

1967 Get waveform content entities matching given constraints. 

1968 

1969 %(query_args)s 

1970 

1971 Like :py:meth:`get_nuts` with ``kind='waveform'`` but additionally 

1972 resolves matching waveform promises (downloads waveforms from remote 

1973 sources). 

1974 

1975 See :py:meth:`iter_nuts` for details on time span matching. 

1976 ''' 

1977 

1978 args = self._get_selection_args(WAVEFORM, obj, tmin, tmax, time, codes) 

1979 self._redeem_promises(*args) 

1980 return sorted( 

1981 self.iter_nuts('waveform', *args), key=lambda nut: nut.dkey) 

1982 

    @filldocs
    def get_waveforms(
            self, obj=None, tmin=None, tmax=None, time=None, codes=None,
            uncut=False, want_incomplete=True, degap=True, maxgap=5,
            maxlap=None, snap=None, include_last=False, load_data=True,
            accessor_id='default', operator_params=None):

        '''
        Get waveforms matching given constraints.

        %(query_args)s

        :param uncut:
            Set to ``True``, to disable cutting traces to [``tmin``, ``tmax``]
            and to disable degapping/deoverlapping. Returns untouched traces as
            they are read from file segment. File segments are always read in
            their entirety.
        :type uncut:
            bool

        :param want_incomplete:
            If ``True``, gappy/incomplete traces are included in the result.
        :type want_incomplete:
            bool

        :param degap:
            If ``True``, connect traces and remove gaps and overlaps.
        :type degap:
            bool

        :param maxgap:
            Maximum gap size in samples which is filled with interpolated
            samples when ``degap`` is ``True``.
        :type maxgap:
            int

        :param maxlap:
            Maximum overlap size in samples which is removed when ``degap`` is
            ``True``.
        :type maxlap:
            int

        :param snap:
            Rounding functions used when computing sample index from time
            instance, for trace start and trace end, respectively. By default,
            ``(round, round)`` is used.
        :type snap:
            tuple of 2 callables

        :param include_last:
            If ``True``, add one more sample to the returned traces (the sample
            which would be the first sample of a query with ``tmin`` set to the
            current value of ``tmax``).
        :type include_last:
            bool

        :param load_data:
            If ``True``, waveform data samples are read from files (or cache).
            If ``False``, meta-information-only traces are returned (dummy
            traces with no data samples).
        :type load_data:
            bool

        :param accessor_id:
            Name of consumer on who's behalf data is accessed. Used in cache
            management (see :py:mod:`~pyrocko.squirrel.cache`). Used as a key
            to distinguish different points of extraction for the decision of
            when to release cached waveform data. Should be used when data is
            alternately extracted from more than one region / selection.
        :type accessor_id:
            str

        See :py:meth:`iter_nuts` for details on time span matching.

        Loaded data is kept in memory (at least) until
        :py:meth:`clear_accessor` has been called or
        :py:meth:`advance_accessor` has been called two consecutive times
        without data being accessed between the two calls (by this accessor).
        Data may still be further kept in the memory cache if held alive by
        consumers with a different ``accessor_id``.
        '''

        tmin, tmax, codes = self._get_selection_args(
            WAVEFORM, obj, tmin, tmax, time, codes)

        self_tmin, self_tmax = self.get_time_span(
            ['waveform', 'waveform_promise'])

        if None in (self_tmin, self_tmax):
            logger.warning(
                'No waveforms available.')
            return []

        # Fall back to the full available time span for unset bounds.
        tmin = tmin if tmin is not None else self_tmin
        tmax = tmax if tmax is not None else self_tmax

        # If an operator is registered for these codes, delegate to it.
        if codes is not None:
            operator = self.get_operator(codes)
            if operator is not None:
                return operator.get_waveforms(
                    self, codes,
                    tmin=tmin, tmax=tmax,
                    uncut=uncut, want_incomplete=want_incomplete, degap=degap,
                    maxgap=maxgap, maxlap=maxlap, snap=snap,
                    include_last=include_last, load_data=load_data,
                    accessor_id=accessor_id, params=operator_params)

        # This resolves waveform promises (may trigger downloads).
        nuts = self.get_waveform_nuts(obj, tmin, tmax, time, codes)

        if load_data:
            # 'waveform' here is the cache_id passed to get_content.
            # NOTE(review): docstrings elsewhere mention a cache named
            # 'waveforms' (plural) - confirm which name is registered.
            traces = [
                self.get_content(nut, 'waveform', accessor_id) for nut in nuts]

        else:
            traces = [
                trace.Trace(**nut.trace_kwargs) for nut in nuts]

        if uncut:
            return traces

        if snap is None:
            snap = (round, round)

        chopped = []
        for tr in traces:
            # Strip samples from traces still holding cached data when
            # meta-only traces were requested.
            if not load_data and tr.ydata is not None:
                tr = tr.copy(data=False)
                tr.ydata = None

            try:
                chopped.append(tr.chop(
                    tmin, tmax,
                    inplace=False,
                    snap=snap,
                    include_last=include_last))

            except trace.NoData:
                pass

        processed = self._process_chopped(
            chopped, degap, maxgap, maxlap, want_incomplete, tmin, tmax)

        return processed

2127 @filldocs 

2128 def chopper_waveforms( 

2129 self, obj=None, tmin=None, tmax=None, time=None, codes=None, 

2130 tinc=None, tpad=0., 

2131 want_incomplete=True, snap_window=False, 

2132 degap=True, maxgap=5, maxlap=None, 

2133 snap=None, include_last=False, load_data=True, 

2134 accessor_id=None, clear_accessor=True, operator_params=None): 

2135 

2136 ''' 

2137 Iterate window-wise over waveform archive. 

2138 

2139 %(query_args)s 

2140 

2141 :param tinc: 

2142 Time increment (window shift time) (default uses ``tmax-tmin``). 

2143 :type tinc: 

2144 timestamp 

2145 

2146 :param tpad: 

2147 Padding time appended on either side of the data window (window 

2148 overlap is ``2*tpad``). 

2149 :type tpad: 

2150 timestamp 

2151 

2152 :param want_incomplete: 

2153 If ``True``, gappy/incomplete traces are included in the result. 

2154 :type want_incomplete: 

2155 bool 

2156 

2157 :param snap_window: 

2158 If ``True``, start time windows at multiples of tinc with respect 

2159 to system time zero. 

2160 :type snap_window: 

2161 bool 

2162 

2163 :param degap: 

2164 If ``True``, connect traces and remove gaps and overlaps. 

2165 :type degap: 

2166 bool 

2167 

2168 :param maxgap: 

2169 Maximum gap size in samples which is filled with interpolated 

2170 samples when ``degap`` is ``True``. 

2171 :type maxgap: 

2172 int 

2173 

2174 :param maxlap: 

2175 Maximum overlap size in samples which is removed when ``degap`` is 

2176 ``True``. 

2177 :type maxlap: 

2178 int 

2179 

2180 :param snap: 

2181 Rounding functions used when computing sample index from time 

2182 instance, for trace start and trace end, respectively. By default, 

2183 ``(round, round)`` is used. 

2184 :type snap: 

2185 tuple of 2 callables 

2186 

2187 :param include_last: 

2188 If ``True``, add one more sample to the returned traces (the sample 

2189 which would be the first sample of a query with ``tmin`` set to the 

2190 current value of ``tmax``). 

2191 :type include_last: 

2192 bool 

2193 

2194 :param load_data: 

2195 If ``True``, waveform data samples are read from files (or cache). 

2196 If ``False``, meta-information-only traces are returned (dummy 

2197 traces with no data samples). 

2198 :type load_data: 

2199 bool 

2200 

2201 :param accessor_id: 

2202 Name of consumer on who's behalf data is accessed. Used in cache 

2203 management (see :py:mod:`~pyrocko.squirrel.cache`). Used as a key 

2204 to distinguish different points of extraction for the decision of 

2205 when to release cached waveform data. Should be used when data is 

2206 alternately extracted from more than one region / selection. 

2207 :type accessor_id: 

2208 str 

2209 

2210 :param clear_accessor: 

2211 If ``True`` (default), :py:meth:`clear_accessor` is called when the 

2212 chopper finishes. Set to ``False`` to keep loaded waveforms in 

2213 memory when the generator returns. 

2214 :type clear_accessor: 

2215 bool 

2216 

2217 :yields: 

2218 A list of :py:class:`~pyrocko.trace.Trace` objects for every 

2219 extracted time window. 

2220 

2221 See :py:meth:`iter_nuts` for details on time span matching. 

2222 ''' 

2223 

2224 tmin, tmax, codes = self._get_selection_args( 

2225 WAVEFORM, obj, tmin, tmax, time, codes) 

2226 

2227 self_tmin, self_tmax = self.get_time_span( 

2228 ['waveform', 'waveform_promise']) 

2229 

2230 if None in (self_tmin, self_tmax): 

2231 logger.warning( 

2232 'Content has undefined time span. No waveforms and no ' 

2233 'waveform promises?') 

2234 return 

2235 

2236 if snap_window and tinc is not None: 

2237 tmin = tmin if tmin is not None else self_tmin 

2238 tmax = tmax if tmax is not None else self_tmax 

2239 tmin = math.floor(tmin / tinc) * tinc 

2240 tmax = math.ceil(tmax / tinc) * tinc 

2241 else: 

2242 tmin = tmin if tmin is not None else self_tmin + tpad 

2243 tmax = tmax if tmax is not None else self_tmax - tpad 

2244 

2245 tinc = tinc if tinc is not None else tmax - tmin 

2246 

2247 try: 

2248 if accessor_id is None: 

2249 accessor_id = 'chopper%i' % self._n_choppers_active 

2250 

2251 self._n_choppers_active += 1 

2252 

2253 eps = tinc * 1e-6 

2254 if tinc != 0.0: 

2255 nwin = int(((tmax - eps) - tmin) / tinc) + 1 

2256 else: 

2257 nwin = 1 

2258 

2259 for iwin in range(nwin): 

2260 wmin, wmax = tmin+iwin*tinc, min(tmin+(iwin+1)*tinc, tmax) 

2261 

2262 chopped = self.get_waveforms( 

2263 tmin=wmin-tpad, 

2264 tmax=wmax+tpad, 

2265 codes=codes, 

2266 snap=snap, 

2267 include_last=include_last, 

2268 load_data=load_data, 

2269 want_incomplete=want_incomplete, 

2270 degap=degap, 

2271 maxgap=maxgap, 

2272 maxlap=maxlap, 

2273 accessor_id=accessor_id, 

2274 operator_params=operator_params) 

2275 

2276 self.advance_accessor(accessor_id) 

2277 

2278 yield Batch( 

2279 tmin=wmin, 

2280 tmax=wmax, 

2281 i=iwin, 

2282 n=nwin, 

2283 traces=chopped) 

2284 

2285 iwin += 1 

2286 

2287 finally: 

2288 self._n_choppers_active -= 1 

2289 if clear_accessor: 

2290 self.clear_accessor(accessor_id, 'waveform') 

2291 

2292 def _process_chopped( 

2293 self, chopped, degap, maxgap, maxlap, want_incomplete, tmin, tmax): 

2294 

2295 chopped.sort(key=lambda a: a.full_id) 

2296 if degap: 

2297 chopped = trace.degapper(chopped, maxgap=maxgap, maxlap=maxlap) 

2298 

2299 if not want_incomplete: 

2300 chopped_weeded = [] 

2301 for tr in chopped: 

2302 emin = tr.tmin - tmin 

2303 emax = tr.tmax + tr.deltat - tmax 

2304 if (abs(emin) <= 0.5*tr.deltat and abs(emax) <= 0.5*tr.deltat): 

2305 chopped_weeded.append(tr) 

2306 

2307 elif degap: 

2308 if (0. < emin <= 5. * tr.deltat 

2309 and -5. * tr.deltat <= emax < 0.): 

2310 

2311 tr.extend(tmin, tmax-tr.deltat, fillmethod='repeat') 

2312 chopped_weeded.append(tr) 

2313 

2314 chopped = chopped_weeded 

2315 

2316 return chopped 

2317 

2318 def _get_pyrocko_stations( 

2319 self, obj=None, tmin=None, tmax=None, time=None, codes=None): 

2320 

2321 from pyrocko import model as pmodel 

2322 

2323 by_nsl = defaultdict(lambda: (list(), list())) 

2324 for station in self.get_stations(obj, tmin, tmax, time, codes): 

2325 sargs = station._get_pyrocko_station_args() 

2326 by_nsl[station.codes.nsl][0].append(sargs) 

2327 

2328 for channel in self.get_channels(obj, tmin, tmax, time, codes): 

2329 sargs = channel._get_pyrocko_station_args() 

2330 sargs_list, channels_list = by_nsl[channel.codes.nsl] 

2331 sargs_list.append(sargs) 

2332 channels_list.append(channel) 

2333 

2334 pstations = [] 

2335 nsls = list(by_nsl.keys()) 

2336 nsls.sort() 

2337 for nsl in nsls: 

2338 sargs_list, channels_list = by_nsl[nsl] 

2339 sargs = util.consistency_merge( 

2340 [('',) + x for x in sargs_list]) 

2341 

2342 by_c = defaultdict(list) 

2343 for ch in channels_list: 

2344 by_c[ch.codes.channel].append(ch._get_pyrocko_channel_args()) 

2345 

2346 chas = list(by_c.keys()) 

2347 chas.sort() 

2348 pchannels = [] 

2349 for cha in chas: 

2350 list_of_cargs = by_c[cha] 

2351 cargs = util.consistency_merge( 

2352 [('',) + x for x in list_of_cargs]) 

2353 pchannels.append(pmodel.Channel(*cargs)) 

2354 

2355 pstations.append( 

2356 pmodel.Station(*sargs, channels=pchannels)) 

2357 

2358 return pstations 

2359 

2360 @property 

2361 def pile(self): 

2362 

2363 ''' 

2364 Emulates the older :py:class:`pyrocko.pile.Pile` interface. 

2365 

2366 This property exposes a :py:class:`pyrocko.squirrel.pile.Pile` object, 

2367 which emulates most of the older :py:class:`pyrocko.pile.Pile` methods 

2368 but uses the fluffy power of the Squirrel under the hood. 

2369 

2370 This interface can be used as a drop-in replacement for piles which are 

2371 used in existing scripts and programs for efficient waveform data 

2372 access. The Squirrel-based pile scales better for large datasets. Newer 

2373 scripts should use Squirrel's native methods to avoid the emulation 

2374 overhead. 

2375 ''' 

2376 from . import pile 

2377 

2378 if self._pile is None: 

2379 self._pile = pile.Pile(self) 

2380 

2381 return self._pile 

2382 

2383 def snuffle(self): 

2384 ''' 

2385 Look at dataset in Snuffler. 

2386 ''' 

2387 self.pile.snuffle() 

2388 

2389 def _gather_codes_keys(self, kind, gather, selector): 

2390 return set( 

2391 gather(codes) 

2392 for codes in self.iter_codes(kind) 

2393 if selector is None or selector(codes)) 

2394 

2395 def __str__(self): 

2396 return str(self.get_stats()) 

2397 

    def get_coverage(
            self, kind, tmin=None, tmax=None, codes_list=None, limit=None):

        '''
        Get coverage information.

        Get information about strips of gapless data coverage.

        :param kind:
            Content kind to be queried.
        :type kind:
            str

        :param tmin:
            Start time of query interval.
        :type tmin:
            timestamp

        :param tmax:
            End time of query interval.
        :type tmax:
            timestamp

        :param codes_list:
            If given, restrict query to given content codes patterns.
        :type codes_list:
            :py:class:`list` of :py:class:`Codes` objects appropriate for the
            queried content type, or anything which can be converted to
            such objects.

        :param limit:
            Limit query to return only up to a given maximum number of entries
            per matching time series (without setting this option, very gappy
            data could cause the query to execute for a very long time).
        :type limit:
            int

        :returns:
            Information about time spans covered by the requested time series
            data.
        :rtype:
            :py:class:`list` of :py:class:`Coverage` objects
        '''

        # Times are stored split into (integer seconds, float offset), see
        # model.tsplit / model.tjoin.
        tmin_seconds, tmin_offset = model.tsplit(tmin)
        tmax_seconds, tmax_offset = model.tsplit(tmax)
        kind_id = to_kind_id(kind)

        codes_info = list(self._iter_codes_info(kind=kind))

        # kdata_all: (pattern, kind_codes_id, codes, deltat) per matching
        # time series; with no codes_list, the codes act as their own pattern.
        kdata_all = []
        if codes_list is None:
            for _, codes, deltat, kind_codes_id, _ in codes_info:
                kdata_all.append((codes, kind_codes_id, codes, deltat))

        else:
            for pattern in codes_list:
                pattern = to_codes(kind_id, pattern)
                for _, codes, deltat, kind_codes_id, _ in codes_info:
                    if model.match_codes(pattern, codes):
                        kdata_all.append(
                            (pattern, kind_codes_id, codes, deltat))

        kind_codes_ids = [x[1] for x in kdata_all]

        # Coverage count already active at the exact instant tmin, gathered
        # from the nuts (the SQL below intentionally excludes == tmin).
        counts_at_tmin = {}
        if tmin is not None:
            for nut in self.iter_nuts(
                    kind, tmin, tmin, kind_codes_ids=kind_codes_ids):

                k = nut.codes, nut.deltat
                if k not in counts_at_tmin:
                    counts_at_tmin[k] = 0

                counts_at_tmin[k] += 1

        coverages = []
        for pattern, kind_codes_id, codes, deltat in kdata_all:
            # entry layout: [pattern, codes, deltat, tfirst, tlast, changes]
            entry = [pattern, codes, deltat, None, None, []]
            # Find the overall first (ASC) and last (DESC) coverage change
            # times for this time series.
            for i, order in [(0, 'ASC'), (1, 'DESC')]:
                sql = self._sql('''
                    SELECT
                        time_seconds,
                        time_offset
                    FROM %(db)s.%(coverage)s
                    WHERE
                        kind_codes_id == ?
                    ORDER BY
                        kind_codes_id ''' + order + ''',
                        time_seconds ''' + order + ''',
                        time_offset ''' + order + '''
                    LIMIT 1
                ''')

                for row in self._conn.execute(sql, [kind_codes_id]):
                    entry[3+i] = model.tjoin(row[0], row[1])

            # No coverage information at all for this time series.
            if None in entry[3:5]:
                continue

            args = [kind_codes_id]

            # Build optional time constraints on the split time columns.
            sql_time = ''
            if tmin is not None:
                # intentionally < because (== tmin) is queried from nuts
                sql_time += ' AND ( ? < time_seconds ' \
                    'OR ( ? == time_seconds AND ? < time_offset ) ) '
                args.extend([tmin_seconds, tmin_seconds, tmin_offset])

            if tmax is not None:
                sql_time += ' AND ( time_seconds < ? ' \
                    'OR ( ? == time_seconds AND time_offset <= ? ) ) '
                args.extend([tmax_seconds, tmax_seconds, tmax_offset])

            sql_limit = ''
            if limit is not None:
                sql_limit = ' LIMIT ?'
                args.append(limit)

            sql = self._sql('''
                SELECT
                    time_seconds,
                    time_offset,
                    step
                FROM %(db)s.%(coverage)s
                WHERE
                    kind_codes_id == ?
                    ''' + sql_time + '''
                ORDER BY
                    kind_codes_id,
                    time_seconds,
                    time_offset
            ''' + sql_limit)

            rows = list(self._conn.execute(sql, args))

            if limit is not None and len(rows) == limit:
                # Too many changes: mark as truncated by setting the changes
                # list to None.
                entry[-1] = None
            else:
                # Accumulate (time, running count) pairs; start from the
                # count active at tmin, if a query start time was given.
                counts = counts_at_tmin.get((codes, deltat), 0)
                tlast = None
                if tmin is not None:
                    entry[-1].append((tmin, counts))
                    tlast = tmin

                for row in rows:
                    t = model.tjoin(row[0], row[1])
                    counts += row[2]
                    entry[-1].append((t, counts))
                    tlast = t

                # Close the strip at tmax unless a change fell exactly there.
                if tmax is not None and (tlast is None or tlast != tmax):
                    entry[-1].append((tmax, counts))

            coverages.append(model.Coverage.from_values(entry + [kind_id]))

        return coverages

2555 

2556 def add_operator(self, op): 

2557 self._operators.append(op) 

2558 

2559 def update_operator_mappings(self): 

2560 available = self.get_codes(kind=('channel')) 

2561 

2562 for operator in self._operators: 

2563 operator.update_mappings(available, self._operator_registry) 

2564 

2565 def iter_operator_mappings(self): 

2566 for operator in self._operators: 

2567 for in_codes, out_codes in operator.iter_mappings(): 

2568 yield operator, in_codes, out_codes 

2569 

2570 def get_operator_mappings(self): 

2571 return list(self.iter_operator_mappings()) 

2572 

2573 def get_operator(self, codes): 

2574 try: 

2575 return self._operator_registry[codes][0] 

2576 except KeyError: 

2577 return None 

2578 

2579 def get_operator_group(self, codes): 

2580 try: 

2581 return self._operator_registry[codes] 

2582 except KeyError: 

2583 return None, (None, None, None) 

2584 

2585 def iter_operator_codes(self): 

2586 for _, _, out_codes in self.iter_operator_mappings(): 

2587 for codes in out_codes: 

2588 yield codes 

2589 

2590 def get_operator_codes(self): 

2591 return list(self.iter_operator_codes()) 

2592 

2593 def print_tables(self, table_names=None, stream=None): 

2594 ''' 

2595 Dump raw database tables in textual form (for debugging purposes). 

2596 

2597 :param table_names: 

2598 Names of tables to be dumped or ``None`` to dump all. 

2599 :type table_names: 

2600 :py:class:`list` of :py:class:`str` 

2601 

2602 :param stream: 

2603 Open file or ``None`` to dump to standard output. 

2604 ''' 

2605 

2606 if stream is None: 

2607 stream = sys.stdout 

2608 

2609 if isinstance(table_names, str): 

2610 table_names = [table_names] 

2611 

2612 if table_names is None: 

2613 table_names = [ 

2614 'selection_file_states', 

2615 'selection_nuts', 

2616 'selection_kind_codes_count', 

2617 'files', 'nuts', 'kind_codes', 'kind_codes_count'] 

2618 

2619 m = { 

2620 'selection_file_states': '%(db)s.%(file_states)s', 

2621 'selection_nuts': '%(db)s.%(nuts)s', 

2622 'selection_kind_codes_count': '%(db)s.%(kind_codes_count)s', 

2623 'files': 'files', 

2624 'nuts': 'nuts', 

2625 'kind_codes': 'kind_codes', 

2626 'kind_codes_count': 'kind_codes_count'} 

2627 

2628 for table_name in table_names: 

2629 self._database.print_table( 

2630 m[table_name] % self._names, stream=stream) 

2631 

2632 

class SquirrelStats(Object):
    '''
    Container to hold statistics about contents available from a Squirrel.

    See also :py:meth:`Squirrel.get_stats`.
    '''

    # Guts type declarations below define the serializable schema of this
    # statistics object.
    nfiles = Int.T(
        help='Number of files in selection.')
    nnuts = Int.T(
        help='Number of index nuts in selection.')
    codes = List.T(
        Tuple.T(content_t=String.T()),
        help='Available code sequences in selection, e.g. '
             '(agency, network, station, location) for stations nuts.')
    kinds = List.T(
        String.T(),
        help='Available content types in selection.')
    total_size = Int.T(
        help='Aggregated file size of files is selection.')
    counts = Dict.T(
        String.T(), Dict.T(Tuple.T(content_t=String.T()), Int.T()),
        help='Breakdown of how many nuts of any content type and code '
             'sequence are available in selection, ``counts[kind][codes]``.')
    time_spans = Dict.T(
        String.T(), Tuple.T(content_t=Timestamp.T()),
        help='Time spans by content type.')
    sources = List.T(
        String.T(),
        help='Descriptions of attached sources.')
    operators = List.T(
        String.T(),
        help='Descriptions of attached operators.')

    def __str__(self):
        # Total number of nuts per content kind.
        kind_counts = dict(
            (kind, sum(self.counts[kind].values())) for kind in self.kinds)

        scodes = model.codes_to_str_abbreviated(self.codes)

        ssources = '<none>' if not self.sources else '\n' + '\n'.join(
            '  ' + s for s in self.sources)

        soperators = '<none>' if not self.operators else '\n' + '\n'.join(
            '  ' + s for s in self.operators)

        # Format a timestamp, hiding the open-ended sentinel values.
        def stime(t):
            return util.tts(t) if t is not None and t not in (
                model.g_tmin, model.g_tmax) else '<none>'

        # Render rows as a left-justified, column-aligned text table.
        def stable(rows):
            ns = [max(len(w) for w in col) for col in zip(*rows)]
            return '\n'.join(
                '  '.join(w.ljust(n) for n, w in zip(ns, row))
                for row in rows)

        def indent(s):
            return '\n'.join('  '+line for line in s.splitlines())

        # Per-kind summary: name, count, and covered time span.
        stspans = '<none>' if not self.kinds else '\n' + indent(stable([(
            kind + ':',
            str(kind_counts[kind]),
            stime(self.time_spans[kind][0]),
            '-',
            stime(self.time_spans[kind][1])) for kind in sorted(self.kinds)]))

        s = '''
Number of files:               %i
Total size of known files:     %s
Number of index nuts:          %i
Available content kinds:       %s
Available codes:               %s
Sources:                       %s
Operators:                     %s''' % (
            self.nfiles,
            util.human_bytesize(self.total_size),
            self.nnuts,
            stspans, scodes, ssources, soperators)

        return s.lstrip()

2713 

2714 

# Public names exported by this module (``from ... import *``).
__all__ = [
    'Squirrel',
    'SquirrelStats',
]