1# http://pyrocko.org - GPLv3 

2# 

3# The Pyrocko Developers, 21st Century 

4# ---|P------/S----------~Lg---------- 

5 

6from __future__ import absolute_import, print_function 

7 

8import sys 

9import os 

10 

11import math 

12import logging 

13import threading 

14import queue 

15from collections import defaultdict 

16 

17from pyrocko.guts import Object, Int, List, Tuple, String, Timestamp, Dict 

18from pyrocko import util, trace 

19from pyrocko.progress import progress 

20 

21from . import model, io, cache, dataset 

22 

23from .model import to_kind_id, separator, WaveformOrder 

24from .client import fdsn, catalog 

25from .selection import Selection, filldocs 

26from .database import abspath 

27from . import client, environment, error 

28 

29logger = logging.getLogger('psq.base') 

30 

31guts_prefix = 'squirrel' 

32 

33 

def make_task(*args):
    '''
    Create a progress task bound to this module's logger.
    '''
    task = progress.task(*args, logger=logger)
    return task

36 

37 

def lpick(condition, seq):
    '''
    Partition *seq* into two lists according to *condition*.

    :returns: tuple ``(nomatch, match)`` where ``match`` collects elements
        for which ``condition(ele)`` is truthy and ``nomatch`` the rest.
    '''
    nomatch, match = [], []
    for ele in seq:
        if condition(ele):
            match.append(ele)
        else:
            nomatch.append(ele)

    return nomatch, match

44 

45 

def codes_fill(n, codes):
    '''
    Truncate or right-pad a codes tuple to length *n* using ``'*'``.
    '''
    npad = n - len(codes)
    return codes[:n] + ('*',) * npad

48 

49 

# Number of codes elements relevant for each content kind (used to truncate
# inflated codes patterns in codes_patterns_for_kind).
c_kind_to_ncodes = {
    'station': 4,
    'channel': 6,
    'response': 6,
    'waveform': 6,
    'event': 1,
    'waveform_promise': 6,
    'undefined': 1}

58 

59 

# Template for a fully inflated codes list (first slot empty string, others
# '*' wildcards) and the insertion offsets into it, indexed by the number of
# given codes elements (used by codes_inflate).
c_inflated = ['', '*', '*', '*', '*', '*']
c_offsets = [0, 2, 1, 1, 1, 1, 0]

62 

63 

def codes_inflate(codes):
    '''
    Expand an abbreviated codes tuple to the full 6-element form.

    Unspecified slots keep the template defaults from ``c_inflated``; the
    given elements are placed at the offset selected by ``c_offsets`` for
    the number of elements supplied.
    '''
    head = codes[:6]
    n = len(head)
    start = c_offsets[n]
    out = list(c_inflated)
    out[start:start + n] = head
    return out

71 

72 

def codes_inflate2(codes):
    '''
    Left-align *codes* into the 6-element template and return it as a tuple.
    '''
    out = list(c_inflated)
    out[:len(codes)] = codes
    return tuple(out)

78 

79 

def codes_patterns_for_kind(kind, codes):
    '''
    Expand user-supplied codes into GLOB patterns for the given content kind.

    Accepts a single codes tuple or a sequence of such tuples (handled by
    recursion). ``'event'`` and ``'undefined'`` codes are passed through
    unchanged; other kinds are inflated to full length and truncated to the
    kind-specific number of elements from ``c_kind_to_ncodes``.
    '''

    if not codes:
        return []

    # A sequence of codes tuples: expand each entry recursively.
    if not isinstance(codes[0], str):
        out = []
        for subcodes in codes:
            out.extend(codes_patterns_for_kind(kind, subcodes))
        return out

    if kind in ('event', 'undefined'):
        return [codes]

    cfill = codes_inflate(codes)[:c_kind_to_ncodes[kind]]

    if kind == 'station':
        # '[*]' globs a literal '*' character, so a second pattern is added
        # to also match entries whose fourth element is the wildcard itself.
        cfill2 = list(cfill)
        cfill2[3] = '[*]'
        return [cfill, cfill2]

    return [cfill]

101 

102 

def group_channels(channels):
    '''
    Group channels whose codes differ only in the last character of the
    final codes element (e.g. component letter of the channel code).

    :returns: mapping of truncated codes tuple to list of channel objects.
    '''
    groups = defaultdict(list)
    for ch in channels:
        *head, last = ch.codes
        key = tuple(head) + (last[:-1],)
        groups[key].append(ch)

    return groups

111 

112 

def pyrocko_station_from_channel_group(group, extra_args):
    '''
    Build a :py:class:`pyrocko.model.Station` from a group of channels.

    :param group: channel objects assumed to share station-level metadata.
    :param extra_args: list of additional argument tuples merged in with
        the per-channel station args (same layout).
    :raises: whatever :py:func:`pyrocko.util.consistency_merge` raises when
        the merged argument tuples disagree.
    '''
    list_of_args = [channel._get_pyrocko_station_args() for channel in group]
    args = util.consistency_merge(list_of_args + extra_args)
    # Imported here, presumably to avoid a costly or circular top-level
    # import of pyrocko.model — TODO confirm.
    from pyrocko import model as pmodel
    return pmodel.Station(
        network=args[0],
        station=args[1],
        location=args[2],
        lat=args[3],
        lon=args[4],
        elevation=args[5],
        depth=args[6],
        channels=[ch.get_pyrocko_channel() for ch in group])

126 

127 

def blocks(tmin, tmax, deltat, nsamples_block=100000):
    '''
    Yield consecutive time block boundaries covering ``[tmin, tmax]``.

    Block length is ``deltat * nsamples_block``; blocks are aligned to
    integer multiples of that length.
    '''
    tblock = nsamples_block * deltat
    ifirst = int(math.floor(tmin / tblock))
    ilast = int(math.ceil(tmax / tblock))
    iblock = ifirst
    while iblock < ilast:
        yield tblock * iblock, tblock * (iblock + 1)
        iblock += 1

134 

135 

def gaps(avail, tmin, tmax):
    '''
    Compute sub-intervals of ``[tmin, tmax]`` not covered by *avail*.

    :param avail: iterable of ``(tmin_a, tmax_a)`` covered intervals.
    :param tmin: start of query interval.
    :param tmax: end of query interval.
    :returns: list of ``(tmin_gap, tmax_gap)`` uncovered intervals.
    '''

    assert tmin < tmax

    # Sweep-line over signed boundary events. The query interval itself
    # contributes an inverted pair so coverage drops to zero inside gaps.
    events = [(tmax, 1), (tmin, -1)]
    for tmin_a, tmax_a in avail:
        assert tmin_a < tmax_a
        events.extend([(tmin_a, 1), (tmax_a, -1)])

    events.sort()

    uncovered = []
    coverage = 1
    tmin_g = None
    for t, step in events:
        if coverage == 1 and step == -1:
            tmin_g = t
        elif coverage == 0 and step == 1 and tmin_g is not None:
            if tmin_g != t:
                uncovered.append((tmin_g, t))

        coverage += step

    return uncovered

160 

161 

def order_key(order):
    '''
    Key function identifying a waveform order by codes and time span.
    '''
    codes = order.codes
    span = (order.tmin, order.tmax)
    return (codes,) + span

164 

165 

class Batch(object):
    '''
    Batch of waveforms from window-wise data extraction.

    Encapsulates state and results yielded for each window in window-wise
    waveform extraction with the :py:meth:`Squirrel.chopper_waveforms`
    method.

    *Attributes:*

    .. py:attribute:: tmin

        Start of this time window.

    .. py:attribute:: tmax

        End of this time window.

    .. py:attribute:: i

        Index of this time window in sequence.

    .. py:attribute:: n

        Total number of time windows in sequence.

    .. py:attribute:: traces

        Extracted waveforms for this time window.
    '''

    def __init__(self, tmin, tmax, i, n, traces):
        # Position in the window sequence.
        self.i = i
        self.n = n
        # Time span of this window.
        self.tmin = tmin
        self.tmax = tmax
        # Extracted waveform traces.
        self.traces = traces

202 

203 

204class Squirrel(Selection): 

205 ''' 

206 Prompt, lazy, indexing, caching, dynamic seismological dataset access. 

207 

208 :param env: 

209 Squirrel environment instance or directory path to use as starting 

210 point for its detection. By default, the current directory is used as 

211 starting point. When searching for a usable environment the directory 

212 ``'.squirrel'`` or ``'squirrel'`` in the current (or starting point) 

        directory is used if it exists, otherwise the parent directories are
        searched upwards for the existence of such a directory. If no such

215 directory is found, the user's global Squirrel environment 

216 ``'$HOME/.pyrocko/squirrel'`` is used. 

217 :type env: 

218 :py:class:`~pyrocko.squirrel.environment.Environment` or 

219 :py:class:`str` 

220 

221 :param database: 

222 Database instance or path to database. By default the 

223 database found in the detected Squirrel environment is used. 

224 :type database: 

225 :py:class:`~pyrocko.squirrel.database.Database` or :py:class:`str` 

226 

227 :param cache_path: 

228 Directory path to use for data caching. By default, the ``'cache'`` 

229 directory in the detected Squirrel environment is used. 

230 :type cache_path: 

231 :py:class:`str` 

232 

233 :param persistent: 

234 If given a name, create a persistent selection. 

235 :type persistent: 

236 :py:class:`str` 

237 

238 This is the central class of the Squirrel framework. It provides a unified 

239 interface to query and access seismic waveforms, station meta-data and 

240 event information from local file collections and remote data sources. For 

241 prompt responses, a profound database setup is used under the hood. To 

242 speed up assemblage of ad-hoc data selections, files are indexed on first 

243 use and the extracted meta-data is remembered in the database for 

244 subsequent accesses. Bulk data is lazily loaded from disk and remote 

245 sources, just when requested. Once loaded, data is cached in memory to 

246 expedite typical access patterns. Files and data sources can be dynamically 

247 added to and removed from the Squirrel selection at runtime. 

248 

249 Queries are restricted to the contents of the files currently added to the 

250 Squirrel selection (usually a subset of the file meta-information 

251 collection in the database). This list of files is referred to here as the 

252 "selection". By default, temporary tables are created in the attached 

253 database to hold the names of the files in the selection as well as various 

254 indices and counters. These tables are only visible inside the application 

255 which created them and are deleted when the database connection is closed 

256 or the application exits. To create a selection which is not deleted at 

257 exit, supply a name to the ``persistent`` argument of the Squirrel 

258 constructor. Persistent selections are shared among applications using the 

259 same database. 

260 

261 **Method summary** 

262 

263 Some of the methods are implemented in :py:class:`Squirrel`'s base class 

264 :py:class:`~pyrocko.squirrel.selection.Selection`. 

265 

266 .. autosummary:: 

267 

268 ~Squirrel.add 

269 ~Squirrel.add_source 

270 ~Squirrel.add_fdsn 

271 ~Squirrel.add_catalog 

272 ~Squirrel.add_dataset 

273 ~Squirrel.add_virtual 

274 ~Squirrel.update 

275 ~Squirrel.update_waveform_promises 

276 ~Squirrel.advance_accessor 

277 ~Squirrel.clear_accessor 

278 ~Squirrel.reload 

279 ~pyrocko.squirrel.selection.Selection.iter_paths 

280 ~Squirrel.iter_nuts 

281 ~Squirrel.iter_kinds 

282 ~Squirrel.iter_deltats 

283 ~Squirrel.iter_codes 

284 ~Squirrel.iter_counts 

285 ~pyrocko.squirrel.selection.Selection.get_paths 

286 ~Squirrel.get_nuts 

287 ~Squirrel.get_kinds 

288 ~Squirrel.get_deltats 

289 ~Squirrel.get_codes 

290 ~Squirrel.get_counts 

291 ~Squirrel.get_time_span 

292 ~Squirrel.get_deltat_span 

293 ~Squirrel.get_nfiles 

294 ~Squirrel.get_nnuts 

295 ~Squirrel.get_total_size 

296 ~Squirrel.get_stats 

297 ~Squirrel.get_content 

298 ~Squirrel.get_stations 

299 ~Squirrel.get_channels 

300 ~Squirrel.get_responses 

301 ~Squirrel.get_events 

302 ~Squirrel.get_waveform_nuts 

303 ~Squirrel.get_waveforms 

304 ~Squirrel.chopper_waveforms 

305 ~Squirrel.get_coverage 

306 ~Squirrel.pile 

307 ~Squirrel.snuffle 

308 ~Squirrel.glob_codes 

309 ~pyrocko.squirrel.selection.Selection.get_database 

310 ~Squirrel.print_tables 

311 ''' 

312 

    def __init__(
            self, env=None, database=None, cache_path=None, persistent=None):

        # Resolve the environment first; database, cache path and
        # persistence default to the values it provides.
        if not isinstance(env, environment.Environment):
            env = environment.get_environment(env)

        if database is None:
            database = env.expand_path(env.database_path)

        if cache_path is None:
            cache_path = env.expand_path(env.cache_path)

        if persistent is None:
            persistent = env.persistent

        Selection.__init__(
            self, database=database, persistent=persistent)

        # Relative paths stored in the database are resolved against the
        # parent directory of the environment's base path.
        self.get_database().set_basepath(os.path.dirname(env.get_basepath()))

        # Separate in-memory caches: waveforms get their own cache so bulk
        # data does not evict the (smaller) metadata content.
        self._content_caches = {
            'waveform': cache.ContentCache(),
            'default': cache.ContentCache()}

        self._cache_path = cache_path

        self._sources = []
        self._operators = []
        self._operator_registry = {}

        self._pile = None
        self._n_choppers_active = 0

        # Per-selection table names, derived from the selection's name so
        # multiple selections can coexist in one database.
        self._names.update({
            'nuts': self.name + '_nuts',
            'kind_codes_count': self.name + '_kind_codes_count',
            'coverage': self.name + '_coverage'})

        with self.transaction() as cursor:
            self._create_tables_squirrel(cursor)

353 

    def _create_tables_squirrel(self, cursor):
        # Create the per-selection tables, indices and triggers: the
        # aggregated content index ("nuts"), per-(kind, codes) counts, and
        # the waveform coverage change-point table.

        # Content index: one row per content piece (nut) in the selection.
        cursor.execute(self._register_table(self._sql(
            '''
                CREATE TABLE IF NOT EXISTS %(db)s.%(nuts)s (
                    nut_id integer PRIMARY KEY,
                    file_id integer,
                    file_segment integer,
                    file_element integer,
                    kind_id integer,
                    kind_codes_id integer,
                    tmin_seconds integer,
                    tmin_offset integer,
                    tmax_seconds integer,
                    tmax_offset integer,
                    kscale integer)
            ''')))

        # Number of nuts per distinct (kind, codes) combination.
        cursor.execute(self._register_table(self._sql(
            '''
                CREATE TABLE IF NOT EXISTS %(db)s.%(kind_codes_count)s (
                    kind_codes_id integer PRIMARY KEY,
                    count integer)
            ''')))

        cursor.execute(self._sql(
            '''
                CREATE UNIQUE INDEX IF NOT EXISTS %(db)s.%(nuts)s_file_element
                    ON %(nuts)s (file_id, file_segment, file_element)
            '''))

        cursor.execute(self._sql(
            '''
                CREATE INDEX IF NOT EXISTS %(db)s.%(nuts)s_index_file_id
                    ON %(nuts)s (file_id)
            '''))

        cursor.execute(self._sql(
            '''
                CREATE INDEX IF NOT EXISTS %(db)s.%(nuts)s_index_tmin_seconds
                    ON %(nuts)s (kind_id, tmin_seconds)
            '''))

        cursor.execute(self._sql(
            '''
                CREATE INDEX IF NOT EXISTS %(db)s.%(nuts)s_index_tmax_seconds
                    ON %(nuts)s (kind_id, tmax_seconds)
            '''))

        cursor.execute(self._sql(
            '''
                CREATE INDEX IF NOT EXISTS %(db)s.%(nuts)s_index_kscale
                    ON %(nuts)s (kind_id, kscale, tmin_seconds)
            '''))

        # Remove nuts when their file is deleted from the main database.
        cursor.execute(self._sql(
            '''
                CREATE TRIGGER IF NOT EXISTS %(db)s.%(nuts)s_delete_nuts
                    BEFORE DELETE ON main.files FOR EACH ROW
                    BEGIN
                      DELETE FROM %(nuts)s WHERE file_id == old.file_id;
                    END
            '''))

        # trigger only on size to make silent update of mtime possible
        cursor.execute(self._sql(
            '''
                CREATE TRIGGER IF NOT EXISTS %(db)s.%(nuts)s_delete_nuts2
                    BEFORE UPDATE OF size ON main.files FOR EACH ROW
                    BEGIN
                      DELETE FROM %(nuts)s WHERE file_id == old.file_id;
                    END
            '''))

        # Remove nuts when a file leaves this selection.
        cursor.execute(self._sql(
            '''
                CREATE TRIGGER IF NOT EXISTS
                    %(db)s.%(file_states)s_delete_files
                    BEFORE DELETE ON %(db)s.%(file_states)s FOR EACH ROW
                    BEGIN
                        DELETE FROM %(nuts)s WHERE file_id == old.file_id;
                    END
            '''))

        # Keep kind_codes_count in sync on nut insertion ...
        cursor.execute(self._sql(
            '''
                CREATE TRIGGER IF NOT EXISTS %(db)s.%(nuts)s_inc_kind_codes
                    BEFORE INSERT ON %(nuts)s FOR EACH ROW
                    BEGIN
                        INSERT OR IGNORE INTO %(kind_codes_count)s VALUES
                        (new.kind_codes_id, 0);
                        UPDATE %(kind_codes_count)s
                        SET count = count + 1
                        WHERE new.kind_codes_id
                            == %(kind_codes_count)s.kind_codes_id;
                    END
            '''))

        # ... and on nut deletion.
        cursor.execute(self._sql(
            '''
                CREATE TRIGGER IF NOT EXISTS %(db)s.%(nuts)s_dec_kind_codes
                    BEFORE DELETE ON %(nuts)s FOR EACH ROW
                    BEGIN
                        UPDATE %(kind_codes_count)s
                        SET count = count - 1
                        WHERE old.kind_codes_id
                            == %(kind_codes_count)s.kind_codes_id;
                    END
            '''))

        # Coverage: change-points of the overlap count per kind_codes; each
        # row carries the signed step at that time.
        cursor.execute(self._register_table(self._sql(
            '''
                CREATE TABLE IF NOT EXISTS %(db)s.%(coverage)s (
                    kind_codes_id integer,
                    time_seconds integer,
                    time_offset integer,
                    step integer)
            ''')))

        cursor.execute(self._sql(
            '''
                CREATE UNIQUE INDEX IF NOT EXISTS %(db)s.%(coverage)s_time
                    ON %(coverage)s (kind_codes_id, time_seconds, time_offset)
            '''))

        # Maintain coverage on nut insertion: +1 step at tmin, -1 at tmax;
        # rows whose step ends up at 0 are removed again.
        cursor.execute(self._sql(
            '''
                CREATE TRIGGER IF NOT EXISTS %(db)s.%(nuts)s_add_coverage
                    AFTER INSERT ON %(nuts)s FOR EACH ROW
                    BEGIN
                        INSERT OR IGNORE INTO %(coverage)s VALUES
                        (new.kind_codes_id, new.tmin_seconds, new.tmin_offset, 0)
                        ;
                        UPDATE %(coverage)s
                        SET step = step + 1
                        WHERE new.kind_codes_id == %(coverage)s.kind_codes_id
                            AND new.tmin_seconds == %(coverage)s.time_seconds
                            AND new.tmin_offset == %(coverage)s.time_offset
                        ;
                        INSERT OR IGNORE INTO %(coverage)s VALUES
                        (new.kind_codes_id, new.tmax_seconds, new.tmax_offset, 0)
                        ;
                        UPDATE %(coverage)s
                        SET step = step - 1
                        WHERE new.kind_codes_id == %(coverage)s.kind_codes_id
                            AND new.tmax_seconds == %(coverage)s.time_seconds
                            AND new.tmax_offset == %(coverage)s.time_offset
                        ;
                        DELETE FROM %(coverage)s
                        WHERE new.kind_codes_id == %(coverage)s.kind_codes_id
                            AND new.tmin_seconds == %(coverage)s.time_seconds
                            AND new.tmin_offset == %(coverage)s.time_offset
                            AND step == 0
                        ;
                        DELETE FROM %(coverage)s
                        WHERE new.kind_codes_id == %(coverage)s.kind_codes_id
                            AND new.tmax_seconds == %(coverage)s.time_seconds
                            AND new.tmax_offset == %(coverage)s.time_offset
                            AND step == 0
                        ;
                    END
            '''))

        # Inverse coverage bookkeeping on nut deletion.
        cursor.execute(self._sql(
            '''
                CREATE TRIGGER IF NOT EXISTS %(db)s.%(nuts)s_remove_coverage
                    BEFORE DELETE ON %(nuts)s FOR EACH ROW
                    BEGIN
                        INSERT OR IGNORE INTO %(coverage)s VALUES
                        (old.kind_codes_id, old.tmin_seconds, old.tmin_offset, 0)
                        ;
                        UPDATE %(coverage)s
                        SET step = step - 1
                        WHERE old.kind_codes_id == %(coverage)s.kind_codes_id
                            AND old.tmin_seconds == %(coverage)s.time_seconds
                            AND old.tmin_offset == %(coverage)s.time_offset
                        ;
                        INSERT OR IGNORE INTO %(coverage)s VALUES
                        (old.kind_codes_id, old.tmax_seconds, old.tmax_offset, 0)
                        ;
                        UPDATE %(coverage)s
                        SET step = step + 1
                        WHERE old.kind_codes_id == %(coverage)s.kind_codes_id
                            AND old.tmax_seconds == %(coverage)s.time_seconds
                            AND old.tmax_offset == %(coverage)s.time_offset
                        ;
                        DELETE FROM %(coverage)s
                        WHERE old.kind_codes_id == %(coverage)s.kind_codes_id
                            AND old.tmin_seconds == %(coverage)s.time_seconds
                            AND old.tmin_offset == %(coverage)s.time_offset
                            AND step == 0
                        ;
                        DELETE FROM %(coverage)s
                        WHERE old.kind_codes_id == %(coverage)s.kind_codes_id
                            AND old.tmax_seconds == %(coverage)s.time_seconds
                            AND old.tmax_offset == %(coverage)s.time_offset
                            AND step == 0
                        ;
                    END
            '''))

554 

    def _delete(self):
        '''Delete database tables associated with this Squirrel.'''

        # The coverage objects were added later; IF EXISTS keeps this
        # working on selections created before they existed.
        for s in '''
                DROP TRIGGER %(db)s.%(nuts)s_delete_nuts;
                DROP TRIGGER %(db)s.%(nuts)s_delete_nuts2;
                DROP TRIGGER %(db)s.%(file_states)s_delete_files;
                DROP TRIGGER %(db)s.%(nuts)s_inc_kind_codes;
                DROP TRIGGER %(db)s.%(nuts)s_dec_kind_codes;
                DROP TABLE %(db)s.%(nuts)s;
                DROP TABLE %(db)s.%(kind_codes_count)s;
                DROP TRIGGER IF EXISTS %(db)s.%(nuts)s_add_coverage;
                DROP TRIGGER IF EXISTS %(db)s.%(nuts)s_remove_coverage;
                DROP TABLE IF EXISTS %(db)s.%(coverage)s;
        '''.strip().splitlines():

            self._conn.execute(self._sql(s))

        Selection._delete(self)

574 

    @filldocs
    def add(self,
            paths,
            kinds=None,
            format='detect',
            regex=None,
            check=True,
            progress_viewer='terminal'):

        '''
        Add files to the selection.

        :param paths:
            Iterator yielding paths to files or directories to be added to
            the selection. Recurses into directories. If given a ``str``, it
            is treated as a single path to be added.
        :type paths:
            :py:class:`list` of :py:class:`str`

        :param kinds:
            Content types to be made available through the Squirrel
            selection. By default, all known content types are accepted.
        :type kinds:
            :py:class:`list` of :py:class:`str`

        :param format:
            File format identifier or ``'detect'`` to enable auto-detection
            (available: %(file_formats)s).
        :type format:
            str

        :param regex:
            If not ``None``, files are only included if their paths match
            the given regular expression pattern.
        :type regex:
            str

        :param check:
            If ``True``, all file modification times are checked to see if
            cached information has to be updated (slow). If ``False``, only
            previously unknown files are indexed and cached information is
            used for known files, regardless of file state (fast,
            corresponds to Squirrel's ``--optimistic`` mode). File deletions
            will go undetected in the latter case.
        :type check:
            bool

        :param progress_viewer:
            Passed to :py:func:`pyrocko.progress` view selection to control
            how progress is displayed.
        :type progress_viewer:
            str

        :Complexity:
            O(log N)
        '''

        if isinstance(kinds, str):
            kinds = (kinds,)

        if isinstance(paths, str):
            paths = [paths]

        kind_mask = model.to_kind_mask(kinds)

        with progress.view(progress_viewer):
            # 'virtual:' paths are passed through untouched; real paths are
            # expanded/filtered by iter_select_files.
            Selection.add(
                self, util.iter_select_files(
                    paths,
                    show_progress=False,
                    regex=regex,
                    pass_through=lambda path: path.startswith('virtual:')
                ), kind_mask, format)

            self._load(check)
            self._update_nuts()

645 

    def reload(self):
        '''
        Check for modifications and reindex modified files.

        Based on file modification times.
        '''

        # Force the modification check even for files marked as known.
        self._set_file_states_force_check()
        self._load(check=True)
        self._update_nuts()

656 

    def add_virtual(self, nuts, virtual_paths=None):
        '''
        Add content which is not backed by files.

        :param nuts:
            Content pieces to be added.
        :type nuts:
            iterator yielding :py:class:`~pyrocko.squirrel.model.Nut` objects

        :param virtual_paths:
            List of virtual paths to prevent creating a temporary list of the
            nuts while aggregating the file paths for the selection.
        :type virtual_paths:
            :py:class:`list` of :py:class:`str`

        Stores to the main database and the selection.
        '''

        if isinstance(virtual_paths, str):
            virtual_paths = [virtual_paths]

        if virtual_paths is None:
            # Need to materialize the nuts to collect their paths; the
            # caller can avoid this by supplying virtual_paths.
            if not isinstance(nuts, list):
                nuts = list(nuts)
            virtual_paths = set(nut.file_path for nut in nuts)

        Selection.add(self, virtual_paths)
        self.get_database().dig(nuts)
        self._update_nuts()

686 

    def add_volatile(self, nuts):
        '''
        Add in-memory content, tracked for removal.

        Registers the nuts with the virtual I/O backend and records their
        virtual paths in ``self._volatile_paths`` for later cleanup.
        '''
        if not isinstance(nuts, list):
            nuts = list(nuts)

        paths = list(set(nut.file_path for nut in nuts))
        io.backends.virtual.add_nuts(nuts)
        self.add_virtual(nuts, paths)
        self._volatile_paths.extend(paths)

695 

    def add_volatile_waveforms(self, traces):
        '''
        Add in-memory waveforms which will be removed when the app closes.

        :param traces: waveform trace objects to be made available.
        :returns: the generated virtual path under which the traces were
            registered.
        '''

        name = model.random_name()

        path = 'virtual:volatile:%s' % name

        nuts = []
        for itr, tr in enumerate(traces):
            assert tr.tmin <= tr.tmax
            # Times are split into integer seconds plus sub-second offset.
            tmin_seconds, tmin_offset = model.tsplit(tr.tmin)
            tmax_seconds, tmax_offset = model.tsplit(tr.tmax)
            nuts.append(model.Nut(
                file_path=path,
                file_format='virtual',
                file_segment=itr,
                file_element=0,
                codes=separator.join(tr.codes),
                tmin_seconds=tmin_seconds,
                tmin_offset=tmin_offset,
                tmax_seconds=tmax_seconds,
                tmax_offset=tmax_offset,
                deltat=tr.deltat,
                kind_id=to_kind_id('waveform'),
                content=tr))

        self.add_volatile(nuts)
        return path

726 

    def _load(self, check):
        # Index (and reindex changed) files of the selection. content=[]
        # means no bulk content is loaded, only metadata is extracted; the
        # iterator is exhausted purely for its side effects.
        for _ in io.iload(
                self,
                content=[],
                skip_unchanged=True,
                check=check):
            pass

734 

    def _update_nuts(self):
        # Copy index entries of files not yet aggregated (file_state != 2)
        # from the main database into this selection's nuts table, keeping
        # only kinds enabled in each file's kind mask.
        transaction = self.transaction()
        with make_task('Aggregating selection') as task, \
                transaction as cursor:

            # Report SQLite progress every 100000 VM steps.
            self._conn.set_progress_handler(task.update, 100000)
            nrows = cursor.execute(self._sql(
                '''
                    INSERT INTO %(db)s.%(nuts)s
                    SELECT NULL,
                        nuts.file_id, nuts.file_segment, nuts.file_element,
                        nuts.kind_id, nuts.kind_codes_id,
                        nuts.tmin_seconds, nuts.tmin_offset,
                        nuts.tmax_seconds, nuts.tmax_offset,
                        nuts.kscale
                    FROM %(db)s.%(file_states)s
                    INNER JOIN nuts
                        ON %(db)s.%(file_states)s.file_id == nuts.file_id
                    INNER JOIN kind_codes
                        ON nuts.kind_codes_id ==
                            kind_codes.kind_codes_id
                    WHERE %(db)s.%(file_states)s.file_state != 2
                        AND (((1 << kind_codes.kind_id)
                            & %(db)s.%(file_states)s.kind_mask) != 0)
                ''')).rowcount

            task.update(nrows)
            self._set_file_states_known(transaction)
            self._conn.set_progress_handler(None, 0)

764 

    def add_source(self, source, check=True, progress_viewer='terminal'):
        '''
        Add remote resource.

        :param source:
            Remote data access client instance.
        :type source:
            subclass of :py:class:`~pyrocko.squirrel.client.base.Source`

        :param check:
            Forwarded to the source's ``setup`` method.
        :type check:
            bool

        :param progress_viewer:
            Forwarded to the source's ``setup`` method.
        :type progress_viewer:
            str
        '''

        self._sources.append(source)
        source.setup(self, check=check, progress_viewer=progress_viewer)

777 

    def add_fdsn(self, *args, **kwargs):
        '''
        Add FDSN site for transparent remote data access.

        Arguments are passed to
        :py:class:`~pyrocko.squirrel.client.fdsn.FDSNSource`.
        '''

        self.add_source(fdsn.FDSNSource(*args, **kwargs))

787 

    def add_catalog(self, *args, **kwargs):
        '''
        Add online catalog for transparent event data access.

        Arguments are passed to
        :py:class:`~pyrocko.squirrel.client.catalog.CatalogSource`.
        '''

        self.add_source(catalog.CatalogSource(*args, **kwargs))

797 

798 def add_dataset( 

799 self, ds, check=True, progress_viewer='terminal', 

800 warn_persistent=True): 

801 

802 ''' 

803 Read dataset description from file and add its contents. 

804 

805 :param ds: 

806 Path to dataset description file or dataset description object 

807 . See :py:mod:`~pyrocko.squirrel.dataset`. 

808 :type ds: 

809 :py:class:`str` or :py:class:`~pyrocko.squirrel.dataset.Dataset` 

810 

811 :param check: 

812 If ``True``, all file modification times are checked to see if 

813 cached information has to be updated (slow). If ``False``, only 

814 previously unknown files are indexed and cached information is used 

815 for known files, regardless of file state (fast, corrresponds to 

816 Squirrel's ``--optimistic`` mode). File deletions will go 

817 undetected in the latter case. 

818 :type check: 

819 bool 

820 ''' 

821 if isinstance(ds, str): 

822 ds = dataset.read_dataset(ds) 

823 path = ds 

824 else: 

825 path = None 

826 

827 if warn_persistent and ds.persistent and ( 

828 not self._persistent or (self._persistent != ds.persistent)): 

829 

830 logger.warning( 

831 'Dataset `persistent` flag ignored. Can not be set on already ' 

832 'existing Squirrel instance.%s' % ( 

833 ' Dataset: %s' % path if path else '')) 

834 

835 ds.setup(self, check=check, progress_viewer=progress_viewer) 

836 

837 def _get_selection_args( 

838 self, obj=None, tmin=None, tmax=None, time=None, codes=None): 

839 

840 if time is not None: 

841 tmin = time 

842 tmax = time 

843 

844 if obj is not None: 

845 tmin = tmin if tmin is not None else obj.tmin 

846 tmax = tmax if tmax is not None else obj.tmax 

847 codes = codes if codes is not None else codes_inflate2(obj.codes) 

848 

849 if isinstance(codes, str): 

850 codes = tuple(codes.split('.')) 

851 

852 return tmin, tmax, codes 

853 

854 def _selection_args_to_kwargs( 

855 self, obj=None, tmin=None, tmax=None, time=None, codes=None): 

856 

857 return dict(obj=obj, tmin=tmin, tmax=tmax, time=time, codes=codes) 

858 

    def _timerange_sql(self, tmin, tmax, kind, cond, args, naiv):
        '''
        Append SQL conditions (and bound arguments) selecting nuts of
        *kind* which may intersect the time interval [*tmin*, *tmax*].

        With ``naiv=True``, the kscale-bucketed lookup is bypassed in favor
        of a plain tmin/tmax comparison (slow, for testing).
        '''

        tmin_seconds, tmin_offset = model.tsplit(tmin)
        tmax_seconds, tmax_offset = model.tsplit(tmax)
        if naiv:
            cond.append('%(db)s.%(nuts)s.tmin_seconds <= ?')
            args.append(tmax_seconds)
        else:
            # Nuts are binned by duration scale (kscale). Within a bin, an
            # intersecting nut must have its tmin roughly within
            # [tmin - tscale, tmax] (with one second slack on both sides),
            # which lets SQLite use the (kind_id, kscale, tmin_seconds)
            # index.
            tscale_edges = model.tscale_edges
            tmin_cond = []
            for kscale in range(tscale_edges.size + 1):
                if kscale != tscale_edges.size:
                    tscale = int(tscale_edges[kscale])
                    tmin_cond.append('''
                        (%(db)s.%(nuts)s.kind_id = ?
                         AND %(db)s.%(nuts)s.kscale == ?
                         AND %(db)s.%(nuts)s.tmin_seconds BETWEEN ? AND ?)
                    ''')
                    args.extend(
                        (to_kind_id(kind), kscale,
                         tmin_seconds - tscale - 1, tmax_seconds + 1))

                else:
                    # Last bin is unbounded in duration, so only an upper
                    # bound on tmin can be applied.
                    tmin_cond.append('''
                        (%(db)s.%(nuts)s.kind_id == ?
                         AND %(db)s.%(nuts)s.kscale == ?
                         AND %(db)s.%(nuts)s.tmin_seconds <= ?)
                    ''')

                    args.extend(
                        (to_kind_id(kind), kscale, tmax_seconds + 1))
            if tmin_cond:
                cond.append(' ( ' + ' OR '.join(tmin_cond) + ' ) ')

        cond.append('%(db)s.%(nuts)s.tmax_seconds >= ?')
        args.append(tmin_seconds)

895 

896 def iter_nuts( 

897 self, kind=None, tmin=None, tmax=None, codes=None, naiv=False, 

898 kind_codes_ids=None, path=None): 

899 

900 ''' 

901 Iterate over content entities matching given constraints. 

902 

903 :param kind: 

904 Content kind (or kinds) to extract. 

905 :type kind: 

906 :py:class:`str`, :py:class:`list` of :py:class:`str` 

907 

908 :param tmin: 

909 Start time of query interval. 

910 :type tmin: 

911 timestamp 

912 

913 :param tmax: 

914 End time of query interval. 

915 :type tmax: 

916 timestamp 

917 

918 :param codes: 

919 Pattern of content codes to query. 

920 :type codes: 

921 :py:class:`tuple` of :py:class:`str` 

922 

923 :param naiv: 

924 Bypass time span lookup through indices (slow, for testing). 

925 :type naiv: 

926 :py:class:`bool` 

927 

928 :param kind_codes_ids: 

929 Kind-codes IDs of contents to be retrieved (internal use). 

930 :type kind_codes_ids: 

931 :py:class:`list` of :py:class:`str` 

932 

933 :yields: 

934 :py:class:`~pyrocko.squirrel.model.Nut` objects representing the 

935 intersecting content. 

936 

937 :complexity: 

938 O(log N) for the time selection part due to heavy use of database 

939 indices. 

940 

941 Query time span is treated as a half-open interval ``[tmin, tmax)``. 

942 However, if ``tmin`` equals ``tmax``, the edge logics are modified to 

943 closed-interval so that content intersecting with the time instant ``t 

944 = tmin = tmax`` is returned (otherwise nothing would be returned as 

945 ``[t, t)`` never matches anything). 

946 

947 Time spans of content entities to be matched are also treated as half 

948 open intervals, e.g. content span ``[0, 1)`` is matched by query span 

949 ``[0, 1)`` but not by ``[-1, 0)`` or ``[1, 2)``. Also here, logics are 

950 modified to closed-interval when the content time span is an empty 

951 interval, i.e. to indicate a time instant. E.g. time instant 0 is 

952 matched by ``[0, 1)`` but not by ``[-1, 0)`` or ``[1, 2)``. 

953 ''' 

954 

955 if not isinstance(kind, str): 

956 if kind is None: 

957 kind = model.g_content_kinds 

958 for kind_ in kind: 

959 for nut in self.iter_nuts(kind_, tmin, tmax, codes): 

960 yield nut 

961 

962 return 

963 

964 cond = [] 

965 args = [] 

966 if tmin is not None or tmax is not None: 

967 assert kind is not None 

968 if tmin is None: 

969 tmin = self.get_time_span()[0] 

970 if tmax is None: 

971 tmax = self.get_time_span()[1] + 1.0 

972 

973 self._timerange_sql(tmin, tmax, kind, cond, args, naiv) 

974 

975 elif kind is not None: 

976 cond.append('kind_codes.kind_id == ?') 

977 args.append(to_kind_id(kind)) 

978 

979 if codes is not None: 

980 pats = codes_patterns_for_kind(kind, codes) 

981 if pats: 

982 cond.append( 

983 ' ( %s ) ' % ' OR '.join( 

984 ('kind_codes.codes GLOB ?',) * len(pats))) 

985 args.extend(separator.join(pat) for pat in pats) 

986 

987 if kind_codes_ids is not None: 

988 cond.append( 

989 ' ( kind_codes.kind_codes_id IN ( %s ) ) ' % ', '.join( 

990 '?'*len(kind_codes_ids))) 

991 

992 args.extend(kind_codes_ids) 

993 

994 db = self.get_database() 

995 if path is not None: 

996 cond.append('files.path == ?') 

997 args.append(db.relpath(abspath(path))) 

998 

999 sql = (''' 

1000 SELECT 

1001 files.path, 

1002 files.format, 

1003 files.mtime, 

1004 files.size, 

1005 %(db)s.%(nuts)s.file_segment, 

1006 %(db)s.%(nuts)s.file_element, 

1007 kind_codes.kind_id, 

1008 kind_codes.codes, 

1009 %(db)s.%(nuts)s.tmin_seconds, 

1010 %(db)s.%(nuts)s.tmin_offset, 

1011 %(db)s.%(nuts)s.tmax_seconds, 

1012 %(db)s.%(nuts)s.tmax_offset, 

1013 kind_codes.deltat 

1014 FROM files 

1015 INNER JOIN %(db)s.%(nuts)s 

1016 ON files.file_id == %(db)s.%(nuts)s.file_id 

1017 INNER JOIN kind_codes 

1018 ON %(db)s.%(nuts)s.kind_codes_id == kind_codes.kind_codes_id 

1019 ''') 

1020 

1021 if cond: 

1022 sql += ''' WHERE ''' + ' AND '.join(cond) 

1023 

1024 sql = self._sql(sql) 

1025 if tmin is None and tmax is None: 

1026 for row in self._conn.execute(sql, args): 

1027 row = (db.abspath(row[0]),) + row[1:] 

1028 nut = model.Nut(values_nocheck=row) 

1029 yield nut 

1030 else: 

1031 assert tmin is not None and tmax is not None 

1032 if tmin == tmax: 

1033 for row in self._conn.execute(sql, args): 

1034 row = (db.abspath(row[0]),) + row[1:] 

1035 nut = model.Nut(values_nocheck=row) 

1036 if (nut.tmin <= tmin < nut.tmax) \ 

1037 or (nut.tmin == nut.tmax and tmin == nut.tmin): 

1038 

1039 yield nut 

1040 else: 

1041 for row in self._conn.execute(sql, args): 

1042 row = (db.abspath(row[0]),) + row[1:] 

1043 nut = model.Nut(values_nocheck=row) 

1044 if (tmin < nut.tmax and nut.tmin < tmax) \ 

1045 or (nut.tmin == nut.tmax 

1046 and tmin <= nut.tmin < tmax): 

1047 

1048 yield nut 

1049 

1050 def get_nuts(self, *args, **kwargs): 

1051 ''' 

1052 Get content entities matching given constraints. 

1053 

1054 Like :py:meth:`iter_nuts` but returns results as a list. 

1055 ''' 

1056 

1057 return list(self.iter_nuts(*args, **kwargs)) 

1058 

    def _split_nuts(
            self, kind, tmin=None, tmax=None, codes=None, path=None):
        # Carve the interval [tmin, tmax] out of matching index entries
        # ("nuts") of the given kind: overlapping nuts are deleted and
        # replaced by up to two remainder nuts covering the parts outside
        # of [tmin, tmax]. Applied to both the selection's nut table and
        # the main database's nut table.

        # Times are stored split into integer seconds plus a small float
        # offset (see model.tsplit / model.tjoin).
        tmin_seconds, tmin_offset = model.tsplit(tmin)
        tmax_seconds, tmax_offset = model.tsplit(tmax)

        # Name mapping pointing at the main database's nuts table, to run
        # the same parameterized SQL against both tables.
        names_main_nuts = dict(self._names)
        names_main_nuts.update(db='main', nuts='nuts')

        db = self.get_database()

        def main_nuts(s):
            # Substitute table names for the main database.
            return s % names_main_nuts

        with self.transaction() as cursor:
            # modify selection and main
            for sql_subst in [
                    self._sql, main_nuts]:

                cond = []
                args = []

                self._timerange_sql(tmin, tmax, kind, cond, args, False)

                if codes is not None:
                    pats = codes_patterns_for_kind(kind, codes)
                    if pats:
                        cond.append(
                            ' ( %s ) ' % ' OR '.join(
                                ('kind_codes.codes GLOB ?',) * len(pats)))
                        args.extend(separator.join(pat) for pat in pats)

                if path is not None:
                    # Paths are stored relative to the database location.
                    cond.append('files.path == ?')
                    args.append(db.relpath(abspath(path)))

                sql = sql_subst('''
                    SELECT
                        %(db)s.%(nuts)s.nut_id,
                        %(db)s.%(nuts)s.tmin_seconds,
                        %(db)s.%(nuts)s.tmin_offset,
                        %(db)s.%(nuts)s.tmax_seconds,
                        %(db)s.%(nuts)s.tmax_offset,
                        kind_codes.deltat
                    FROM files
                    INNER JOIN %(db)s.%(nuts)s
                        ON files.file_id == %(db)s.%(nuts)s.file_id
                    INNER JOIN kind_codes
                        ON %(db)s.%(nuts)s.kind_codes_id == kind_codes.kind_codes_id
                    WHERE ''' + ' AND '.join(cond))  # noqa

                insert = []
                delete = []
                for row in cursor.execute(sql, args):
                    nut_id, nut_tmin_seconds, nut_tmin_offset, \
                        nut_tmax_seconds, nut_tmax_offset, nut_deltat = row

                    nut_tmin = model.tjoin(
                        nut_tmin_seconds, nut_tmin_offset)
                    nut_tmax = model.tjoin(
                        nut_tmax_seconds, nut_tmax_offset)

                    # Only nuts truly overlapping [tmin, tmax] are touched.
                    if nut_tmin < tmax and tmin < nut_tmax:
                        if nut_tmin < tmin:
                            # Keep the left remainder [nut_tmin, tmin].
                            insert.append((
                                nut_tmin_seconds, nut_tmin_offset,
                                tmin_seconds, tmin_offset,
                                model.tscale_to_kscale(
                                    tmin_seconds - nut_tmin_seconds),
                                nut_id))

                        if tmax < nut_tmax:
                            # Keep the right remainder [tmax, nut_tmax].
                            insert.append((
                                tmax_seconds, tmax_offset,
                                nut_tmax_seconds, nut_tmax_offset,
                                model.tscale_to_kscale(
                                    nut_tmax_seconds - tmax_seconds),
                                nut_id))

                        delete.append((nut_id,))

                # Remainder nuts inherit everything but their time span
                # (and kscale) from the original nut being split.
                sql_add = '''
                    INSERT INTO %(db)s.%(nuts)s (
                            file_id, file_segment, file_element, kind_id,
                            kind_codes_id, tmin_seconds, tmin_offset,
                            tmax_seconds, tmax_offset, kscale )
                        SELECT
                            file_id, file_segment, file_element,
                            kind_id, kind_codes_id, ?, ?, ?, ?, ?
                        FROM %(db)s.%(nuts)s
                        WHERE nut_id == ?
                '''
                cursor.executemany(sql_subst(sql_add), insert)

                sql_delete = '''
                    DELETE FROM %(db)s.%(nuts)s WHERE nut_id == ?
                '''
                cursor.executemany(sql_subst(sql_delete), delete)

1157 

1158 def get_time_span(self, kinds=None): 

1159 ''' 

1160 Get time interval over all content in selection. 

1161 

1162 :complexity: 

1163 O(1), independent of the number of nuts. 

1164 

1165 :returns: (tmin, tmax) 

1166 ''' 

1167 

1168 sql_min = self._sql(''' 

1169 SELECT MIN(tmin_seconds), MIN(tmin_offset) 

1170 FROM %(db)s.%(nuts)s 

1171 WHERE kind_id == ? 

1172 AND tmin_seconds == ( 

1173 SELECT MIN(tmin_seconds) 

1174 FROM %(db)s.%(nuts)s 

1175 WHERE kind_id == ?) 

1176 ''') 

1177 

1178 sql_max = self._sql(''' 

1179 SELECT MAX(tmax_seconds), MAX(tmax_offset) 

1180 FROM %(db)s.%(nuts)s 

1181 WHERE kind_id == ? 

1182 AND tmax_seconds == ( 

1183 SELECT MAX(tmax_seconds) 

1184 FROM %(db)s.%(nuts)s 

1185 WHERE kind_id == ?) 

1186 ''') 

1187 

1188 gtmin = None 

1189 gtmax = None 

1190 

1191 if isinstance(kinds, str): 

1192 kinds = [kinds] 

1193 

1194 if kinds is None: 

1195 kind_ids = model.g_content_kind_ids 

1196 else: 

1197 kind_ids = model.to_kind_ids(kinds) 

1198 

1199 for kind_id in kind_ids: 

1200 for tmin_seconds, tmin_offset in self._conn.execute( 

1201 sql_min, (kind_id, kind_id)): 

1202 tmin = model.tjoin(tmin_seconds, tmin_offset) 

1203 if tmin is not None and (gtmin is None or tmin < gtmin): 

1204 gtmin = tmin 

1205 

1206 for (tmax_seconds, tmax_offset) in self._conn.execute( 

1207 sql_max, (kind_id, kind_id)): 

1208 tmax = model.tjoin(tmax_seconds, tmax_offset) 

1209 if tmax is not None and (gtmax is None or tmax > gtmax): 

1210 gtmax = tmax 

1211 

1212 return gtmin, gtmax 

1213 

1214 def get_deltat_span(self, kind): 

1215 ''' 

1216 Get min and max sampling interval of all content of given kind. 

1217 

1218 :param kind: 

1219 Content kind 

1220 :type kind: 

1221 str 

1222 

1223 :returns: (deltat_min, deltat_max) 

1224 ''' 

1225 

1226 deltats = [ 

1227 deltat for deltat in self.get_deltats(kind) 

1228 if deltat is not None] 

1229 

1230 if deltats: 

1231 return min(deltats), max(deltats) 

1232 else: 

1233 return None, None 

1234 

1235 def iter_kinds(self, codes=None): 

1236 ''' 

1237 Iterate over content types available in selection. 

1238 

1239 :param codes: 

1240 If given, get kinds only for selected codes identifier. 

1241 :type codes: 

1242 :py:class:`tuple` of :py:class:`str` 

1243 

1244 :yields: 

1245 Available content kinds as :py:class:`str`. 

1246 

1247 :complexity: 

1248 O(1), independent of number of nuts. 

1249 ''' 

1250 

1251 return self._database._iter_kinds( 

1252 codes=codes, 

1253 kind_codes_count='%(db)s.%(kind_codes_count)s' % self._names) 

1254 

1255 def iter_deltats(self, kind=None): 

1256 ''' 

1257 Iterate over sampling intervals available in selection. 

1258 

1259 :param kind: 

1260 If given, get sampling intervals only for a given content type. 

1261 :type kind: 

1262 str 

1263 

1264 :yields: 

1265 :py:class:`float` values. 

1266 

1267 :complexity: 

1268 O(1), independent of number of nuts. 

1269 ''' 

1270 return self._database._iter_deltats( 

1271 kind=kind, 

1272 kind_codes_count='%(db)s.%(kind_codes_count)s' % self._names) 

1273 

1274 def iter_codes(self, kind=None): 

1275 ''' 

1276 Iterate over content identifier code sequences available in selection. 

1277 

1278 :param kind: 

1279 If given, get codes only for a given content type. 

1280 :type kind: 

1281 str 

1282 

1283 :yields: 

1284 :py:class:`tuple` of :py:class:`str` 

1285 

1286 :complexity: 

1287 O(1), independent of number of nuts. 

1288 ''' 

1289 return self._database._iter_codes( 

1290 kind=kind, 

1291 kind_codes_count='%(db)s.%(kind_codes_count)s' % self._names) 

1292 

1293 def iter_counts(self, kind=None): 

1294 ''' 

1295 Iterate over number of occurrences of any (kind, codes) combination. 

1296 

1297 :param kind: 

1298 If given, get counts only for selected content type. 

1299 :type kind: 

1300 str 

1301 

1302 :yields: 

1303 Tuples of the form ``((kind, codes), count)``. 

1304 

1305 :complexity: 

1306 O(1), independent of number of nuts. 

1307 ''' 

1308 return self._database._iter_counts( 

1309 kind=kind, 

1310 kind_codes_count='%(db)s.%(kind_codes_count)s' % self._names) 

1311 

1312 def get_kinds(self, codes=None): 

1313 ''' 

1314 Get content types available in selection. 

1315 

1316 :param codes: 

1317 If given, get kinds only for selected codes identifier. 

1318 :type codes: 

1319 :py:class:`tuple` of :py:class:`str` 

1320 

1321 :returns: 

1322 Sorted list of available content types. 

1323 

1324 :complexity: 

1325 O(1), independent of number of nuts. 

1326 

1327 ''' 

1328 return sorted(list(self.iter_kinds(codes=codes))) 

1329 

1330 def get_deltats(self, kind=None): 

1331 ''' 

1332 Get sampling intervals available in selection. 

1333 

1334 :param kind: 

1335 If given, get codes only for selected content type. 

1336 :type kind: 

1337 str 

1338 

1339 :complexity: 

1340 O(1), independent of number of nuts. 

1341 

1342 :returns: sorted list of available sampling intervals 

1343 ''' 

1344 return sorted(list(self.iter_deltats(kind=kind))) 

1345 

1346 def get_codes(self, kind=None): 

1347 ''' 

1348 Get identifier code sequences available in selection. 

1349 

1350 :param kind: 

1351 If given, get codes only for selected content type. 

1352 :type kind: 

1353 str 

1354 

1355 :complexity: 

1356 O(1), independent of number of nuts. 

1357 

1358 :returns: sorted list of available codes as tuples of strings 

1359 ''' 

1360 return sorted(list(self.iter_codes(kind=kind))) 

1361 

1362 def get_counts(self, kind=None): 

1363 ''' 

1364 Get number of occurrences of any (kind, codes) combination. 

1365 

1366 :param kind: 

1367 If given, get codes only for selected content type. 

1368 :type kind: 

1369 str 

1370 

1371 :complexity: 

1372 O(1), independent of number of nuts. 

1373 

1374 :returns: ``dict`` with ``counts[kind][codes]`` or ``counts[codes]`` 

1375 if kind is not ``None`` 

1376 ''' 

1377 d = {} 

1378 for (k, codes, deltat), count in self.iter_counts(): 

1379 if k not in d: 

1380 v = d[k] = {} 

1381 else: 

1382 v = d[k] 

1383 

1384 if codes not in v: 

1385 v[codes] = 0 

1386 

1387 v[codes] += count 

1388 

1389 if kind is not None: 

1390 return d[kind] 

1391 else: 

1392 return d 

1393 

1394 def glob_codes(self, kind, codes_list): 

1395 ''' 

1396 Find codes matching given patterns. 

1397 

1398 :param kind: 

1399 Content kind to be queried. 

1400 :type kind: 

1401 str 

1402 

1403 :param codes_list: 

1404 List of code patterns to query. If not given or empty, an empty 

1405 list is returned. 

1406 :type codes_list: 

1407 :py:class:`list` of :py:class:`tuple` of :py:class:`str` 

1408 

1409 :returns: 

1410 List of matches of the form ``[kind_codes_id, codes, deltat]``. 

1411 ''' 

1412 

1413 args = [to_kind_id(kind)] 

1414 pats = [] 

1415 for codes in codes_list: 

1416 pats.extend(codes_patterns_for_kind(kind, codes)) 

1417 

1418 codes_cond = ' ( %s ) ' % ' OR '.join( 

1419 ('kind_codes.codes GLOB ?',) * len(pats)) 

1420 

1421 args.extend(separator.join(pat) for pat in pats) 

1422 

1423 sql = self._sql(''' 

1424 SELECT kind_codes_id, codes, deltat FROM kind_codes 

1425 WHERE 

1426 kind_id == ? 

1427 AND ''' + codes_cond) 

1428 

1429 return list(map(list, self._conn.execute(sql, args))) 

1430 

1431 def update(self, constraint=None, **kwargs): 

1432 ''' 

1433 Update or partially update channel and event inventories. 

1434 

1435 :param constraint: 

1436 Selection of times or areas to be brought up to date. 

1437 :type constraint: 

1438 :py:class:`~pyrocko.squirrel.client.Constraint` 

1439 

1440 :param \\*\\*kwargs: 

1441 Shortcut for setting ``constraint=Constraint(**kwargs)``. 

1442 

1443 This function triggers all attached remote sources, to check for 

1444 updates in the meta-data. The sources will only submit queries when 

1445 their expiration date has passed, or if the selection spans into 

1446 previously unseen times or areas. 

1447 ''' 

1448 

1449 if constraint is None: 

1450 constraint = client.Constraint(**kwargs) 

1451 

1452 for source in self._sources: 

1453 source.update_channel_inventory(self, constraint) 

1454 source.update_event_inventory(self, constraint) 

1455 

1456 def update_waveform_promises(self, constraint=None, **kwargs): 

1457 ''' 

1458 Permit downloading of remote waveforms. 

1459 

1460 :param constraint: 

1461 Remote waveforms compatible with the given constraint are enabled 

1462 for download. 

1463 :type constraint: 

1464 :py:class:`~pyrocko.squirrel.client.Constraint` 

1465 

1466 :param \\*\\*kwargs: 

1467 Shortcut for setting ``constraint=Constraint(**kwargs)``. 

1468 

1469 Calling this method permits Squirrel to download waveforms from remote 

1470 sources when processing subsequent waveform requests. This works by 

1471 inserting so called waveform promises into the database. It will look 

1472 into the available channels for each remote source and create a promise 

1473 for each channel compatible with the given constraint. If the promise 

1474 then matches in a waveform request, Squirrel tries to download the 

1475 waveform. If the download is successful, the downloaded waveform is 

1476 added to the Squirrel and the promise is deleted. If the download 

1477 fails, the promise is kept if the reason of failure looks like being 

1478 temporary, e.g. because of a network failure. If the cause of failure 

1479 however seems to be permanent, the promise is deleted so that no 

1480 further attempts are made to download a waveform which might not be 

1481 available from that server at all. To force re-scheduling after a 

1482 permanent failure, call :py:meth:`update_waveform_promises` 

1483 yet another time. 

1484 ''' 

1485 

1486 if constraint is None: 

1487 constraint = client.Constraint(**kwargs) 

1488 

1489 # TODO 

1490 print('contraint ignored atm') 

1491 

1492 for source in self._sources: 

1493 source.update_waveform_promises(self) 

1494 

1495 def update_responses(self, constraint=None, **kwargs): 

1496 # TODO 

1497 if constraint is None: 

1498 constraint = client.Constraint(**kwargs) 

1499 

1500 print('contraint ignored atm') 

1501 for source in self._sources: 

1502 source.update_response_inventory(self, constraint) 

1503 

1504 def get_nfiles(self): 

1505 ''' 

1506 Get number of files in selection. 

1507 ''' 

1508 

1509 sql = self._sql('''SELECT COUNT(*) FROM %(db)s.%(file_states)s''') 

1510 for row in self._conn.execute(sql): 

1511 return row[0] 

1512 

1513 def get_nnuts(self): 

1514 ''' 

1515 Get number of nuts in selection. 

1516 ''' 

1517 

1518 sql = self._sql('''SELECT COUNT(*) FROM %(db)s.%(nuts)s''') 

1519 for row in self._conn.execute(sql): 

1520 return row[0] 

1521 

1522 def get_total_size(self): 

1523 ''' 

1524 Get aggregated file size available in selection. 

1525 ''' 

1526 

1527 sql = self._sql(''' 

1528 SELECT SUM(files.size) FROM %(db)s.%(file_states)s 

1529 INNER JOIN files 

1530 ON %(db)s.%(file_states)s.file_id = files.file_id 

1531 ''') 

1532 

1533 for row in self._conn.execute(sql): 

1534 return row[0] or 0 

1535 

1536 def get_stats(self): 

1537 ''' 

1538 Get statistics on contents available through this selection. 

1539 ''' 

1540 

1541 kinds = self.get_kinds() 

1542 time_spans = {} 

1543 for kind in kinds: 

1544 time_spans[kind] = self.get_time_span([kind]) 

1545 

1546 return SquirrelStats( 

1547 nfiles=self.get_nfiles(), 

1548 nnuts=self.get_nnuts(), 

1549 kinds=kinds, 

1550 codes=self.get_codes(), 

1551 total_size=self.get_total_size(), 

1552 counts=self.get_counts(), 

1553 time_spans=time_spans, 

1554 sources=[s.describe() for s in self._sources], 

1555 operators=[op.describe() for op in self._operators]) 

1556 

    def get_content(
            self,
            nut,
            cache_id='default',
            accessor_id='default',
            show_progress=False):

        '''
        Get and possibly load full content for a given index entry from file.

        Loads the actual content objects (channel, station, waveform, ...) from
        file. For efficiency sibling content (all stuff in the same file
        segment) will also be loaded as a side effect. The loaded contents are
        cached in the Squirrel object.

        :param nut:
            Index entry of the content to be retrieved.
        :param cache_id:
            Name of the memory cache to use.
        :type cache_id:
            str
        :param accessor_id:
            Name of the consumer accessing the content, used for cache
            book-keeping.
        :type accessor_id:
            str
        :param show_progress:
            If ``True``, show progress when loading from file.
        :type show_progress:
            bool

        :raises:
            :py:exc:`~pyrocko.squirrel.error.NotAvailable` if the content
            cannot be retrieved, e.g. when the file has changed on disk.
        '''

        content_cache = self._content_caches[cache_id]
        if not content_cache.has(nut):

            # Cache miss: load the whole file segment containing this nut.
            # Sibling contents end up in the cache as a side effect.
            for nut_loaded in io.iload(
                    nut.file_path,
                    segment=nut.file_segment,
                    format=nut.file_format,
                    database=self._database,
                    show_progress=show_progress):

                content_cache.put(nut_loaded)

        try:
            return content_cache.get(nut, accessor_id)
        except KeyError:
            # Loading did not produce the requested entry (e.g. stale index).
            raise error.NotAvailable(
                'Unable to retrieve content: %s, %s, %s, %s' % nut.key)

1590 

1591 def advance_accessor(self, accessor_id, cache_id=None): 

1592 ''' 

1593 Notify memory caches about consumer moving to a new data batch. 

1594 

1595 :param accessor_id: 

1596 Name of accessing consumer to be advanced. 

1597 :type accessor_id: 

1598 str 

1599 

1600 :param cache_id: 

1601 Name of cache to for which the accessor should be advanced. By 

1602 default the named accessor is advanced in all registered caches. 

1603 By default, two caches named ``'default'`` and ``'waveforms'`` are 

1604 available. 

1605 :type cache_id: 

1606 str 

1607 

1608 See :py:class:`~pyrocko.squirrel.cache.ContentCache` for details on how 

1609 Squirrel's memory caching works and can be tuned. Default behaviour is 

1610 to release data when it has not been used in the latest data 

1611 window/batch. If the accessor is never advanced, data is cached 

1612 indefinitely - which is often desired e.g. for station meta-data. 

1613 Methods for consecutive data traversal, like 

1614 :py:meth:`chopper_waveforms` automatically advance and clear 

1615 their accessor. 

1616 ''' 

1617 for cache_ in ( 

1618 self._content_caches.keys() 

1619 if cache_id is None 

1620 else [cache_id]): 

1621 

1622 self._content_caches[cache_].advance_accessor(accessor_id) 

1623 

1624 def clear_accessor(self, accessor_id, cache_id=None): 

1625 ''' 

1626 Notify memory caches about a consumer having finished. 

1627 

1628 :param accessor_id: 

1629 Name of accessor to be cleared. 

1630 :type accessor_id: 

1631 str 

1632 

1633 :param cache_id: 

1634 Name of cache to for which the accessor should be cleared. By 

1635 default the named accessor is cleared from all registered caches. 

1636 By default, two caches named ``'default'`` and ``'waveforms'`` are 

1637 available. 

1638 :type cache_id: 

1639 str 

1640 

1641 Calling this method clears all references to cache entries held by the 

1642 named accessor. Cache entries are then freed if not referenced by any 

1643 other accessor. 

1644 ''' 

1645 

1646 for cache_ in ( 

1647 self._content_caches.keys() 

1648 if cache_id is None 

1649 else [cache_id]): 

1650 

1651 self._content_caches[cache_].clear_accessor(accessor_id) 

1652 

1653 def _check_duplicates(self, nuts): 

1654 d = defaultdict(list) 

1655 for nut in nuts: 

1656 d[nut.codes].append(nut) 

1657 

1658 for codes, group in d.items(): 

1659 if len(group) > 1: 

1660 logger.warning( 

1661 'Multiple entries matching codes: %s' 

1662 % '.'.join(codes.split(separator))) 

1663 

1664 @filldocs 

1665 def get_stations( 

1666 self, obj=None, tmin=None, tmax=None, time=None, codes=None, 

1667 model='squirrel'): 

1668 

1669 ''' 

1670 Get stations matching given constraints. 

1671 

1672 %(query_args)s 

1673 

1674 :param model: 

1675 Select object model for returned values: ``'squirrel'`` to get 

1676 Squirrel station objects or ``'pyrocko'`` to get Pyrocko station 

1677 objects with channel information attached. 

1678 :type model: 

1679 str 

1680 

1681 :returns: 

1682 List of :py:class:`pyrocko.squirrel.Station 

1683 <pyrocko.squirrel.model.Station>` objects by default or list of 

1684 :py:class:`pyrocko.model.Station <pyrocko.model.station.Station>` 

1685 objects if ``model='pyrocko'`` is requested. 

1686 

1687 See :py:meth:`iter_nuts` for details on time span matching. 

1688 ''' 

1689 

1690 if model == 'pyrocko': 

1691 return self._get_pyrocko_stations(obj, tmin, tmax, time, codes) 

1692 elif model == 'squirrel': 

1693 args = self._get_selection_args(obj, tmin, tmax, time, codes) 

1694 nuts = sorted( 

1695 self.iter_nuts('station', *args), key=lambda nut: nut.dkey) 

1696 self._check_duplicates(nuts) 

1697 return [self.get_content(nut) for nut in nuts] 

1698 else: 

1699 raise ValueError('Invalid station model: %s' % model) 

1700 

1701 @filldocs 

1702 def get_channels( 

1703 self, obj=None, tmin=None, tmax=None, time=None, codes=None): 

1704 

1705 ''' 

1706 Get channels matching given constraints. 

1707 

1708 %(query_args)s 

1709 

1710 :returns: 

1711 List of :py:class:`~pyrocko.squirrel.model.Channel` objects. 

1712 

1713 See :py:meth:`iter_nuts` for details on time span matching. 

1714 ''' 

1715 

1716 args = self._get_selection_args(obj, tmin, tmax, time, codes) 

1717 nuts = sorted( 

1718 self.iter_nuts('channel', *args), key=lambda nut: nut.dkey) 

1719 self._check_duplicates(nuts) 

1720 return [self.get_content(nut) for nut in nuts] 

1721 

    @filldocs
    def get_sensors(
            self, obj=None, tmin=None, tmax=None, time=None, codes=None):

        '''
        Get sensors matching given constraints.

        %(query_args)s

        :returns:
            List of :py:class:`~pyrocko.squirrel.model.Sensor` objects.

        Channels sharing a sensor (same codes except for the last character
        of the channel code, e.g. ``BHE``, ``BHN``, ``BHZ``) are grouped
        into :py:class:`~pyrocko.squirrel.model.Sensor` objects.

        See :py:meth:`iter_nuts` for details on time span matching.
        '''

        tmin, tmax, codes = self._get_selection_args(
            obj, tmin, tmax, time, codes)

        if codes is not None:
            if isinstance(codes, str):
                codes = codes.split('.')
            # Expand to the full 6-element codes form (see codes_inflate).
            codes = tuple(codes_inflate(codes))
            if codes[4] != '*':
                # Widen the channel code to match all components of the
                # sensor, e.g. 'BHZ' -> 'BH?' (unless it is already the
                # full wildcard '*').
                codes = codes[:4] + (codes[4][:-1] + '?',) + codes[5:]

        nuts = sorted(
            self.iter_nuts(
                'channel', tmin, tmax, codes), key=lambda nut: nut.dkey)
        self._check_duplicates(nuts)
        # Group the matching channel objects into sensors.
        return model.Sensor.from_channels(
            self.get_content(nut) for nut in nuts)

1753 

1754 @filldocs 

1755 def get_responses( 

1756 self, obj=None, tmin=None, tmax=None, time=None, codes=None): 

1757 

1758 ''' 

1759 Get instrument responses matching given constraints. 

1760 

1761 %(query_args)s 

1762 

1763 :returns: 

1764 List of :py:class:`~pyrocko.squirrel.model.Response` objects. 

1765 

1766 See :py:meth:`iter_nuts` for details on time span matching. 

1767 ''' 

1768 

1769 args = self._get_selection_args(obj, tmin, tmax, time, codes) 

1770 nuts = sorted( 

1771 self.iter_nuts('response', *args), key=lambda nut: nut.dkey) 

1772 self._check_duplicates(nuts) 

1773 return [self.get_content(nut) for nut in nuts] 

1774 

1775 @filldocs 

1776 def get_response( 

1777 self, obj=None, tmin=None, tmax=None, time=None, codes=None): 

1778 

1779 ''' 

1780 Get instrument response matching given constraints. 

1781 

1782 %(query_args)s 

1783 

1784 :returns: 

1785 :py:class:`~pyrocko.squirrel.model.Response` object. 

1786 

1787 Same as :py:meth:`get_responses` but returning exactly one response. 

1788 Raises :py:exc:`~pyrocko.squirrel.error.NotAvailable` if zero or more 

1789 than one is available. 

1790 

1791 See :py:meth:`iter_nuts` for details on time span matching. 

1792 ''' 

1793 

1794 responses = self.get_responses(obj, tmin, tmax, time, codes) 

1795 if len(responses) == 0: 

1796 raise error.NotAvailable( 

1797 'No instrument response available.') 

1798 elif len(responses) > 1: 

1799 raise error.NotAvailable( 

1800 'Multiple instrument responses matching given constraints.') 

1801 

1802 return responses[0] 

1803 

1804 @filldocs 

1805 def get_events( 

1806 self, obj=None, tmin=None, tmax=None, time=None, codes=None): 

1807 

1808 ''' 

1809 Get events matching given constraints. 

1810 

1811 %(query_args)s 

1812 

1813 :returns: 

1814 List of :py:class:`~pyrocko.model.event.Event` objects. 

1815 

1816 See :py:meth:`iter_nuts` for details on time span matching. 

1817 ''' 

1818 

1819 args = self._get_selection_args(obj, tmin, tmax, time, codes) 

1820 nuts = sorted( 

1821 self.iter_nuts('event', *args), key=lambda nut: nut.dkey) 

1822 self._check_duplicates(nuts) 

1823 return [self.get_content(nut) for nut in nuts] 

1824 

1825 def _redeem_promises(self, *args): 

1826 

1827 tmin, tmax, _ = args 

1828 

1829 waveforms = list(self.iter_nuts('waveform', *args)) 

1830 promises = list(self.iter_nuts('waveform_promise', *args)) 

1831 

1832 codes_to_avail = defaultdict(list) 

1833 for nut in waveforms: 

1834 codes_to_avail[nut.codes].append((nut.tmin, nut.tmax+nut.deltat)) 

1835 

1836 def tts(x): 

1837 if isinstance(x, tuple): 

1838 return tuple(tts(e) for e in x) 

1839 elif isinstance(x, list): 

1840 return list(tts(e) for e in x) 

1841 else: 

1842 return util.time_to_str(x) 

1843 

1844 orders = [] 

1845 for promise in promises: 

1846 waveforms_avail = codes_to_avail[promise.codes] 

1847 for block_tmin, block_tmax in blocks( 

1848 max(tmin, promise.tmin), 

1849 min(tmax, promise.tmax), 

1850 promise.deltat): 

1851 

1852 orders.append( 

1853 WaveformOrder( 

1854 source_id=promise.file_path, 

1855 codes=tuple(promise.codes.split(separator)), 

1856 tmin=block_tmin, 

1857 tmax=block_tmax, 

1858 deltat=promise.deltat, 

1859 gaps=gaps(waveforms_avail, block_tmin, block_tmax))) 

1860 

1861 orders_noop, orders = lpick(lambda order: order.gaps, orders) 

1862 

1863 order_keys_noop = set(order_key(order) for order in orders_noop) 

1864 if len(order_keys_noop) != 0 or len(orders_noop) != 0: 

1865 logger.info( 

1866 'Waveform orders already satisified with cached/local data: ' 

1867 '%i (%i)' % (len(order_keys_noop), len(orders_noop))) 

1868 

1869 source_ids = [] 

1870 sources = {} 

1871 for source in self._sources: 

1872 if isinstance(source, fdsn.FDSNSource): 

1873 source_ids.append(source._source_id) 

1874 sources[source._source_id] = source 

1875 

1876 source_priority = dict( 

1877 (source_id, i) for (i, source_id) in enumerate(source_ids)) 

1878 

1879 order_groups = defaultdict(list) 

1880 for order in orders: 

1881 order_groups[order_key(order)].append(order) 

1882 

1883 for k, order_group in order_groups.items(): 

1884 order_group.sort( 

1885 key=lambda order: source_priority[order.source_id]) 

1886 

1887 n_order_groups = len(order_groups) 

1888 

1889 if len(order_groups) != 0 or len(orders) != 0: 

1890 logger.info( 

1891 'Waveform orders standing for download: %i (%i)' 

1892 % (len(order_groups), len(orders))) 

1893 

1894 task = make_task('Waveform orders processed', n_order_groups) 

1895 else: 

1896 task = None 

1897 

1898 def split_promise(order): 

1899 self._split_nuts( 

1900 'waveform_promise', 

1901 order.tmin, order.tmax, 

1902 codes=order.codes, 

1903 path=order.source_id) 

1904 

1905 def release_order_group(order): 

1906 okey = order_key(order) 

1907 for followup in order_groups[okey]: 

1908 split_promise(followup) 

1909 

1910 del order_groups[okey] 

1911 

1912 if task: 

1913 task.update(n_order_groups - len(order_groups)) 

1914 

1915 def noop(order): 

1916 pass 

1917 

1918 def success(order): 

1919 release_order_group(order) 

1920 split_promise(order) 

1921 

1922 def batch_add(paths): 

1923 self.add(paths) 

1924 

1925 calls = queue.Queue() 

1926 

1927 def enqueue(f): 

1928 def wrapper(*args): 

1929 calls.put((f, args)) 

1930 

1931 return wrapper 

1932 

1933 for order in orders_noop: 

1934 split_promise(order) 

1935 

1936 while order_groups: 

1937 

1938 orders_now = [] 

1939 empty = [] 

1940 for k, order_group in order_groups.items(): 

1941 try: 

1942 orders_now.append(order_group.pop(0)) 

1943 except IndexError: 

1944 empty.append(k) 

1945 

1946 for k in empty: 

1947 del order_groups[k] 

1948 

1949 by_source_id = defaultdict(list) 

1950 for order in orders_now: 

1951 by_source_id[order.source_id].append(order) 

1952 

1953 threads = [] 

1954 for source_id in by_source_id: 

1955 def download(): 

1956 try: 

1957 sources[source_id].download_waveforms( 

1958 by_source_id[source_id], 

1959 success=enqueue(success), 

1960 error_permanent=enqueue(split_promise), 

1961 error_temporary=noop, 

1962 batch_add=enqueue(batch_add)) 

1963 

1964 finally: 

1965 calls.put(None) 

1966 

1967 thread = threading.Thread(target=download) 

1968 thread.start() 

1969 threads.append(thread) 

1970 

1971 ndone = 0 

1972 while ndone < len(threads): 

1973 ret = calls.get() 

1974 if ret is None: 

1975 ndone += 1 

1976 else: 

1977 ret[0](*ret[1]) 

1978 

1979 for thread in threads: 

1980 thread.join() 

1981 

1982 if task: 

1983 task.update(n_order_groups - len(order_groups)) 

1984 

1985 if task: 

1986 task.done() 

1987 

1988 @filldocs 

1989 def get_waveform_nuts( 

1990 self, obj=None, tmin=None, tmax=None, time=None, codes=None): 

1991 

1992 ''' 

1993 Get waveform content entities matching given constraints. 

1994 

1995 %(query_args)s 

1996 

1997 Like :py:meth:`get_nuts` with ``kind='waveform'`` but additionally 

1998 resolves matching waveform promises (downloads waveforms from remote 

1999 sources). 

2000 

2001 See :py:meth:`iter_nuts` for details on time span matching. 

2002 ''' 

2003 

2004 args = self._get_selection_args(obj, tmin, tmax, time, codes) 

2005 self._redeem_promises(*args) 

2006 return sorted( 

2007 self.iter_nuts('waveform', *args), key=lambda nut: nut.dkey) 

2008 

    @filldocs
    def get_waveforms(
            self, obj=None, tmin=None, tmax=None, time=None, codes=None,
            uncut=False, want_incomplete=True, degap=True, maxgap=5,
            maxlap=None, snap=None, include_last=False, load_data=True,
            accessor_id='default', operator_params=None):

        '''
        Get waveforms matching given constraints.

        %(query_args)s

        :param uncut:
            Set to ``True``, to disable cutting traces to [``tmin``, ``tmax``]
            and to disable degapping/deoverlapping. Returns untouched traces as
            they are read from file segment. File segments are always read in
            their entirety.
        :type uncut:
            bool

        :param want_incomplete:
            If ``True``, gappy/incomplete traces are included in the result.
        :type want_incomplete:
            bool

        :param degap:
            If ``True``, connect traces and remove gaps and overlaps.
        :type degap:
            bool

        :param maxgap:
            Maximum gap size in samples which is filled with interpolated
            samples when ``degap`` is ``True``.
        :type maxgap:
            int

        :param maxlap:
            Maximum overlap size in samples which is removed when ``degap`` is
            ``True``
        :type maxlap:
            int

        :param snap:
            Rounding functions used when computing sample index from time
            instance, for trace start and trace end, respectively. By default,
            ``(round, round)`` is used.
        :type snap:
            tuple of 2 callables

        :param include_last:
            If ``True``, add one more sample to the returned traces (the sample
            which would be the first sample of a query with ``tmin`` set to the
            current value of ``tmax``).
        :type include_last:
            bool

        :param load_data:
            If ``True``, waveform data samples are read from files (or cache).
            If ``False``, meta-information-only traces are returned (dummy
            traces with no data samples).
        :type load_data:
            bool

        :param accessor_id:
            Name of consumer on who's behalf data is accessed. Used in cache
            management (see :py:mod:`~pyrocko.squirrel.cache`). Used as a key
            to distinguish different points of extraction for the decision of
            when to release cached waveform data. Should be used when data is
            alternately extracted from more than one region / selection.
        :type accessor_id:
            str

        See :py:meth:`iter_nuts` for details on time span matching.

        Loaded data is kept in memory (at least) until
        :py:meth:`clear_accessor` has been called or
        :py:meth:`advance_accessor` has been called two consecutive times
        without data being accessed between the two calls (by this accessor).
        Data may still be further kept in the memory cache if held alive by
        consumers with a different ``accessor_id``.
        '''

        tmin, tmax, codes = self._get_selection_args(
            obj, tmin, tmax, time, codes)

        self_tmin, self_tmax = self.get_time_span(
            ['waveform', 'waveform_promise'])

        if None in (self_tmin, self_tmax):
            logger.warning(
                'No waveforms available.')
            return []

        # Unset time bounds default to the full available time span.
        tmin = tmin if tmin is not None else self_tmin
        tmax = tmax if tmax is not None else self_tmax

        if codes is not None:
            # If the codes belong to a registered operator, delegate the
            # whole request to it.
            operator = self.get_operator(codes)
            if operator is not None:
                return operator.get_waveforms(
                    self, codes,
                    tmin=tmin, tmax=tmax,
                    uncut=uncut, want_incomplete=want_incomplete, degap=degap,
                    maxgap=maxgap, maxlap=maxlap, snap=snap,
                    include_last=include_last, load_data=load_data,
                    accessor_id=accessor_id, params=operator_params)

        # May trigger downloads via promise redemption.
        nuts = self.get_waveform_nuts(obj, tmin, tmax, time, codes)

        if load_data:
            # Content goes through the 'waveform' memory cache.
            traces = [
                self.get_content(nut, 'waveform', accessor_id) for nut in nuts]

        else:
            # Meta-information-only dummy traces, no samples attached.
            traces = [
                trace.Trace(**nut.trace_kwargs) for nut in nuts]

        if uncut:
            return traces

        if snap is None:
            snap = (round, round)

        chopped = []
        for tr in traces:
            if not load_data and tr.ydata is not None:
                # Defensive: strip samples in case a trace carries data
                # even though load_data is False.
                tr = tr.copy(data=False)
                tr.ydata = None

            try:
                chopped.append(tr.chop(
                    tmin, tmax,
                    inplace=False,
                    snap=snap,
                    include_last=include_last))

            except trace.NoData:
                # Trace does not intersect [tmin, tmax]; drop it.
                pass

        processed = self._process_chopped(
            chopped, degap, maxgap, maxlap, want_incomplete, tmin, tmax)

        return processed

2152 

2153 @filldocs 

2154 def chopper_waveforms( 

2155 self, obj=None, tmin=None, tmax=None, time=None, codes=None, 

2156 tinc=None, tpad=0., 

2157 want_incomplete=True, snap_window=False, 

2158 degap=True, maxgap=5, maxlap=None, 

2159 snap=None, include_last=False, load_data=True, 

2160 accessor_id=None, clear_accessor=True, operator_params=None): 

2161 

2162 ''' 

2163 Iterate window-wise over waveform archive. 

2164 

2165 %(query_args)s 

2166 

2167 :param tinc: 

2168 Time increment (window shift time) (default uses ``tmax-tmin``) 

2169 :type tinc: 

2170 timestamp 

2171 

2172 :param tpad: 

2173 Padding time appended on either side of the data window (window 

2174 overlap is ``2*tpad``). 

2175 :type tpad: 

2176 timestamp 

2177 

2178 :param want_incomplete: 

2179 If ``True``, gappy/incomplete traces are included in the result. 

2180 :type want_incomplete: 

2181 bool 

2182 

2183 :param snap_window: 

2184 If ``True``, start time windows at multiples of tinc with respect 

2185 to system time zero. 

2186 

2187 :param degap: 

2188 If ``True``, connect traces and remove gaps and overlaps. 

2189 :type degap: 

2190 bool 

2191 

2192 :param maxgap: 

2193 Maximum gap size in samples which is filled with interpolated 

2194 samples when ``degap`` is ``True``. 

2195 :type maxgap: 

2196 int 

2197 

2198 :param maxlap: 

2199 Maximum overlap size in samples which is removed when ``degap`` is 

2200 ``True`` 

2201 :type maxlap: 

2202 int 

2203 

2204 :param snap: 

2205 Rounding functions used when computing sample index from time 

2206 instance, for trace start and trace end, respectively. By default, 

2207 ``(round, round)`` is used. 

2208 :type snap: 

2209 tuple of 2 callables 

2210 

2211 :param include_last: 

2212 If ``True``, add one more sample to the returned traces (the sample 

2213 which would be the first sample of a query with ``tmin`` set to the 

2214 current value of ``tmax``). 

2215 :type include_last: 

2216 bool 

2217 

2218 :param load_data: 

2219 If ``True``, waveform data samples are read from files (or cache). 

2220 If ``False``, meta-information-only traces are returned (dummy 

2221 traces with no data samples). 

2222 :type load_data: 

2223 bool 

2224 

2225 :param accessor_id: 

2226 Name of consumer on who's behalf data is accessed. Used in cache 

2227 management (see :py:mod:`~pyrocko.squirrel.cache`). Used as a key 

2228 to distinguish different points of extraction for the decision of 

2229 when to release cached waveform data. Should be used when data is 

2230 alternately extracted from more than one region / selection. 

2231 :type accessor_id: 

2232 str 

2233 

2234 :param clear_accessor: 

2235 If ``True`` (default), :py:meth:`clear_accessor` is called when the 

2236 chopper finishes. Set to ``False`` to keep loaded waveforms in 

2237 memory when the generator returns. 

2238 

2239 :yields: 

2240 A list of :py:class:`~pyrocko.trace.Trace` objects for every 

2241 extracted time window. 

2242 

2243 See :py:meth:`iter_nuts` for details on time span matching. 

2244 ''' 

2245 

2246 tmin, tmax, codes = self._get_selection_args( 

2247 obj, tmin, tmax, time, codes) 

2248 

2249 self_tmin, self_tmax = self.get_time_span( 

2250 ['waveform', 'waveform_promise']) 

2251 

2252 if None in (self_tmin, self_tmax): 

2253 logger.warning( 

2254 'Content has undefined time span. No waveforms and no ' 

2255 'waveform promises?') 

2256 return 

2257 

2258 if snap_window and tinc is not None: 

2259 tmin = tmin if tmin is not None else self_tmin 

2260 tmax = tmax if tmax is not None else self_tmax 

2261 tmin = math.floor(tmin / tinc) * tinc 

2262 tmax = math.ceil(tmax / tinc) * tinc 

2263 else: 

2264 tmin = tmin if tmin is not None else self_tmin + tpad 

2265 tmax = tmax if tmax is not None else self_tmax - tpad 

2266 

2267 tinc = tinc if tinc is not None else tmax - tmin 

2268 

2269 try: 

2270 if accessor_id is None: 

2271 accessor_id = 'chopper%i' % self._n_choppers_active 

2272 

2273 self._n_choppers_active += 1 

2274 

2275 eps = tinc * 1e-6 

2276 if tinc != 0.0: 

2277 nwin = int(((tmax - eps) - tmin) / tinc) + 1 

2278 else: 

2279 nwin = 1 

2280 

2281 for iwin in range(nwin): 

2282 wmin, wmax = tmin+iwin*tinc, min(tmin+(iwin+1)*tinc, tmax) 

2283 chopped = [] 

2284 wmin, wmax = tmin+iwin*tinc, min(tmin+(iwin+1)*tinc, tmax) 

2285 eps = tinc*1e-6 

2286 if wmin >= tmax-eps: 

2287 break 

2288 

2289 chopped = self.get_waveforms( 

2290 tmin=wmin-tpad, 

2291 tmax=wmax+tpad, 

2292 codes=codes, 

2293 snap=snap, 

2294 include_last=include_last, 

2295 load_data=load_data, 

2296 want_incomplete=want_incomplete, 

2297 degap=degap, 

2298 maxgap=maxgap, 

2299 maxlap=maxlap, 

2300 accessor_id=accessor_id, 

2301 operator_params=operator_params) 

2302 

2303 self.advance_accessor(accessor_id) 

2304 

2305 yield Batch( 

2306 tmin=wmin, 

2307 tmax=wmax, 

2308 i=iwin, 

2309 n=nwin, 

2310 traces=chopped) 

2311 

2312 iwin += 1 

2313 

2314 finally: 

2315 self._n_choppers_active -= 1 

2316 if clear_accessor: 

2317 self.clear_accessor(accessor_id, 'waveform') 

2318 

2319 def _process_chopped( 

2320 self, chopped, degap, maxgap, maxlap, want_incomplete, tmin, tmax): 

2321 

2322 chopped.sort(key=lambda a: a.full_id) 

2323 if degap: 

2324 chopped = trace.degapper(chopped, maxgap=maxgap, maxlap=maxlap) 

2325 

2326 if not want_incomplete: 

2327 chopped_weeded = [] 

2328 for tr in chopped: 

2329 emin = tr.tmin - tmin 

2330 emax = tr.tmax + tr.deltat - tmax 

2331 if (abs(emin) <= 0.5*tr.deltat and abs(emax) <= 0.5*tr.deltat): 

2332 chopped_weeded.append(tr) 

2333 

2334 elif degap: 

2335 if (0. < emin <= 5. * tr.deltat 

2336 and -5. * tr.deltat <= emax < 0.): 

2337 

2338 tr.extend(tmin, tmax-tr.deltat, fillmethod='repeat') 

2339 chopped_weeded.append(tr) 

2340 

2341 chopped = chopped_weeded 

2342 

2343 return chopped 

2344 

2345 def _get_pyrocko_stations( 

2346 self, obj=None, tmin=None, tmax=None, time=None, codes=None): 

2347 

2348 from pyrocko import model as pmodel 

2349 

2350 by_nsl = defaultdict(lambda: (list(), list())) 

2351 for station in self.get_stations(obj, tmin, tmax, time, codes): 

2352 sargs = station._get_pyrocko_station_args() 

2353 nsl = sargs[1:4] 

2354 by_nsl[nsl][0].append(sargs) 

2355 

2356 for channel in self.get_channels(obj, tmin, tmax, time, codes): 

2357 sargs = channel._get_pyrocko_station_args() 

2358 nsl = sargs[1:4] 

2359 sargs_list, channels_list = by_nsl[nsl] 

2360 sargs_list.append(sargs) 

2361 channels_list.append(channel) 

2362 

2363 pstations = [] 

2364 nsls = list(by_nsl.keys()) 

2365 nsls.sort() 

2366 for nsl in nsls: 

2367 sargs_list, channels_list = by_nsl[nsl] 

2368 sargs = util.consistency_merge(sargs_list) 

2369 

2370 by_c = defaultdict(list) 

2371 for ch in channels_list: 

2372 by_c[ch.channel].append(ch._get_pyrocko_channel_args()) 

2373 

2374 chas = list(by_c.keys()) 

2375 chas.sort() 

2376 pchannels = [] 

2377 for cha in chas: 

2378 list_of_cargs = by_c[cha] 

2379 cargs = util.consistency_merge(list_of_cargs) 

2380 pchannels.append(pmodel.Channel( 

2381 name=cargs[0], 

2382 azimuth=cargs[1], 

2383 dip=cargs[2])) 

2384 

2385 pstations.append(pmodel.Station( 

2386 network=sargs[0], 

2387 station=sargs[1], 

2388 location=sargs[2], 

2389 lat=sargs[3], 

2390 lon=sargs[4], 

2391 elevation=sargs[5], 

2392 depth=sargs[6] or 0.0, 

2393 channels=pchannels)) 

2394 

2395 return pstations 

2396 

2397 @property 

2398 def pile(self): 

2399 

2400 ''' 

2401 Emulates the older :py:class:`pyrocko.pile.Pile` interface. 

2402 

2403 This property exposes a :py:class:`pyrocko.squirrel.pile.Pile` object, 

2404 which emulates most of the older :py:class:`pyrocko.pile.Pile` methods 

2405 but uses the fluffy power of the Squirrel under the hood. 

2406 

2407 This interface can be used as a drop-in replacement for piles which are 

2408 used in existing scripts and programs for efficient waveform data 

2409 access. The Squirrel-based pile scales better for large datasets. Newer 

2410 scripts should use Squirrel's native methods to avoid the emulation 

2411 overhead. 

2412 ''' 

2413 from . import pile 

2414 

2415 if self._pile is None: 

2416 self._pile = pile.Pile(self) 

2417 

2418 return self._pile 

2419 

2420 def snuffle(self): 

2421 ''' 

2422 Look at dataset in Snuffler. 

2423 ''' 

2424 self.pile.snuffle() 

2425 

2426 def _gather_codes_keys(self, kind, gather, selector): 

2427 return set( 

2428 gather(codes) 

2429 for codes in self.iter_codes(kind) 

2430 if selector is None or selector(codes)) 

2431 

2432 def __str__(self): 

2433 return str(self.get_stats()) 

2434 

    def get_coverage(
            self, kind, tmin=None, tmax=None, codes_list=None, limit=None):

        '''
        Get coverage information.

        Get information about strips of gapless data coverage.

        :param kind:
            Content kind to be queried.
        :type kind:
            str

        :param tmin:
            Start time of query interval.
        :type tmin:
            timestamp

        :param tmax:
            End time of query interval.
        :type tmax:
            timestamp

        :param codes_list:
            List of code patterns to query. If not given or empty, an empty
            list is returned.
        :type codes_list:
            :py:class:`list` of :py:class:`tuple` of :py:class:`str`

        :param limit:
            Limit query to return only up to a given maximum number of entries
            per matching channel (without setting this option, very gappy data
            could cause the query to execute for a very long time).
        :type limit:
            int

        :returns:
            List of entries of the form ``(pattern, codes, deltat, tmin, tmax,
            data)`` where ``pattern`` is the request code pattern which
            yielded this entry, ``codes`` are the matching channel codes,
            ``tmin`` and ``tmax`` are the global min and max times for which
            data for this channel is available, regardless of any time
            restrictions in the query. ``data`` is a list with (up to
            ``limit``) change-points of the form ``(time, count)`` where a
            ``count`` of zero indicates a data gap, a value of 1 normal data
            coverage and higher values indicate duplicate/redundant data.
        '''

        # Split query times into (seconds, fractional offset) as stored in
        # the coverage table.
        tmin_seconds, tmin_offset = model.tsplit(tmin)
        tmax_seconds, tmax_offset = model.tsplit(tmax)

        # Expand each code pattern and prepend the originating pattern to
        # every matching row, so results can be attributed to their query.
        kdata_all = []
        for pattern in codes_list:
            kdata = self.glob_codes(kind, [pattern])
            for row in kdata:
                row[0:0] = [pattern]

            kdata_all.extend(kdata)

        kind_codes_ids = [x[1] for x in kdata_all]

        # Count nuts exactly at tmin per (codes, deltat): these provide the
        # initial coverage counts at the left edge of the query interval.
        counts_at_tmin = {}
        if tmin is not None:
            for nut in self.iter_nuts(
                    kind, tmin, tmin, kind_codes_ids=kind_codes_ids):

                k = nut.codes, nut.deltat
                if k not in counts_at_tmin:
                    counts_at_tmin[k] = 0

                counts_at_tmin[k] += 1

        coverage = []
        for pattern, kind_codes_id, codes, deltat in kdata_all:
            # entry layout: [pattern, codes, deltat, tmin, tmax, data]
            entry = [pattern, codes, deltat, None, None, []]

            # Fetch global first (ASC) and last (DESC) coverage change-point
            # times for this channel, filling entry[3] and entry[4].
            for i, order in [(0, 'ASC'), (1, 'DESC')]:
                sql = self._sql('''
                    SELECT
                        time_seconds,
                        time_offset
                    FROM %(db)s.%(coverage)s
                    WHERE
                        kind_codes_id == ?
                    ORDER BY
                        kind_codes_id ''' + order + ''',
                        time_seconds ''' + order + ''',
                        time_offset ''' + order + '''
                    LIMIT 1
                ''')

                for row in self._conn.execute(sql, [kind_codes_id]):
                    entry[3+i] = model.tjoin(row[0], row[1])

            # No coverage rows at all for this channel: skip it.
            if None in entry[3:5]:
                continue

            args = [kind_codes_id]

            # Build optional time-range constraints; argument order must
            # match the placeholder order in sql_time.
            sql_time = ''
            if tmin is not None:
                # intentionally < because (== tmin) is queried from nuts
                sql_time += ' AND ( ? < time_seconds ' \
                    'OR ( ? == time_seconds AND ? < time_offset ) ) '
                args.extend([tmin_seconds, tmin_seconds, tmin_offset])

            if tmax is not None:
                sql_time += ' AND ( time_seconds < ? ' \
                    'OR ( ? == time_seconds AND time_offset <= ? ) ) '
                args.extend([tmax_seconds, tmax_seconds, tmax_offset])

            sql_limit = ''
            if limit is not None:
                sql_limit = ' LIMIT ?'
                args.append(limit)

            sql = self._sql('''
                SELECT
                    time_seconds,
                    time_offset,
                    step
                FROM %(db)s.%(coverage)s
                WHERE
                    kind_codes_id == ?
                    ''' + sql_time + '''
                ORDER BY
                    kind_codes_id,
                    time_seconds,
                    time_offset
            ''' + sql_limit)

            rows = list(self._conn.execute(sql, args))

            if limit is not None and len(rows) == limit:
                # Hit the row limit: data is marked as unavailable (None)
                # rather than returning a truncated change-point list.
                entry[-1] = None
            else:
                # Accumulate step deltas into running coverage counts,
                # starting from the count at tmin (0 if unknown).
                counts = counts_at_tmin.get((codes, deltat), 0)
                tlast = None
                if tmin is not None:
                    entry[-1].append((tmin, counts))
                    tlast = tmin

                for row in rows:
                    t = model.tjoin(row[0], row[1])
                    counts += row[2]
                    entry[-1].append((t, counts))
                    tlast = t

                # Close the interval with a change-point at tmax, unless one
                # is already there.
                if tmax is not None and (tlast is None or tlast != tmax):
                    entry[-1].append((tmax, counts))

            coverage.append(entry)

        return coverage

2588 

2589 def add_operator(self, op): 

2590 self._operators.append(op) 

2591 

2592 def update_operator_mappings(self): 

2593 available = [ 

2594 separator.join(codes) 

2595 for codes in self.get_codes(kind=('channel'))] 

2596 

2597 for operator in self._operators: 

2598 operator.update_mappings(available, self._operator_registry) 

2599 

2600 def iter_operator_mappings(self): 

2601 for operator in self._operators: 

2602 for in_codes, out_codes in operator.iter_mappings(): 

2603 yield operator, in_codes, out_codes 

2604 

2605 def get_operator_mappings(self): 

2606 return list(self.iter_operator_mappings()) 

2607 

2608 def get_operator(self, codes): 

2609 if isinstance(codes, tuple): 

2610 codes = separator.join(codes) 

2611 try: 

2612 return self._operator_registry[codes][0] 

2613 except KeyError: 

2614 return None 

2615 

2616 def get_operator_group(self, codes): 

2617 if isinstance(codes, tuple): 

2618 codes = separator.join(codes) 

2619 try: 

2620 return self._operator_registry[codes] 

2621 except KeyError: 

2622 return None, (None, None, None) 

2623 

2624 def iter_operator_codes(self): 

2625 for _, _, out_codes in self.iter_operator_mappings(): 

2626 for codes in out_codes: 

2627 yield tuple(codes.split(separator)) 

2628 

2629 def get_operator_codes(self): 

2630 return list(self.iter_operator_codes()) 

2631 

2632 def print_tables(self, table_names=None, stream=None): 

2633 ''' 

2634 Dump raw database tables in textual form (for debugging purposes). 

2635 

2636 :param table_names: 

2637 Names of tables to be dumped or ``None`` to dump all. 

2638 :type table_names: 

2639 :py:class:`list` of :py:class:`str` 

2640 

2641 :param stream: 

2642 Open file or ``None`` to dump to standard output. 

2643 ''' 

2644 

2645 if stream is None: 

2646 stream = sys.stdout 

2647 

2648 if isinstance(table_names, str): 

2649 table_names = [table_names] 

2650 

2651 if table_names is None: 

2652 table_names = [ 

2653 'selection_file_states', 

2654 'selection_nuts', 

2655 'selection_kind_codes_count', 

2656 'files', 'nuts', 'kind_codes', 'kind_codes_count'] 

2657 

2658 m = { 

2659 'selection_file_states': '%(db)s.%(file_states)s', 

2660 'selection_nuts': '%(db)s.%(nuts)s', 

2661 'selection_kind_codes_count': '%(db)s.%(kind_codes_count)s', 

2662 'files': 'files', 

2663 'nuts': 'nuts', 

2664 'kind_codes': 'kind_codes', 

2665 'kind_codes_count': 'kind_codes_count'} 

2666 

2667 for table_name in table_names: 

2668 self._database.print_table( 

2669 m[table_name] % self._names, stream=stream) 

2670 

2671 

class SquirrelStats(Object):
    '''
    Container to hold statistics about contents available from a Squirrel.

    See also :py:meth:`Squirrel.get_stats`.
    '''

    nfiles = Int.T(
        help='Number of files in selection.')
    nnuts = Int.T(
        help='Number of index nuts in selection.')
    codes = List.T(
        Tuple.T(content_t=String.T()),
        help='Available code sequences in selection, e.g. '
             '(agency, network, station, location) for stations nuts.')
    kinds = List.T(
        String.T(),
        help='Available content types in selection.')
    total_size = Int.T(
        help='Aggregated file size of files is selection.')
    counts = Dict.T(
        String.T(), Dict.T(Tuple.T(content_t=String.T()), Int.T()),
        help='Breakdown of how many nuts of any content type and code '
             'sequence are available in selection, ``counts[kind][codes]``.')
    time_spans = Dict.T(
        String.T(), Tuple.T(content_t=Timestamp.T()),
        help='Time spans by content type.')
    sources = List.T(
        String.T(),
        help='Descriptions of attached sources.')
    operators = List.T(
        String.T(),
        help='Descriptions of attached operators.')

    def __str__(self):
        # Total nut count per content kind, summed over all code sequences.
        kind_counts = dict(
            (kind, sum(self.counts[kind].values())) for kind in self.kinds)

        scodes = model.codes_to_str_abbreviated(self.codes)

        ssources = '<none>' if not self.sources else '\n' + '\n'.join(
            ' ' + s for s in self.sources)

        soperators = '<none>' if not self.operators else '\n' + '\n'.join(
            ' ' + s for s in self.operators)

        # Format a time, hiding the global sentinel min/max values.
        def stime(t):
            return util.tts(t) if t is not None and t not in (
                model.g_tmin, model.g_tmax) else '<none>'

        # Render rows as a left-justified, column-aligned text table.
        def stable(rows):
            ns = [max(len(w) for w in col) for col in zip(*rows)]
            return '\n'.join(
                ' '.join(w.ljust(n) for n, w in zip(ns, row))
                for row in rows)

        def indent(s):
            return '\n'.join(' '+line for line in s.splitlines())

        stspans = '<none>' if not self.kinds else '\n' + indent(stable([(
            kind + ':',
            str(kind_counts[kind]),
            stime(self.time_spans[kind][0]),
            '-',
            stime(self.time_spans[kind][1])) for kind in sorted(self.kinds)]))

        s = '''
Number of files: %i
Total size of known files: %s
Number of index nuts: %i
Available content kinds: %s
Available codes: %s
Sources: %s
Operators: %s''' % (
            self.nfiles,
            util.human_bytesize(self.total_size),
            self.nnuts,
            stspans, scodes, ssources, soperators)

        return s.lstrip()

2752 

2753 

# Public API of this module.
__all__ = [
    'Squirrel',
    'SquirrelStats',
]