1# http://pyrocko.org - GPLv3 

2# 

3# The Pyrocko Developers, 21st Century 

4# ---|P------/S----------~Lg---------- 

5 

6import sys 

7import os 

8 

9import math 

10import logging 

11import threading 

12import queue 

13from collections import defaultdict 

14 

15from pyrocko.guts import Object, Int, List, Tuple, String, Timestamp, Dict 

16from pyrocko import util, trace 

17from pyrocko.progress import progress 

18 

19from . import model, io, cache, dataset 

20 

21from .model import to_kind_id, WaveformOrder, to_kind, to_codes, \ 

22 STATION, CHANNEL, RESPONSE, EVENT, WAVEFORM, codes_patterns_list, \ 

23 codes_patterns_for_kind 

24from .client import fdsn, catalog 

25from .selection import Selection, filldocs 

26from .database import abspath 

27from .operators.base import Operator, CodesPatternFiltering 

28from . import client, environment, error 

29 

30logger = logging.getLogger('psq.base') 

31 

32guts_prefix = 'squirrel' 

33 

34 

def make_task(*args):
    # Create a progress task that reports through this module's logger.
    return progress.task(*args, logger=logger)

37 

38 

def lpick(condition, seq):
    '''
    Partition *seq* into two lists by *condition*.

    Returns a tuple ``(nay, yay)`` where ``nay`` holds the elements for
    which *condition* is falsy and ``yay`` those for which it is truthy.
    '''
    nay, yay = [], []
    for item in seq:
        (yay if condition(item) else nay).append(item)

    return nay, yay

45 

46 

def len_plural(obj):
    '''
    Return ``(length, suffix)`` for grammatically correct log messages.

    The suffix is ``'s'`` unless the length is exactly one.
    '''
    n = len(obj)
    return n, 's' if n != 1 else ''

49 

50 

def blocks(tmin, tmax, deltat, nsamples_block=100000):
    '''
    Yield time block boundaries covering the span ``[tmin, tmax]``.

    Blocks are aligned to integer multiples of the block length, which
    corresponds to *nsamples_block* samples at sampling interval *deltat*.
    Yields ``(tmin_block, tmax_block)`` pairs.
    '''
    tblock = util.to_time_float(deltat * nsamples_block)
    kmin = int(math.floor(tmin / tblock))
    kmax = int(math.ceil(tmax / tblock))
    for k in range(kmin, kmax):
        yield k * tblock, (k + 1) * tblock

57 

58 

def gaps(avail, tmin, tmax):
    '''
    Compute the parts of ``[tmin, tmax]`` not covered by *avail*.

    :param avail:
        Available spans as ``(tmin_a, tmax_a)`` pairs, each non-empty.

    Returns a list of ``(tmin_gap, tmax_gap)`` pairs. Implemented as a
    sweep over start/end events; the query span itself contributes an
    inverted pair so that uncovered regions show up as level drops.
    '''
    assert tmin < tmax

    events = [(tmax, 1), (tmin, -1)]
    for tmin_a, tmax_a in avail:
        assert tmin_a < tmax_a
        events.append((tmin_a, 1))
        events.append((tmax_a, -1))

    events.sort()

    result = []
    level = 1
    tmin_gap = None
    for t, step in events:
        if level == 1 and step == -1:
            # Coverage just dropped to zero: a gap may start here.
            tmin_gap = t
        elif level == 0 and step == 1 and tmin_gap is not None:
            # Coverage resumes: close the gap, dropping empty ones.
            if tmin_gap != t:
                result.append((tmin_gap, t))

        level += step

    return result

83 

84 

def order_key(order):
    '''
    Sorting/grouping key for orders: codes first, then time span.
    '''
    return order.codes, order.tmin, order.tmax

87 

88 

89def _is_exact(pat): 

90 return not ('*' in pat or '?' in pat or ']' in pat or '[' in pat) 

91 

92 

def prefix_tree(tups):
    '''
    Build a sorted prefix tree from a list of equal-length tuples.

    Each node is a ``(value, children)`` pair, where ``children`` is a
    list of nodes for the remaining tuple elements (empty at the leaves).
    '''
    if not tups:
        return []

    # Leaf level: single-element tuples become terminal nodes.
    if len(tups[0]) == 1:
        return sorted((tup[0], []) for tup in tups)

    grouped = defaultdict(list)
    for tup in tups:
        grouped[tup[0]].append(tup[1:])

    return [(head, prefix_tree(grouped[head])) for head in sorted(grouped)]

109 

110 

def match_time_span(tmin, tmax, obj):
    '''
    Check whether the query span ``[tmin, tmax]`` intersects *obj*'s span.

    ``None`` on either side of either span means unbounded. The lower
    edge comparison is inclusive, the upper edge exclusive.
    '''
    if obj.tmin is not None and tmax is not None and not obj.tmin <= tmax:
        return False

    if tmin is not None and obj.tmax is not None and not tmin < obj.tmax:
        return False

    return True

114 

115 

class Batch(object):
    '''
    Batch of waveforms from window-wise data extraction.

    Carries the state and results yielded for one time window during
    window-wise waveform extraction with the
    :py:meth:`Squirrel.chopper_waveforms` method.

    *Attributes:*

    .. py:attribute:: tmin

        Start of this time window.

    .. py:attribute:: tmax

        End of this time window.

    .. py:attribute:: i

        Index of this time window in sequence.

    .. py:attribute:: n

        Total number of time windows in sequence.

    .. py:attribute:: igroup

        Index of this time window's sequence group.

    .. py:attribute:: ngroups

        Total number of sequence groups.

    .. py:attribute:: traces

        Extracted waveforms for this time window.
    '''

    def __init__(self, tmin, tmax, i, n, igroup, ngroups, traces):
        (self.tmin, self.tmax, self.i, self.n,
         self.igroup, self.ngroups, self.traces) = (
            tmin, tmax, i, n, igroup, ngroups, traces)

162 

163 

164class Squirrel(Selection): 

165 ''' 

166 Prompt, lazy, indexing, caching, dynamic seismological dataset access. 

167 

168 :param env: 

169 Squirrel environment instance or directory path to use as starting 

170 point for its detection. By default, the current directory is used as 

171 starting point. When searching for a usable environment the directory 

172 ``'.squirrel'`` or ``'squirrel'`` in the current (or starting point) 

173 directory is used if it exists, otherwise the parent directories are 

174 search upwards for the existence of such a directory. If no such 

175 directory is found, the user's global Squirrel environment 

176 ``'$HOME/.pyrocko/squirrel'`` is used. 

177 :type env: 

178 :py:class:`~pyrocko.squirrel.environment.Environment` or 

179 :py:class:`str` 

180 

181 :param database: 

182 Database instance or path to database. By default the 

183 database found in the detected Squirrel environment is used. 

184 :type database: 

185 :py:class:`~pyrocko.squirrel.database.Database` or :py:class:`str` 

186 

187 :param cache_path: 

188 Directory path to use for data caching. By default, the ``'cache'`` 

189 directory in the detected Squirrel environment is used. 

190 :type cache_path: 

191 :py:class:`str` 

192 

193 :param persistent: 

194 If given a name, create a persistent selection. 

195 :type persistent: 

196 :py:class:`str` 

197 

198 This is the central class of the Squirrel framework. It provides a unified 

199 interface to query and access seismic waveforms, station meta-data and 

200 event information from local file collections and remote data sources. For 

201 prompt responses, a profound database setup is used under the hood. To 

202 speed up assemblage of ad-hoc data selections, files are indexed on first 

203 use and the extracted meta-data is remembered in the database for 

204 subsequent accesses. Bulk data is lazily loaded from disk and remote 

205 sources, just when requested. Once loaded, data is cached in memory to 

206 expedite typical access patterns. Files and data sources can be dynamically 

207 added to and removed from the Squirrel selection at runtime. 

208 

209 Queries are restricted to the contents of the files currently added to the 

210 Squirrel selection (usually a subset of the file meta-information 

211 collection in the database). This list of files is referred to here as the 

212 "selection". By default, temporary tables are created in the attached 

213 database to hold the names of the files in the selection as well as various 

214 indices and counters. These tables are only visible inside the application 

215 which created them and are deleted when the database connection is closed 

216 or the application exits. To create a selection which is not deleted at 

217 exit, supply a name to the ``persistent`` argument of the Squirrel 

218 constructor. Persistent selections are shared among applications using the 

219 same database. 

220 

221 **Method summary** 

222 

223 Some of the methods are implemented in :py:class:`Squirrel`'s base class 

224 :py:class:`~pyrocko.squirrel.selection.Selection`. 

225 

226 .. autosummary:: 

227 

228 ~Squirrel.add 

229 ~Squirrel.add_source 

230 ~Squirrel.add_fdsn 

231 ~Squirrel.add_catalog 

232 ~Squirrel.add_dataset 

233 ~Squirrel.add_virtual 

234 ~Squirrel.update 

235 ~Squirrel.update_waveform_promises 

236 ~Squirrel.advance_accessor 

237 ~Squirrel.clear_accessor 

238 ~Squirrel.reload 

239 ~pyrocko.squirrel.selection.Selection.iter_paths 

240 ~Squirrel.iter_nuts 

241 ~Squirrel.iter_kinds 

242 ~Squirrel.iter_deltats 

243 ~Squirrel.iter_codes 

244 ~pyrocko.squirrel.selection.Selection.get_paths 

245 ~Squirrel.get_nuts 

246 ~Squirrel.get_kinds 

247 ~Squirrel.get_deltats 

248 ~Squirrel.get_codes 

249 ~Squirrel.get_counts 

250 ~Squirrel.get_time_span 

251 ~Squirrel.get_deltat_span 

252 ~Squirrel.get_nfiles 

253 ~Squirrel.get_nnuts 

254 ~Squirrel.get_total_size 

255 ~Squirrel.get_stats 

256 ~Squirrel.get_content 

257 ~Squirrel.get_stations 

258 ~Squirrel.get_channels 

259 ~Squirrel.get_responses 

260 ~Squirrel.get_events 

261 ~Squirrel.get_waveform_nuts 

262 ~Squirrel.get_waveforms 

263 ~Squirrel.chopper_waveforms 

264 ~Squirrel.get_coverage 

265 ~Squirrel.pile 

266 ~Squirrel.snuffle 

267 ~Squirrel.glob_codes 

268 ~pyrocko.squirrel.selection.Selection.get_database 

269 ~Squirrel.print_tables 

270 ''' 

271 

    def __init__(
            self, env=None, database=None, cache_path=None, persistent=None):

        # Resolve the environment first; the defaults for all other
        # arguments are derived from it.
        if not isinstance(env, environment.Environment):
            env = environment.get_environment(env)

        if database is None:
            database = env.expand_path(env.database_path)

        if cache_path is None:
            cache_path = env.expand_path(env.cache_path)

        if persistent is None:
            persistent = env.persistent

        # Base class sets up the database connection and the selection
        # tables; must run before get_database() below.
        Selection.__init__(
            self, database=database, persistent=persistent)

        self.get_database().set_basepath(os.path.dirname(env.get_basepath()))

        # Waveforms get a cache separate from other content — presumably
        # because their size/eviction behavior differs; confirm in
        # pyrocko.squirrel.cache.
        self._content_caches = {
            'waveform': cache.ContentCache(),
            'default': cache.ContentCache()}

        self._cache_path = cache_path

        self._sources = []          # remote data access clients
        self._operators = []
        self._operator_registry = {}

        self._pending_orders = []

        self._pile = None           # lazily created pile emulation
        self._n_choppers_active = 0

        # Register names of the selection-specific tables created below.
        self._names.update({
            'nuts': self.name + '_nuts',
            'kind_codes_count': self.name + '_kind_codes_count',
            'coverage': self.name + '_coverage'})

        with self.transaction('create tables') as cursor:
            self._create_tables_squirrel(cursor)

314 

    def _create_tables_squirrel(self, cursor):
        '''
        Create this selection's tables, indices and triggers.

        ``nuts`` holds the index entries of the selection's files,
        ``kind_codes_count`` keeps per kind/codes entry counts and
        ``coverage`` tracks time-coverage change points. All three are
        maintained automatically by the triggers defined here.
        '''

        # Index entries ("nuts"). Times are split into integer seconds
        # plus offset; kscale buckets entries by time-span magnitude.
        cursor.execute(self._register_table(self._sql(
            '''
                CREATE TABLE IF NOT EXISTS %(db)s.%(nuts)s (
                    nut_id integer PRIMARY KEY,
                    file_id integer,
                    file_segment integer,
                    file_element integer,
                    kind_id integer,
                    kind_codes_id integer,
                    tmin_seconds integer,
                    tmin_offset integer,
                    tmax_seconds integer,
                    tmax_offset integer,
                    kscale integer)
            ''')))

        # Number of nuts per kind/codes combination in this selection.
        cursor.execute(self._register_table(self._sql(
            '''
                CREATE TABLE IF NOT EXISTS %(db)s.%(kind_codes_count)s (
                    kind_codes_id integer PRIMARY KEY,
                    count integer)
            ''')))

        cursor.execute(self._sql(
            '''
                CREATE UNIQUE INDEX IF NOT EXISTS %(db)s.%(nuts)s_file_element
                    ON %(nuts)s (file_id, file_segment, file_element)
            '''))

        cursor.execute(self._sql(
            '''
                CREATE INDEX IF NOT EXISTS %(db)s.%(nuts)s_index_file_id
                    ON %(nuts)s (file_id)
            '''))

        # Time indices backing the lookups built in _timerange_sql.
        cursor.execute(self._sql(
            '''
                CREATE INDEX IF NOT EXISTS %(db)s.%(nuts)s_index_tmin_seconds
                    ON %(nuts)s (kind_id, tmin_seconds)
            '''))

        cursor.execute(self._sql(
            '''
                CREATE INDEX IF NOT EXISTS %(db)s.%(nuts)s_index_tmax_seconds
                    ON %(nuts)s (kind_id, tmax_seconds)
            '''))

        cursor.execute(self._sql(
            '''
                CREATE INDEX IF NOT EXISTS %(db)s.%(nuts)s_index_kscale
                    ON %(nuts)s (kind_id, kscale, tmin_seconds)
            '''))

        # Drop nuts when their file is deleted from the main database.
        cursor.execute(self._sql(
            '''
                CREATE TRIGGER IF NOT EXISTS %(db)s.%(nuts)s_delete_nuts
                    BEFORE DELETE ON main.files FOR EACH ROW
                    BEGIN
                        DELETE FROM %(nuts)s WHERE file_id == old.file_id;
                    END
            '''))

        # trigger only on size to make silent update of mtime possible
        cursor.execute(self._sql(
            '''
                CREATE TRIGGER IF NOT EXISTS %(db)s.%(nuts)s_delete_nuts2
                    BEFORE UPDATE OF size ON main.files FOR EACH ROW
                    BEGIN
                        DELETE FROM %(nuts)s WHERE file_id == old.file_id;
                    END
            '''))

        # Drop nuts when a file leaves this selection.
        cursor.execute(self._sql(
            '''
                CREATE TRIGGER IF NOT EXISTS
                    %(db)s.%(file_states)s_delete_files
                    BEFORE DELETE ON %(db)s.%(file_states)s FOR EACH ROW
                    BEGIN
                        DELETE FROM %(nuts)s WHERE file_id == old.file_id;
                    END
            '''))

        # Keep kind_codes_count in sync: increment on nut insertion ...
        cursor.execute(self._sql(
            '''
                CREATE TRIGGER IF NOT EXISTS %(db)s.%(nuts)s_inc_kind_codes
                    BEFORE INSERT ON %(nuts)s FOR EACH ROW
                    BEGIN
                        INSERT OR IGNORE INTO %(kind_codes_count)s VALUES
                            (new.kind_codes_id, 0);
                        UPDATE %(kind_codes_count)s
                        SET count = count + 1
                        WHERE new.kind_codes_id
                            == %(kind_codes_count)s.kind_codes_id;
                    END
            '''))

        # ... and decrement on nut deletion.
        cursor.execute(self._sql(
            '''
                CREATE TRIGGER IF NOT EXISTS %(db)s.%(nuts)s_dec_kind_codes
                    BEFORE DELETE ON %(nuts)s FOR EACH ROW
                    BEGIN
                        UPDATE %(kind_codes_count)s
                        SET count = count - 1
                        WHERE old.kind_codes_id
                            == %(kind_codes_count)s.kind_codes_id;
                    END
            '''))

        # Coverage change points. `step` is the net number of nut spans
        # starting minus ending at this time instant.
        cursor.execute(self._register_table(self._sql(
            '''
                CREATE TABLE IF NOT EXISTS %(db)s.%(coverage)s (
                    kind_codes_id integer,
                    time_seconds integer,
                    time_offset integer,
                    step integer)
            ''')))

        cursor.execute(self._sql(
            '''
                CREATE UNIQUE INDEX IF NOT EXISTS %(db)s.%(coverage)s_time
                    ON %(coverage)s (kind_codes_id, time_seconds, time_offset)
            '''))

        # On nut insertion: +1 at the nut's tmin, -1 at its tmax, then
        # remove change points whose step becomes zero.
        cursor.execute(self._sql(
            '''
                CREATE TRIGGER IF NOT EXISTS %(db)s.%(nuts)s_add_coverage
                    AFTER INSERT ON %(nuts)s FOR EACH ROW
                    BEGIN
                        INSERT OR IGNORE INTO %(coverage)s VALUES
                        (new.kind_codes_id, new.tmin_seconds, new.tmin_offset, 0)
                        ;
                        UPDATE %(coverage)s
                        SET step = step + 1
                        WHERE new.kind_codes_id == %(coverage)s.kind_codes_id
                            AND new.tmin_seconds == %(coverage)s.time_seconds
                            AND new.tmin_offset == %(coverage)s.time_offset
                        ;
                        INSERT OR IGNORE INTO %(coverage)s VALUES
                        (new.kind_codes_id, new.tmax_seconds, new.tmax_offset, 0)
                        ;
                        UPDATE %(coverage)s
                        SET step = step - 1
                        WHERE new.kind_codes_id == %(coverage)s.kind_codes_id
                            AND new.tmax_seconds == %(coverage)s.time_seconds
                            AND new.tmax_offset == %(coverage)s.time_offset
                        ;
                        DELETE FROM %(coverage)s
                            WHERE new.kind_codes_id == %(coverage)s.kind_codes_id
                                AND new.tmin_seconds == %(coverage)s.time_seconds
                                AND new.tmin_offset == %(coverage)s.time_offset
                                AND step == 0
                        ;
                        DELETE FROM %(coverage)s
                            WHERE new.kind_codes_id == %(coverage)s.kind_codes_id
                                AND new.tmax_seconds == %(coverage)s.time_seconds
                                AND new.tmax_offset == %(coverage)s.time_offset
                                AND step == 0
                        ;
                    END
            '''))

        # On nut deletion: mirror of the insertion trigger with the
        # signs reversed.
        cursor.execute(self._sql(
            '''
                CREATE TRIGGER IF NOT EXISTS %(db)s.%(nuts)s_remove_coverage
                    BEFORE DELETE ON %(nuts)s FOR EACH ROW
                    BEGIN
                        INSERT OR IGNORE INTO %(coverage)s VALUES
                        (old.kind_codes_id, old.tmin_seconds, old.tmin_offset, 0)
                        ;
                        UPDATE %(coverage)s
                        SET step = step - 1
                        WHERE old.kind_codes_id == %(coverage)s.kind_codes_id
                            AND old.tmin_seconds == %(coverage)s.time_seconds
                            AND old.tmin_offset == %(coverage)s.time_offset
                        ;
                        INSERT OR IGNORE INTO %(coverage)s VALUES
                        (old.kind_codes_id, old.tmax_seconds, old.tmax_offset, 0)
                        ;
                        UPDATE %(coverage)s
                        SET step = step + 1
                        WHERE old.kind_codes_id == %(coverage)s.kind_codes_id
                            AND old.tmax_seconds == %(coverage)s.time_seconds
                            AND old.tmax_offset == %(coverage)s.time_offset
                        ;
                        DELETE FROM %(coverage)s
                            WHERE old.kind_codes_id == %(coverage)s.kind_codes_id
                                AND old.tmin_seconds == %(coverage)s.time_seconds
                                AND old.tmin_offset == %(coverage)s.time_offset
                                AND step == 0
                        ;
                        DELETE FROM %(coverage)s
                            WHERE old.kind_codes_id == %(coverage)s.kind_codes_id
                                AND old.tmax_seconds == %(coverage)s.time_seconds
                                AND old.tmax_offset == %(coverage)s.time_offset
                                AND step == 0
                        ;
                    END
            '''))

515 

    def _delete(self):
        '''Delete database tables associated with this Squirrel.'''

        with self.transaction('delete tables') as cursor:
            # Each line of the block below is executed as a separate SQL
            # statement. The coverage trigger/table drops use IF EXISTS —
            # presumably to cope with selections created before coverage
            # tracking existed; confirm against the schema history.
            for s in '''
                    DROP TRIGGER %(db)s.%(nuts)s_delete_nuts;
                    DROP TRIGGER %(db)s.%(nuts)s_delete_nuts2;
                    DROP TRIGGER %(db)s.%(file_states)s_delete_files;
                    DROP TRIGGER %(db)s.%(nuts)s_inc_kind_codes;
                    DROP TRIGGER %(db)s.%(nuts)s_dec_kind_codes;
                    DROP TABLE %(db)s.%(nuts)s;
                    DROP TABLE %(db)s.%(kind_codes_count)s;
                    DROP TRIGGER IF EXISTS %(db)s.%(nuts)s_add_coverage;
                    DROP TRIGGER IF EXISTS %(db)s.%(nuts)s_remove_coverage;
                    DROP TABLE IF EXISTS %(db)s.%(coverage)s;
                    '''.strip().splitlines():

                cursor.execute(self._sql(s))

        # Let the base class drop the generic selection tables last.
        Selection._delete(self)

536 

    @filldocs
    def add(self,
            paths,
            kinds=None,
            format='detect',
            include=None,
            exclude=None,
            check=True):

        '''
        Add files to the selection.

        :param paths:
            Iterator yielding paths to files or directories to be added to the
            selection. Recurses into directories. If given a ``str``, it
            is treated as a single path to be added.
        :type paths:
            :py:class:`list` of :py:class:`str`

        :param kinds:
            Content types to be made available through the Squirrel selection.
            By default, all known content types are accepted.
        :type kinds:
            :py:class:`list` of :py:class:`str`

        :param format:
            File format identifier or ``'detect'`` to enable auto-detection
            (available: %(file_formats)s).
        :type format:
            str

        :param include:
            If not ``None``, files are only included if their paths match the
            given regular expression pattern.
        :type include:
            str

        :param exclude:
            If not ``None``, files are only included if their paths do not
            match the given regular expression pattern.
        :type exclude:
            str

        :param check:
            If ``True``, all file modification times are checked to see if
            cached information has to be updated (slow). If ``False``, only
            previously unknown files are indexed and cached information is used
            for known files, regardless of file state (fast, corresponds to
            Squirrel's ``--optimistic`` mode). File deletions will go
            undetected in the latter case.
        :type check:
            bool

        :Complexity:
            O(log N)
        '''

        # Normalize scalar arguments to sequences.
        if isinstance(kinds, str):
            kinds = (kinds,)

        if isinstance(paths, str):
            paths = [paths]

        kind_mask = model.to_kind_mask(kinds)

        with progress.view():
            # 'virtual:' paths are not real files; pass them through
            # unexpanded.
            Selection.add(
                self, util.iter_select_files(
                    paths,
                    show_progress=False,
                    include=include,
                    exclude=exclude,
                    pass_through=lambda path: path.startswith('virtual:')
                ), kind_mask, format)

        # Index new/changed files, then pull their nuts into this
        # selection's tables.
        self._load(check)
        self._update_nuts()

614 

    def reload(self):
        '''
        Check for modifications and reindex modified files.

        Based on file modification times.
        '''

        # Force a state check on all files, reindex what changed, then
        # refresh this selection's nut tables.
        self._set_file_states_force_check()
        self._load(check=True)
        self._update_nuts()

625 

626 def add_virtual(self, nuts, virtual_paths=None): 

627 ''' 

628 Add content which is not backed by files. 

629 

630 :param nuts: 

631 Content pieces to be added. 

632 :type nuts: 

633 iterator yielding :py:class:`~pyrocko.squirrel.model.Nut` objects 

634 

635 :param virtual_paths: 

636 List of virtual paths to prevent creating a temporary list of the 

637 nuts while aggregating the file paths for the selection. 

638 :type virtual_paths: 

639 :py:class:`list` of :py:class:`str` 

640 

641 Stores to the main database and the selection. 

642 ''' 

643 

644 if isinstance(virtual_paths, str): 

645 virtual_paths = [virtual_paths] 

646 

647 if virtual_paths is None: 

648 if not isinstance(nuts, list): 

649 nuts = list(nuts) 

650 virtual_paths = set(nut.file_path for nut in nuts) 

651 

652 Selection.add(self, virtual_paths) 

653 self.get_database().dig(nuts) 

654 self._update_nuts() 

655 

656 def add_volatile(self, nuts): 

657 if not isinstance(nuts, list): 

658 nuts = list(nuts) 

659 

660 paths = list(set(nut.file_path for nut in nuts)) 

661 io.backends.virtual.add_nuts(nuts) 

662 self.add_virtual(nuts, paths) 

663 self._volatile_paths.extend(paths) 

664 

    def add_volatile_waveforms(self, traces):
        '''
        Add in-memory waveforms which will be removed when the app closes.

        :param traces:
            Waveforms to be added.

        :returns:
            Virtual path under which the waveforms were registered.
        '''

        name = model.random_name()

        path = 'virtual:volatile:%s' % name

        nuts = []
        for itr, tr in enumerate(traces):
            assert tr.tmin <= tr.tmax
            tmin_seconds, tmin_offset = model.tsplit(tr.tmin)
            # End time is derived from the sample count, not taken from
            # tr.tmax, so the span is consistent with deltat.
            tmax_seconds, tmax_offset = model.tsplit(
                tr.tmin + tr.data_len()*tr.deltat)

            # One nut per trace; the trace itself is attached as content
            # so no file access is ever needed.
            nuts.append(model.Nut(
                file_path=path,
                file_format='virtual',
                file_segment=itr,
                file_element=0,
                file_mtime=0,
                codes=tr.codes,
                tmin_seconds=tmin_seconds,
                tmin_offset=tmin_offset,
                tmax_seconds=tmax_seconds,
                tmax_offset=tmax_offset,
                deltat=tr.deltat,
                kind_id=to_kind_id('waveform'),
                content=tr))

        self.add_volatile(nuts)
        return path

698 

    def _load(self, check):
        # Drive the indexing machinery: iterating iload() with content=[]
        # indexes the selection's files without loading any payload.
        for _ in io.iload(
                self,
                content=[],
                skip_unchanged=True,
                check=check):
            pass

706 

    def _update_nuts(self, transaction=None):
        '''
        Copy nuts of files newly added to this selection into its tables.

        Only files whose state is not "known" (``file_state != 2``) and
        whose kind matches the selection's kind mask are considered.
        '''
        transaction = transaction or self.transaction('update nuts')
        with make_task('Aggregating selection') as task, \
                transaction as cursor:

            # Report SQL progress to the task every 100000 VM steps.
            self._conn.set_progress_handler(task.update, 100000)
            nrows = cursor.execute(self._sql(
                '''
                    INSERT INTO %(db)s.%(nuts)s
                    SELECT NULL,
                        nuts.file_id, nuts.file_segment, nuts.file_element,
                        nuts.kind_id, nuts.kind_codes_id,
                        nuts.tmin_seconds, nuts.tmin_offset,
                        nuts.tmax_seconds, nuts.tmax_offset,
                        nuts.kscale
                    FROM %(db)s.%(file_states)s
                    INNER JOIN nuts
                        ON %(db)s.%(file_states)s.file_id == nuts.file_id
                    INNER JOIN kind_codes
                        ON nuts.kind_codes_id ==
                            kind_codes.kind_codes_id
                    WHERE %(db)s.%(file_states)s.file_state != 2
                        AND (((1 << kind_codes.kind_id)
                            & %(db)s.%(file_states)s.kind_mask) != 0)
                ''')).rowcount

            task.update(nrows)
            # Mark all files as known and detach the progress handler.
            self._set_file_states_known(transaction)
            self._conn.set_progress_handler(None, 0)

736 

    def add_source(self, source, check=True):
        '''
        Add remote resource.

        :param source:
            Remote data access client instance.
        :type source:
            subclass of :py:class:`~pyrocko.squirrel.client.base.Source`

        :param check:
            Forwarded to the source's ``setup`` — presumably controls
            file-state checking as in :py:meth:`add`; confirm in the
            client implementation.
        :type check:
            bool
        '''

        # Register first, then let the source attach itself to us.
        self._sources.append(source)
        source.setup(self, check=check)

749 

    def add_fdsn(self, *args, **kwargs):
        '''
        Add FDSN site for transparent remote data access.

        Arguments are passed to
        :py:class:`~pyrocko.squirrel.client.fdsn.FDSNSource`.
        '''

        self.add_source(fdsn.FDSNSource(*args, **kwargs))

759 

    def add_catalog(self, *args, **kwargs):
        '''
        Add online catalog for transparent event data access.

        Arguments are passed to
        :py:class:`~pyrocko.squirrel.client.catalog.CatalogSource`.
        '''

        self.add_source(catalog.CatalogSource(*args, **kwargs))

769 

    def add_dataset(self, ds, check=True):
        '''
        Read dataset description from file and add its contents.

        :param ds:
            Path to dataset description file or dataset description object.
            See :py:mod:`~pyrocko.squirrel.dataset`.
        :type ds:
            :py:class:`str` or :py:class:`~pyrocko.squirrel.dataset.Dataset`

        :param check:
            If ``True``, all file modification times are checked to see if
            cached information has to be updated (slow). If ``False``, only
            previously unknown files are indexed and cached information is used
            for known files, regardless of file state (fast, corresponds to
            Squirrel's ``--optimistic`` mode). File deletions will go
            undetected in the latter case.
        :type check:
            bool
        '''
        if isinstance(ds, str):
            ds = dataset.read_dataset(ds)

        ds.setup(self, check=check)

794 

795 def _get_selection_args( 

796 self, kind_id, 

797 obj=None, tmin=None, tmax=None, time=None, codes=None): 

798 

799 if codes is not None: 

800 codes = codes_patterns_for_kind(kind_id, codes) 

801 

802 if time is not None: 

803 tmin = time 

804 tmax = time 

805 

806 if obj is not None: 

807 tmin = tmin if tmin is not None else obj.tmin 

808 tmax = tmax if tmax is not None else obj.tmax 

809 codes = codes if codes is not None else codes_patterns_for_kind( 

810 kind_id, obj.codes) 

811 

812 return tmin, tmax, codes 

813 

814 def _get_selection_args_str(self, *args, **kwargs): 

815 

816 tmin, tmax, codes = self._get_selection_args(*args, **kwargs) 

817 return 'tmin: %s, tmax: %s, codes: %s' % ( 

818 util.time_to_str(tmin) if tmin is not None else 'none', 

819 util.time_to_str(tmax) if tmax is not None else 'none', 

820 ','.join(str(entry) for entry in codes)) 

821 

822 def _selection_args_to_kwargs( 

823 self, obj=None, tmin=None, tmax=None, time=None, codes=None): 

824 

825 return dict(obj=obj, tmin=tmin, tmax=tmax, time=time, codes=codes) 

826 

    def _timerange_sql(self, tmin, tmax, kind, cond, args, naiv):
        '''
        Append SQL conditions restricting nuts to the time range.

        Condition fragments are appended to *cond*, query parameters to
        *args*. With ``naiv=True`` the kscale-bucketed lookup is bypassed
        (slow; for testing).
        '''

        tmin_seconds, tmin_offset = model.tsplit(tmin)
        tmax_seconds, tmax_offset = model.tsplit(tmax)
        if naiv:
            cond.append('%(db)s.%(nuts)s.tmin_seconds <= ?')
            args.append(tmax_seconds)
        else:
            tscale_edges = model.tscale_edges
            tmin_cond = []
            # One OR-branch per span-magnitude bucket: nuts in bucket
            # kscale have spans bounded by tscale, so their tmin must lie
            # in [tmin - tscale, tmax], widened by 1 s for the sub-second
            # offsets dropped by tsplit.
            for kscale in range(tscale_edges.size + 1):
                if kscale != tscale_edges.size:
                    tscale = int(tscale_edges[kscale])
                    tmin_cond.append('''
                        (%(db)s.%(nuts)s.kind_id = ?
                         AND %(db)s.%(nuts)s.kscale == ?
                         AND %(db)s.%(nuts)s.tmin_seconds BETWEEN ? AND ?)
                    ''')
                    args.extend(
                        (to_kind_id(kind), kscale,
                         tmin_seconds - tscale - 1, tmax_seconds + 1))

                else:
                    # Last bucket: span length unbounded, so only an
                    # upper limit on tmin applies.
                    tmin_cond.append('''
                        (%(db)s.%(nuts)s.kind_id == ?
                         AND %(db)s.%(nuts)s.kscale == ?
                         AND %(db)s.%(nuts)s.tmin_seconds <= ?)
                    ''')

                    args.extend(
                        (to_kind_id(kind), kscale, tmax_seconds + 1))
            if tmin_cond:
                cond.append(' ( ' + ' OR '.join(tmin_cond) + ' ) ')

        # Common lower bound: the nut must not end before the query span.
        cond.append('%(db)s.%(nuts)s.tmax_seconds >= ?')
        args.append(tmin_seconds)

863 

864 def _codes_match_sql(self, kind_id, codes, cond, args): 

865 pats = codes_patterns_for_kind(kind_id, codes) 

866 if pats is None: 

867 return 

868 

869 pats_exact = [] 

870 pats_nonexact = [] 

871 for pat in pats: 

872 spat = pat.safe_str 

873 (pats_exact if _is_exact(spat) else pats_nonexact).append(spat) 

874 

875 cond_exact = None 

876 if pats_exact: 

877 cond_exact = ' ( kind_codes.codes IN ( %s ) ) ' % ', '.join( 

878 '?'*len(pats_exact)) 

879 

880 args.extend(pats_exact) 

881 

882 cond_nonexact = None 

883 if pats_nonexact: 

884 cond_nonexact = ' ( %s ) ' % ' OR '.join( 

885 ('kind_codes.codes GLOB ?',) * len(pats_nonexact)) 

886 

887 args.extend(pats_nonexact) 

888 

889 if cond_exact and cond_nonexact: 

890 cond.append(' ( %s OR %s ) ' % (cond_exact, cond_nonexact)) 

891 

892 elif cond_exact: 

893 cond.append(cond_exact) 

894 

895 elif cond_nonexact: 

896 cond.append(cond_nonexact) 

897 

    def iter_nuts(
            self, kind=None, tmin=None, tmax=None, codes=None, naiv=False,
            kind_codes_ids=None, path=None, limit=None):

        '''
        Iterate over content entities matching given constraints.

        :param kind:
            Content kind (or kinds) to extract.
        :type kind:
            :py:class:`str`, :py:class:`list` of :py:class:`str`

        :param tmin:
            Start time of query interval.
        :type tmin:
            timestamp

        :param tmax:
            End time of query interval.
        :type tmax:
            timestamp

        :param codes:
            List of code patterns to query.
        :type codes:
            :py:class:`list` of :py:class:`~pyrocko.squirrel.model.Codes`
            objects appropriate for the queried content type, or anything which
            can be converted to such objects.

        :param naiv:
            Bypass time span lookup through indices (slow, for testing).
        :type naiv:
            :py:class:`bool`

        :param kind_codes_ids:
            Kind-codes IDs of contents to be retrieved (internal use).
        :type kind_codes_ids:
            :py:class:`list` of :py:class:`int`

        :param path:
            If given, restrict the query to entities from this file path.
        :type path:
            :py:class:`str`

        :param limit:
            If given, maximum number of database rows to consider.
        :type limit:
            :py:class:`int`

        :yields:
            :py:class:`~pyrocko.squirrel.model.Nut` objects representing the
            intersecting content.

        :complexity:
            O(log N) for the time selection part due to heavy use of database
            indices.

        Query time span is treated as a half-open interval ``[tmin, tmax)``.
        However, if ``tmin`` equals ``tmax``, the edge logics are modified to
        closed-interval so that content intersecting with the time instant ``t
        = tmin = tmax`` is returned (otherwise nothing would be returned as
        ``[t, t)`` never matches anything).

        Time spans of content entities to be matched are also treated as half
        open intervals, e.g. content span ``[0, 1)`` is matched by query span
        ``[0, 1)`` but not by ``[-1, 0)`` or ``[1, 2)``. Also here, logics are
        modified to closed-interval when the content time span is an empty
        interval, i.e. to indicate a time instant. E.g. time instant 0 is
        matched by ``[0, 1)`` but not by ``[-1, 0)`` or ``[1, 2)``.
        '''

        # Multiple (or all) kinds: recurse once per kind and merge.
        if not isinstance(kind, str):
            if kind is None:
                kind = model.g_content_kinds
            for kind_ in kind:
                for nut in self.iter_nuts(kind_, tmin, tmax, codes):
                    yield nut

            return

        kind_id = to_kind_id(kind)

        cond = []
        args = []
        if tmin is not None or tmax is not None:
            assert kind is not None
            # Fill the open end from the selection's total time span.
            if tmin is None:
                tmin = self.get_time_span()[0]
            if tmax is None:
                tmax = self.get_time_span()[1] + 1.0

            self._timerange_sql(tmin, tmax, kind, cond, args, naiv)

        cond.append('kind_codes.kind_id == ?')
        args.append(kind_id)

        if codes is not None:
            self._codes_match_sql(kind_id, codes, cond, args)

        if kind_codes_ids is not None:
            cond.append(
                ' ( kind_codes.kind_codes_id IN ( %s ) ) ' % ', '.join(
                    '?'*len(kind_codes_ids)))

            args.extend(kind_codes_ids)

        db = self.get_database()
        if path is not None:
            cond.append('files.path == ?')
            args.append(db.relpath(abspath(path)))

        sql = ('''
            SELECT
                files.path,
                files.format,
                files.mtime,
                files.size,
                %(db)s.%(nuts)s.file_segment,
                %(db)s.%(nuts)s.file_element,
                kind_codes.kind_id,
                kind_codes.codes,
                %(db)s.%(nuts)s.tmin_seconds,
                %(db)s.%(nuts)s.tmin_offset,
                %(db)s.%(nuts)s.tmax_seconds,
                %(db)s.%(nuts)s.tmax_offset,
                kind_codes.deltat
            FROM files
            INNER JOIN %(db)s.%(nuts)s
                ON files.file_id == %(db)s.%(nuts)s.file_id
            INNER JOIN kind_codes
                ON %(db)s.%(nuts)s.kind_codes_id == kind_codes.kind_codes_id
            ''')

        if cond:
            sql += ''' WHERE ''' + ' AND '.join(cond)

        if limit is not None:
            sql += ''' LIMIT %i''' % limit

        sql = self._sql(sql)
        if tmin is None and tmax is None:
            # No time constraint: yield everything the SQL matched.
            for row in self._conn.execute(sql, args):
                row = (db.abspath(row[0]),) + row[1:]
                nut = model.Nut(values_nocheck=row)
                yield nut
        else:
            assert tmin is not None and tmax is not None
            if tmin == tmax:
                # Time-instant query: closed-interval edge logics (see
                # docstring).
                for row in self._conn.execute(sql, args):
                    row = (db.abspath(row[0]),) + row[1:]
                    nut = model.Nut(values_nocheck=row)
                    if (nut.tmin <= tmin < nut.tmax) \
                            or (nut.tmin == nut.tmax and tmin == nut.tmin):

                        yield nut
            else:
                # Half-open query interval; the SQL pre-selection is only
                # seconds-accurate, so re-check with full precision here.
                for row in self._conn.execute(sql, args):
                    row = (db.abspath(row[0]),) + row[1:]
                    nut = model.Nut(values_nocheck=row)
                    if (tmin < nut.tmax and nut.tmin < tmax) \
                            or (nut.tmin == nut.tmax
                                and tmin <= nut.tmin < tmax):

                        yield nut

1052 

1053 def get_nuts(self, *args, **kwargs): 

1054 ''' 

1055 Get content entities matching given constraints. 

1056 

1057 Like :py:meth:`iter_nuts` but returns results as a list. 

1058 ''' 

1059 

1060 return list(self.iter_nuts(*args, **kwargs)) 

1061 

    def _split_nuts(
            self, kind, tmin=None, tmax=None, codes=None, path=None):
        '''
        Cut out a time window from matching index entries (nuts).

        For every nut of the given ``kind`` matching the ``codes``/``path``
        constraints and overlapping [``tmin``, ``tmax``], the overlapping
        part is removed: the original nut is deleted and, where it sticks
        out of the window on either side, replacement nuts covering only
        the left-over parts are inserted. The operation is applied both to
        the live selection's nut table and to the main database's nut
        table, inside a single transaction.

        :param kind:
            Content kind of the nuts to be split (e.g.
            ``'waveform_promise'``).
        :param tmin:
            Start of the time window to cut out.
        :param tmax:
            End of the time window to cut out.
        :param codes:
            If given, restrict to matching codes.
        :param path:
            If given, restrict to nuts from this file path.
        '''

        kind_id = to_kind_id(kind)
        # Times are stored split into integer seconds plus float offset.
        tmin_seconds, tmin_offset = model.tsplit(tmin)
        tmax_seconds, tmax_offset = model.tsplit(tmax)

        # Second pass targets the main database's nut table instead of the
        # selection-local one.
        names_main_nuts = dict(self._names)
        names_main_nuts.update(db='main', nuts='nuts')

        db = self.get_database()

        def main_nuts(s):
            return s % names_main_nuts

        with self.transaction('split nuts') as cursor:
            # modify selection and main
            for sql_subst in [
                    self._sql, main_nuts]:

                cond = []
                args = []

                self._timerange_sql(tmin, tmax, kind, cond, args, False)

                if codes is not None:
                    self._codes_match_sql(kind_id, codes, cond, args)

                if path is not None:
                    cond.append('files.path == ?')
                    args.append(db.relpath(abspath(path)))

                sql = sql_subst('''
                    SELECT
                        %(db)s.%(nuts)s.nut_id,
                        %(db)s.%(nuts)s.tmin_seconds,
                        %(db)s.%(nuts)s.tmin_offset,
                        %(db)s.%(nuts)s.tmax_seconds,
                        %(db)s.%(nuts)s.tmax_offset,
                        kind_codes.deltat
                    FROM files
                    INNER JOIN %(db)s.%(nuts)s
                        ON files.file_id == %(db)s.%(nuts)s.file_id
                    INNER JOIN kind_codes
                        ON %(db)s.%(nuts)s.kind_codes_id == kind_codes.kind_codes_id
                    WHERE ''' + ' AND '.join(cond))  # noqa

                insert = []
                delete = []
                for row in cursor.execute(sql, args):
                    nut_id, nut_tmin_seconds, nut_tmin_offset, \
                        nut_tmax_seconds, nut_tmax_offset, nut_deltat = row

                    nut_tmin = model.tjoin(
                        nut_tmin_seconds, nut_tmin_offset)
                    nut_tmax = model.tjoin(
                        nut_tmax_seconds, nut_tmax_offset)

                    # Only nuts truly overlapping the cut window are
                    # affected; others are left untouched.
                    if nut_tmin < tmax and tmin < nut_tmax:
                        if nut_tmin < tmin:
                            # Keep the part left of the cut window.
                            insert.append((
                                nut_tmin_seconds, nut_tmin_offset,
                                tmin_seconds, tmin_offset,
                                model.tscale_to_kscale(
                                    tmin_seconds - nut_tmin_seconds),
                                nut_id))

                        if tmax < nut_tmax:
                            # Keep the part right of the cut window.
                            insert.append((
                                tmax_seconds, tmax_offset,
                                nut_tmax_seconds, nut_tmax_offset,
                                model.tscale_to_kscale(
                                    nut_tmax_seconds - tmax_seconds),
                                nut_id))

                        delete.append((nut_id,))

                # Replacement nuts copy all columns from the original nut
                # except the time span and kscale, which are supplied as
                # bound parameters.
                sql_add = '''
                    INSERT INTO %(db)s.%(nuts)s (
                            file_id, file_segment, file_element, kind_id,
                            kind_codes_id, tmin_seconds, tmin_offset,
                            tmax_seconds, tmax_offset, kscale )
                    SELECT
                        file_id, file_segment, file_element,
                        kind_id, kind_codes_id, ?, ?, ?, ?, ?
                    FROM %(db)s.%(nuts)s
                    WHERE nut_id == ?
                '''
                cursor.executemany(sql_subst(sql_add), insert)

                sql_delete = '''
                    DELETE FROM %(db)s.%(nuts)s WHERE nut_id == ?
                '''
                cursor.executemany(sql_subst(sql_delete), delete)

1156 

1157 def get_time_span(self, kinds=None): 

1158 ''' 

1159 Get time interval over all content in selection. 

1160 

1161 :param kinds: 

1162 If not ``None``, restrict query to given content kinds. 

1163 :type kind: 

1164 list of str 

1165 

1166 :complexity: 

1167 O(1), independent of the number of nuts. 

1168 

1169 :returns: 

1170 ``(tmin, tmax)``, combined time interval of queried content kinds. 

1171 ''' 

1172 

1173 sql_min = self._sql(''' 

1174 SELECT MIN(tmin_seconds), MIN(tmin_offset) 

1175 FROM %(db)s.%(nuts)s 

1176 WHERE kind_id == ? 

1177 AND tmin_seconds == ( 

1178 SELECT MIN(tmin_seconds) 

1179 FROM %(db)s.%(nuts)s 

1180 WHERE kind_id == ?) 

1181 ''') 

1182 

1183 sql_max = self._sql(''' 

1184 SELECT MAX(tmax_seconds), MAX(tmax_offset) 

1185 FROM %(db)s.%(nuts)s 

1186 WHERE kind_id == ? 

1187 AND tmax_seconds == ( 

1188 SELECT MAX(tmax_seconds) 

1189 FROM %(db)s.%(nuts)s 

1190 WHERE kind_id == ?) 

1191 ''') 

1192 

1193 gtmin = None 

1194 gtmax = None 

1195 

1196 if isinstance(kinds, str): 

1197 kinds = [kinds] 

1198 

1199 if kinds is None: 

1200 kind_ids = model.g_content_kind_ids 

1201 else: 

1202 kind_ids = model.to_kind_ids(kinds) 

1203 

1204 for kind_id in kind_ids: 

1205 for tmin_seconds, tmin_offset in self._conn.execute( 

1206 sql_min, (kind_id, kind_id)): 

1207 tmin = model.tjoin(tmin_seconds, tmin_offset) 

1208 if tmin is not None and (gtmin is None or tmin < gtmin): 

1209 gtmin = tmin 

1210 

1211 for (tmax_seconds, tmax_offset) in self._conn.execute( 

1212 sql_max, (kind_id, kind_id)): 

1213 tmax = model.tjoin(tmax_seconds, tmax_offset) 

1214 if tmax is not None and (gtmax is None or tmax > gtmax): 

1215 gtmax = tmax 

1216 

1217 return gtmin, gtmax 

1218 

1219 def has(self, kinds): 

1220 ''' 

1221 Check availability of given content kinds. 

1222 

1223 :param kinds: 

1224 Content kinds to query. 

1225 :type kind: 

1226 list of str 

1227 

1228 :returns: 

1229 ``True`` if any of the queried content kinds is available 

1230 in the selection. 

1231 ''' 

1232 self_tmin, self_tmax = self.get_time_span(kinds) 

1233 

1234 return None not in (self_tmin, self_tmax) 

1235 

1236 def get_deltat_span(self, kind): 

1237 ''' 

1238 Get min and max sampling interval of all content of given kind. 

1239 

1240 :param kind: 

1241 Content kind 

1242 :type kind: 

1243 str 

1244 

1245 :returns: ``(deltat_min, deltat_max)`` 

1246 ''' 

1247 

1248 deltats = [ 

1249 deltat for deltat in self.get_deltats(kind) 

1250 if deltat is not None] 

1251 

1252 if deltats: 

1253 return min(deltats), max(deltats) 

1254 else: 

1255 return None, None 

1256 

1257 def iter_kinds(self, codes=None): 

1258 ''' 

1259 Iterate over content types available in selection. 

1260 

1261 :param codes: 

1262 If given, get kinds only for selected codes identifier. 

1263 Only a single identifier may be given here and no pattern matching 

1264 is done, currently. 

1265 :type codes: 

1266 :py:class:`~pyrocko.squirrel.model.Codes` 

1267 

1268 :yields: 

1269 Available content kinds as :py:class:`str`. 

1270 

1271 :complexity: 

1272 O(1), independent of number of nuts. 

1273 ''' 

1274 

1275 return self._database._iter_kinds( 

1276 codes=codes, 

1277 kind_codes_count='%(db)s.%(kind_codes_count)s' % self._names) 

1278 

1279 def iter_deltats(self, kind=None): 

1280 ''' 

1281 Iterate over sampling intervals available in selection. 

1282 

1283 :param kind: 

1284 If given, get sampling intervals only for a given content type. 

1285 :type kind: 

1286 str 

1287 

1288 :yields: 

1289 :py:class:`float` values. 

1290 

1291 :complexity: 

1292 O(1), independent of number of nuts. 

1293 ''' 

1294 return self._database._iter_deltats( 

1295 kind=kind, 

1296 kind_codes_count='%(db)s.%(kind_codes_count)s' % self._names) 

1297 

1298 def iter_codes(self, kind=None): 

1299 ''' 

1300 Iterate over content identifier code sequences available in selection. 

1301 

1302 :param kind: 

1303 If given, get codes only for a given content type. 

1304 :type kind: 

1305 str 

1306 

1307 :yields: 

1308 :py:class:`tuple` of :py:class:`str` 

1309 

1310 :complexity: 

1311 O(1), independent of number of nuts. 

1312 ''' 

1313 return self._database._iter_codes( 

1314 kind=kind, 

1315 kind_codes_count='%(db)s.%(kind_codes_count)s' % self._names) 

1316 

1317 def _iter_codes_info(self, kind=None, codes=None): 

1318 ''' 

1319 Iterate over number of occurrences of any (kind, codes) combination. 

1320 

1321 :param kind: 

1322 If given, get counts only for selected content type. 

1323 :type kind: 

1324 str 

1325 

1326 :yields: 

1327 Tuples of the form ``(kind, codes, deltat, kind_codes_id, count)``. 

1328 

1329 :complexity: 

1330 O(1), independent of number of nuts. 

1331 ''' 

1332 return self._database._iter_codes_info( 

1333 kind=kind, 

1334 codes=codes, 

1335 kind_codes_count='%(db)s.%(kind_codes_count)s' % self._names) 

1336 

1337 def get_kinds(self, codes=None): 

1338 ''' 

1339 Get content types available in selection. 

1340 

1341 :param codes: 

1342 If given, get kinds only for selected codes identifier. 

1343 Only a single identifier may be given here and no pattern matching 

1344 is done, currently. 

1345 :type codes: 

1346 :py:class:`~pyrocko.squirrel.model.Codes` 

1347 

1348 :returns: 

1349 Sorted list of available content types. 

1350 :rtype: 

1351 py:class:`list` of :py:class:`str` 

1352 

1353 :complexity: 

1354 O(1), independent of number of nuts. 

1355 

1356 ''' 

1357 return sorted(list(self.iter_kinds(codes=codes))) 

1358 

1359 def get_deltats(self, kind=None): 

1360 ''' 

1361 Get sampling intervals available in selection. 

1362 

1363 :param kind: 

1364 If given, get sampling intervals only for selected content type. 

1365 :type kind: 

1366 str 

1367 

1368 :complexity: 

1369 O(1), independent of number of nuts. 

1370 

1371 :returns: Sorted list of available sampling intervals. 

1372 ''' 

1373 return sorted(list(self.iter_deltats(kind=kind))) 

1374 

1375 def get_codes(self, kind=None): 

1376 ''' 

1377 Get identifier code sequences available in selection. 

1378 

1379 :param kind: 

1380 If given, get codes only for selected content type. 

1381 :type kind: 

1382 str 

1383 

1384 :complexity: 

1385 O(1), independent of number of nuts. 

1386 

1387 :returns: Sorted list of available codes as tuples of strings. 

1388 ''' 

1389 return sorted(list(self.iter_codes(kind=kind))) 

1390 

1391 def get_counts(self, kind=None): 

1392 ''' 

1393 Get number of occurrences of any (kind, codes) combination. 

1394 

1395 :param kind: 

1396 If given, get codes only for selected content type. 

1397 :type kind: 

1398 str 

1399 

1400 :complexity: 

1401 O(1), independent of number of nuts. 

1402 

1403 :returns: ``dict`` with ``counts[kind][codes]`` or ``counts[codes]`` 

1404 if kind is not ``None`` 

1405 ''' 

1406 d = {} 

1407 for kind_id, codes, _, _, count in self._iter_codes_info(kind=kind): 

1408 if kind_id not in d: 

1409 v = d[kind_id] = {} 

1410 else: 

1411 v = d[kind_id] 

1412 

1413 if codes not in v: 

1414 v[codes] = 0 

1415 

1416 v[codes] += count 

1417 

1418 if kind is not None: 

1419 return d[to_kind_id(kind)] 

1420 else: 

1421 return dict((to_kind(kind_id), v) for (kind_id, v) in d.items()) 

1422 

1423 def glob_codes(self, kind, codes): 

1424 ''' 

1425 Find codes matching given patterns. 

1426 

1427 :param kind: 

1428 Content kind to be queried. 

1429 :type kind: 

1430 str 

1431 

1432 :param codes: 

1433 List of code patterns to query. 

1434 :type codes: 

1435 :py:class:`list` of :py:class:`~pyrocko.squirrel.model.Codes` 

1436 objects appropriate for the queried content type, or anything which 

1437 can be converted to such objects. 

1438 

1439 :returns: 

1440 List of matches of the form ``[kind_codes_id, codes, deltat]``. 

1441 ''' 

1442 

1443 kind_id = to_kind_id(kind) 

1444 args = [kind_id] 

1445 pats = codes_patterns_for_kind(kind_id, codes) 

1446 

1447 if pats: 

1448 codes_cond = 'AND ( %s ) ' % ' OR '.join( 

1449 ('kind_codes.codes GLOB ?',) * len(pats)) 

1450 

1451 args.extend(pat.safe_str for pat in pats) 

1452 else: 

1453 codes_cond = '' 

1454 

1455 sql = self._sql(''' 

1456 SELECT kind_codes_id, codes, deltat FROM kind_codes 

1457 WHERE 

1458 kind_id == ? ''' + codes_cond) 

1459 

1460 return list(map(list, self._conn.execute(sql, args))) 

1461 

1462 def update(self, constraint=None, **kwargs): 

1463 ''' 

1464 Update or partially update channel and event inventories. 

1465 

1466 :param constraint: 

1467 Selection of times or areas to be brought up to date. 

1468 :type constraint: 

1469 :py:class:`~pyrocko.squirrel.client.base.Constraint` 

1470 

1471 :param \\*\\*kwargs: 

1472 Shortcut for setting ``constraint=Constraint(**kwargs)``. 

1473 

1474 This function triggers all attached remote sources, to check for 

1475 updates in the meta-data. The sources will only submit queries when 

1476 their expiration date has passed, or if the selection spans into 

1477 previously unseen times or areas. 

1478 ''' 

1479 

1480 if constraint is None: 

1481 constraint = client.Constraint(**kwargs) 

1482 

1483 for source in self._sources: 

1484 source.update_channel_inventory(self, constraint) 

1485 source.update_event_inventory(self, constraint) 

1486 

1487 def update_waveform_promises(self, constraint=None, **kwargs): 

1488 ''' 

1489 Permit downloading of remote waveforms. 

1490 

1491 :param constraint: 

1492 Remote waveforms compatible with the given constraint are enabled 

1493 for download. 

1494 :type constraint: 

1495 :py:class:`~pyrocko.squirrel.client.base.Constraint` 

1496 

1497 :param \\*\\*kwargs: 

1498 Shortcut for setting ``constraint=Constraint(**kwargs)``. 

1499 

1500 Calling this method permits Squirrel to download waveforms from remote 

1501 sources when processing subsequent waveform requests. This works by 

1502 inserting so called waveform promises into the database. It will look 

1503 into the available channels for each remote source and create a promise 

1504 for each channel compatible with the given constraint. If the promise 

1505 then matches in a waveform request, Squirrel tries to download the 

1506 waveform. If the download is successful, the downloaded waveform is 

1507 added to the Squirrel and the promise is deleted. If the download 

1508 fails, the promise is kept if the reason of failure looks like being 

1509 temporary, e.g. because of a network failure. If the cause of failure 

1510 however seems to be permanent, the promise is deleted so that no 

1511 further attempts are made to download a waveform which might not be 

1512 available from that server at all. To force re-scheduling after a 

1513 permanent failure, call :py:meth:`update_waveform_promises` 

1514 yet another time. 

1515 ''' 

1516 

1517 if constraint is None: 

1518 constraint = client.Constraint(**kwargs) 

1519 

1520 for source in self._sources: 

1521 source.update_waveform_promises(self, constraint) 

1522 

1523 def remove_waveform_promises(self, from_database='selection'): 

1524 ''' 

1525 Remove waveform promises from live selection or global database. 

1526 

1527 Calling this function removes all waveform promises provided by the 

1528 attached sources. 

1529 

1530 :param from_database: 

1531 Remove from live selection ``'selection'`` or global database 

1532 ``'global'``. 

1533 ''' 

1534 for source in self._sources: 

1535 source.remove_waveform_promises(self, from_database=from_database) 

1536 

1537 def update_responses(self, constraint=None, **kwargs): 

1538 if constraint is None: 

1539 constraint = client.Constraint(**kwargs) 

1540 

1541 for source in self._sources: 

1542 source.update_response_inventory(self, constraint) 

1543 

1544 def get_nfiles(self): 

1545 ''' 

1546 Get number of files in selection. 

1547 ''' 

1548 

1549 sql = self._sql('''SELECT COUNT(*) FROM %(db)s.%(file_states)s''') 

1550 for row in self._conn.execute(sql): 

1551 return row[0] 

1552 

1553 def get_nnuts(self): 

1554 ''' 

1555 Get number of nuts in selection. 

1556 ''' 

1557 

1558 sql = self._sql('''SELECT COUNT(*) FROM %(db)s.%(nuts)s''') 

1559 for row in self._conn.execute(sql): 

1560 return row[0] 

1561 

1562 def get_total_size(self): 

1563 ''' 

1564 Get aggregated file size available in selection. 

1565 ''' 

1566 

1567 sql = self._sql(''' 

1568 SELECT SUM(files.size) FROM %(db)s.%(file_states)s 

1569 INNER JOIN files 

1570 ON %(db)s.%(file_states)s.file_id = files.file_id 

1571 ''') 

1572 

1573 for row in self._conn.execute(sql): 

1574 return row[0] or 0 

1575 

1576 def get_stats(self): 

1577 ''' 

1578 Get statistics on contents available through this selection. 

1579 ''' 

1580 

1581 kinds = self.get_kinds() 

1582 time_spans = {} 

1583 for kind in kinds: 

1584 time_spans[kind] = self.get_time_span([kind]) 

1585 

1586 return SquirrelStats( 

1587 nfiles=self.get_nfiles(), 

1588 nnuts=self.get_nnuts(), 

1589 kinds=kinds, 

1590 codes=self.get_codes(), 

1591 total_size=self.get_total_size(), 

1592 counts=self.get_counts(), 

1593 time_spans=time_spans, 

1594 sources=[s.describe() for s in self._sources], 

1595 operators=[op.describe() for op in self._operators]) 

1596 

1597 @filldocs 

1598 def check( 

1599 self, obj=None, tmin=None, tmax=None, time=None, codes=None, 

1600 ignore=[]): 

1601 ''' 

1602 Check for common data/metadata problems. 

1603 

1604 %(query_args)s 

1605 

1606 :param ignore: 

1607 Problem types to be ignored. 

1608 :type ignore: 

1609 :class:`list` of :class:`str` 

1610 (:py:class:`~pyrocko.squirrel.check.SquirrelCheckProblemType`) 

1611 

1612 :returns: 

1613 :py:class:`~pyrocko.squirrel.check.SquirrelCheck` object 

1614 containing the results of the check. 

1615 

1616 See :py:func:`~pyrocko.squirrel.check.do_check`. 

1617 ''' 

1618 

1619 from .check import do_check 

1620 tmin, tmax, codes = self._get_selection_args( 

1621 CHANNEL, obj, tmin, tmax, time, codes) 

1622 

1623 return do_check(self, tmin=tmin, tmax=tmax, codes=codes, ignore=ignore) 

1624 

    def get_content(
            self,
            nut,
            cache_id='default',
            accessor_id='default',
            show_progress=False,
            model='squirrel'):

        '''
        Get and possibly load full content for a given index entry from file.

        Loads the actual content objects (channel, station, waveform, ...) from
        file. For efficiency, sibling content (all stuff in the same file
        segment) will also be loaded as a side effect. The loaded contents are
        cached in the Squirrel object.

        :param nut:
            Index entry pointing at the content to be loaded.

        :param cache_id:
            Name of the content cache to use.

        :param accessor_id:
            Name of the consumer accessing the cached content.

        :param show_progress:
            If ``True``, show progress while loading from file.

        :param model:
            Object model of the returned content.

        :raises:
            :py:exc:`~pyrocko.squirrel.error.NotAvailable` if the content
            cannot be found in the cache even after loading the file segment.
        '''

        content_cache = self._content_caches[cache_id]
        if not content_cache.has(nut):

            # Load the whole file segment; every nut encountered on the way
            # is put into the cache, so sibling content becomes available
            # without further file reads.
            for nut_loaded in io.iload(
                    nut.file_path,
                    segment=nut.file_segment,
                    format=nut.file_format,
                    database=self._database,
                    update_selection=self,
                    show_progress=show_progress):

                content_cache.put(nut_loaded)

        try:
            return content_cache.get(nut, accessor_id, model)

        except KeyError:
            # Loading succeeded but the requested nut was not among the
            # loaded contents (e.g. stale index entry).
            raise error.NotAvailable(
                'Unable to retrieve content: %s, %s, %s, %s' % nut.key)

1661 

1662 def advance_accessor(self, accessor_id='default', cache_id=None): 

1663 ''' 

1664 Notify memory caches about consumer moving to a new data batch. 

1665 

1666 :param accessor_id: 

1667 Name of accessing consumer to be advanced. 

1668 :type accessor_id: 

1669 str 

1670 

1671 :param cache_id: 

1672 Name of cache to for which the accessor should be advanced. By 

1673 default the named accessor is advanced in all registered caches. 

1674 By default, two caches named ``'default'`` and ``'waveform'`` are 

1675 available. 

1676 :type cache_id: 

1677 str 

1678 

1679 See :py:class:`~pyrocko.squirrel.cache.ContentCache` for details on how 

1680 Squirrel's memory caching works and can be tuned. Default behaviour is 

1681 to release data when it has not been used in the latest data 

1682 window/batch. If the accessor is never advanced, data is cached 

1683 indefinitely - which is often desired e.g. for station meta-data. 

1684 Methods for consecutive data traversal, like 

1685 :py:meth:`chopper_waveforms` automatically advance and clear 

1686 their accessor. 

1687 ''' 

1688 for cache_ in ( 

1689 self._content_caches.keys() 

1690 if cache_id is None 

1691 else [cache_id]): 

1692 

1693 self._content_caches[cache_].advance_accessor(accessor_id) 

1694 

1695 def clear_accessor(self, accessor_id, cache_id=None): 

1696 ''' 

1697 Notify memory caches about a consumer having finished. 

1698 

1699 :param accessor_id: 

1700 Name of accessor to be cleared. 

1701 :type accessor_id: 

1702 str 

1703 

1704 :param cache_id: 

1705 Name of cache for which the accessor should be cleared. By default 

1706 the named accessor is cleared from all registered caches. By 

1707 default, two caches named ``'default'`` and ``'waveform'`` are 

1708 available. 

1709 :type cache_id: 

1710 str 

1711 

1712 Calling this method clears all references to cache entries held by the 

1713 named accessor. Cache entries are then freed if not referenced by any 

1714 other accessor. 

1715 ''' 

1716 

1717 for cache_ in ( 

1718 self._content_caches.keys() 

1719 if cache_id is None 

1720 else [cache_id]): 

1721 

1722 self._content_caches[cache_].clear_accessor(accessor_id) 

1723 

    def get_cache_stats(self, cache_id):
        # Return the statistics object of the named content cache
        # (delegates to the cache's own ``get_stats``).
        return self._content_caches[cache_id].get_stats()

1726 

1727 @filldocs 

1728 def get_stations( 

1729 self, obj=None, tmin=None, tmax=None, time=None, codes=None, 

1730 model='squirrel'): 

1731 

1732 ''' 

1733 Get stations matching given constraints. 

1734 

1735 %(query_args)s 

1736 

1737 :param model: 

1738 Select object model for returned values: ``'squirrel'`` to get 

1739 Squirrel station objects or ``'pyrocko'`` to get Pyrocko station 

1740 objects with channel information attached. 

1741 :type model: 

1742 str 

1743 

1744 :returns: 

1745 List of :py:class:`pyrocko.squirrel.Station 

1746 <pyrocko.squirrel.model.Station>` objects by default or list of 

1747 :py:class:`pyrocko.model.Station <pyrocko.model.station.Station>` 

1748 objects if ``model='pyrocko'`` is requested. 

1749 

1750 See :py:meth:`iter_nuts` for details on time span matching. 

1751 ''' 

1752 

1753 if model == 'pyrocko': 

1754 return self._get_pyrocko_stations(obj, tmin, tmax, time, codes) 

1755 elif model in ('squirrel', 'stationxml', 'stationxml+'): 

1756 args = self._get_selection_args( 

1757 STATION, obj, tmin, tmax, time, codes) 

1758 

1759 nuts = sorted( 

1760 self.iter_nuts('station', *args), key=lambda nut: nut.dkey) 

1761 

1762 return [self.get_content(nut, model=model) for nut in nuts] 

1763 else: 

1764 raise ValueError('Invalid station model: %s' % model) 

1765 

1766 @filldocs 

1767 def get_channels( 

1768 self, obj=None, tmin=None, tmax=None, time=None, codes=None, 

1769 model='squirrel'): 

1770 

1771 ''' 

1772 Get channels matching given constraints. 

1773 

1774 %(query_args)s 

1775 

1776 :returns: 

1777 List of :py:class:`~pyrocko.squirrel.model.Channel` objects. 

1778 

1779 See :py:meth:`iter_nuts` for details on time span matching. 

1780 ''' 

1781 

1782 args = self._get_selection_args( 

1783 CHANNEL, obj, tmin, tmax, time, codes) 

1784 

1785 nuts = sorted( 

1786 self.iter_nuts('channel', *args), key=lambda nut: nut.dkey) 

1787 

1788 return [self.get_content(nut, model=model) for nut in nuts] 

1789 

    @filldocs
    def get_sensors(
            self, obj=None, tmin=None, tmax=None, time=None, codes=None):

        '''
        Get sensors matching given constraints.

        %(query_args)s

        :returns:
            List of :py:class:`~pyrocko.squirrel.model.Sensor` objects.

        See :py:meth:`iter_nuts` for details on time span matching.
        '''

        tmin, tmax, codes = self._get_selection_args(
            CHANNEL, obj, tmin, tmax, time, codes)

        if codes is not None:
            # Widen the channel pattern so that all components of a sensor
            # match: the last character of the channel code is replaced by
            # '?', unless the pattern is already the wildcard '*'.
            codes = codes_patterns_list(
                (entry.replace(channel=entry.channel[:-1] + '?')
                 if entry.channel != '*' else entry)
                for entry in codes)

        nuts = sorted(
            self.iter_nuts(
                'channel', tmin, tmax, codes), key=lambda nut: nut.dkey)

        # Group the loaded channels into Sensor objects and keep only those
        # overlapping the queried time span.
        return [
            sensor for sensor in model.Sensor.from_channels(
                self.get_content(nut) for nut in nuts)
            if match_time_span(tmin, tmax, sensor)]

1822 

1823 @filldocs 

1824 def get_responses( 

1825 self, obj=None, tmin=None, tmax=None, time=None, codes=None, 

1826 model='squirrel'): 

1827 

1828 ''' 

1829 Get instrument responses matching given constraints. 

1830 

1831 %(query_args)s 

1832 

1833 :returns: 

1834 List of :py:class:`~pyrocko.squirrel.model.Response` objects. 

1835 

1836 See :py:meth:`iter_nuts` for details on time span matching. 

1837 ''' 

1838 

1839 args = self._get_selection_args( 

1840 RESPONSE, obj, tmin, tmax, time, codes) 

1841 

1842 nuts = sorted( 

1843 self.iter_nuts('response', *args), key=lambda nut: nut.dkey) 

1844 

1845 return [self.get_content(nut, model=model) for nut in nuts] 

1846 

1847 @filldocs 

1848 def get_response( 

1849 self, obj=None, tmin=None, tmax=None, time=None, codes=None, 

1850 model='squirrel'): 

1851 

1852 ''' 

1853 Get instrument response matching given constraints. 

1854 

1855 %(query_args)s 

1856 

1857 :returns: 

1858 :py:class:`~pyrocko.squirrel.model.Response` object. 

1859 

1860 Same as :py:meth:`get_responses` but returning exactly one response. 

1861 Raises :py:exc:`~pyrocko.squirrel.error.NotAvailable` if zero or more 

1862 than one is available. 

1863 

1864 See :py:meth:`iter_nuts` for details on time span matching. 

1865 ''' 

1866 

1867 if model == 'stationxml': 

1868 model_ = 'stationxml+' 

1869 else: 

1870 model_ = model 

1871 

1872 responses = self.get_responses( 

1873 obj, tmin, tmax, time, codes, model=model_) 

1874 if len(responses) == 0: 

1875 raise error.NotAvailable( 

1876 'No instrument response available (%s).' 

1877 % self._get_selection_args_str( 

1878 RESPONSE, obj, tmin, tmax, time, codes)) 

1879 

1880 elif len(responses) > 1: 

1881 if model_ == 'squirrel': 

1882 resps_sq = responses 

1883 elif model_ == 'stationxml+': 

1884 resps_sq = [resp[0] for resp in responses] 

1885 else: 

1886 raise ValueError('Invalid response model: %s' % model) 

1887 

1888 rinfo = ':\n' + '\n'.join( 

1889 ' ' + resp.summary for resp in resps_sq) 

1890 

1891 raise error.NotAvailable( 

1892 'Multiple instrument responses matching given constraints ' 

1893 '(%s)%s' % ( 

1894 self._get_selection_args_str( 

1895 RESPONSE, obj, tmin, tmax, time, codes), rinfo)) 

1896 

1897 if model == 'stationxml': 

1898 return responses[0][1] 

1899 else: 

1900 return responses[0] 

1901 

1902 @filldocs 

1903 def get_events( 

1904 self, obj=None, tmin=None, tmax=None, time=None, codes=None): 

1905 

1906 ''' 

1907 Get events matching given constraints. 

1908 

1909 %(query_args)s 

1910 

1911 :returns: 

1912 List of :py:class:`~pyrocko.model.event.Event` objects. 

1913 

1914 See :py:meth:`iter_nuts` for details on time span matching. 

1915 ''' 

1916 

1917 args = self._get_selection_args(EVENT, obj, tmin, tmax, time, codes) 

1918 nuts = sorted( 

1919 self.iter_nuts('event', *args), key=lambda nut: nut.dkey) 

1920 

1921 return [self.get_content(nut) for nut in nuts] 

1922 

1923 def _redeem_promises(self, *args, order_only=False): 

1924 

1925 def split_promise(order): 

1926 self._split_nuts( 

1927 'waveform_promise', 

1928 order.tmin, order.tmax, 

1929 codes=order.codes, 

1930 path=order.source_id) 

1931 

1932 tmin, tmax, _ = args 

1933 

1934 waveforms = list(self.iter_nuts('waveform', *args)) 

1935 promises = list(self.iter_nuts('waveform_promise', *args)) 

1936 

1937 codes_to_avail = defaultdict(list) 

1938 for nut in waveforms: 

1939 codes_to_avail[nut.codes].append((nut.tmin, nut.tmax)) 

1940 

1941 def tts(x): 

1942 if isinstance(x, tuple): 

1943 return tuple(tts(e) for e in x) 

1944 elif isinstance(x, list): 

1945 return list(tts(e) for e in x) 

1946 else: 

1947 return util.time_to_str(x) 

1948 

1949 orders = [] 

1950 for promise in promises: 

1951 waveforms_avail = codes_to_avail[promise.codes] 

1952 for block_tmin, block_tmax in blocks( 

1953 max(tmin, promise.tmin), 

1954 min(tmax, promise.tmax), 

1955 promise.deltat): 

1956 

1957 orders.append( 

1958 WaveformOrder( 

1959 source_id=promise.file_path, 

1960 codes=promise.codes, 

1961 tmin=block_tmin, 

1962 tmax=block_tmax, 

1963 deltat=promise.deltat, 

1964 gaps=gaps(waveforms_avail, block_tmin, block_tmax))) 

1965 

1966 orders_noop, orders = lpick(lambda order: order.gaps, orders) 

1967 

1968 order_keys_noop = set(order_key(order) for order in orders_noop) 

1969 if len(order_keys_noop) != 0 or len(orders_noop) != 0: 

1970 logger.info( 

1971 'Waveform orders already satisified with cached/local data: ' 

1972 '%i (%i)' % (len(order_keys_noop), len(orders_noop))) 

1973 

1974 for order in orders_noop: 

1975 split_promise(order) 

1976 

1977 if order_only: 

1978 if orders: 

1979 self._pending_orders.extend(orders) 

1980 logger.info( 

1981 'Enqueuing %i waveform order%s.' 

1982 % len_plural(orders)) 

1983 return 

1984 else: 

1985 if self._pending_orders: 

1986 orders.extend(self._pending_orders) 

1987 logger.info( 

1988 'Adding %i previously enqueued order%s.' 

1989 % len_plural(self._pending_orders)) 

1990 

1991 self._pending_orders = [] 

1992 

1993 source_ids = [] 

1994 sources = {} 

1995 for source in self._sources: 

1996 if isinstance(source, fdsn.FDSNSource): 

1997 source_ids.append(source._source_id) 

1998 sources[source._source_id] = source 

1999 

2000 source_priority = dict( 

2001 (source_id, i) for (i, source_id) in enumerate(source_ids)) 

2002 

2003 order_groups = defaultdict(list) 

2004 for order in orders: 

2005 order_groups[order_key(order)].append(order) 

2006 

2007 for k, order_group in order_groups.items(): 

2008 order_group.sort( 

2009 key=lambda order: source_priority[order.source_id]) 

2010 

2011 n_order_groups = len(order_groups) 

2012 

2013 if len(order_groups) != 0 or len(orders) != 0: 

2014 logger.info( 

2015 'Waveform orders standing for download: %i (%i)' 

2016 % (len(order_groups), len(orders))) 

2017 

2018 task = make_task('Waveform orders processed', n_order_groups) 

2019 else: 

2020 task = None 

2021 

2022 def release_order_group(order): 

2023 okey = order_key(order) 

2024 for followup in order_groups[okey]: 

2025 split_promise(followup) 

2026 

2027 del order_groups[okey] 

2028 

2029 if task: 

2030 task.update(n_order_groups - len(order_groups)) 

2031 

2032 def noop(order): 

2033 pass 

2034 

2035 def success(order): 

2036 release_order_group(order) 

2037 split_promise(order) 

2038 

2039 def batch_add(paths): 

2040 self.add(paths) 

2041 

2042 calls = queue.Queue() 

2043 

2044 def enqueue(f): 

2045 def wrapper(*args): 

2046 calls.put((f, args)) 

2047 

2048 return wrapper 

2049 

2050 while order_groups: 

2051 

2052 orders_now = [] 

2053 empty = [] 

2054 for k, order_group in order_groups.items(): 

2055 try: 

2056 orders_now.append(order_group.pop(0)) 

2057 except IndexError: 

2058 empty.append(k) 

2059 

2060 for k in empty: 

2061 del order_groups[k] 

2062 

2063 by_source_id = defaultdict(list) 

2064 for order in orders_now: 

2065 by_source_id[order.source_id].append(order) 

2066 

2067 threads = [] 

2068 for source_id in by_source_id: 

2069 def download(): 

2070 try: 

2071 sources[source_id].download_waveforms( 

2072 by_source_id[source_id], 

2073 success=enqueue(success), 

2074 error_permanent=enqueue(split_promise), 

2075 error_temporary=noop, 

2076 batch_add=enqueue(batch_add)) 

2077 

2078 finally: 

2079 calls.put(None) 

2080 

2081 thread = threading.Thread(target=download) 

2082 thread.start() 

2083 threads.append(thread) 

2084 

2085 ndone = 0 

2086 while ndone < len(threads): 

2087 ret = calls.get() 

2088 if ret is None: 

2089 ndone += 1 

2090 else: 

2091 ret[0](*ret[1]) 

2092 

2093 for thread in threads: 

2094 thread.join() 

2095 

2096 if task: 

2097 task.update(n_order_groups - len(order_groups)) 

2098 

2099 if task: 

2100 task.done() 

2101 

2102 @filldocs 

2103 def get_waveform_nuts( 

2104 self, obj=None, tmin=None, tmax=None, time=None, codes=None, 

2105 order_only=False): 

2106 

2107 ''' 

2108 Get waveform content entities matching given constraints. 

2109 

2110 %(query_args)s 

2111 

2112 Like :py:meth:`get_nuts` with ``kind='waveform'`` but additionally 

2113 resolves matching waveform promises (downloads waveforms from remote 

2114 sources). 

2115 

2116 See :py:meth:`iter_nuts` for details on time span matching. 

2117 ''' 

2118 

2119 args = self._get_selection_args(WAVEFORM, obj, tmin, tmax, time, codes) 

2120 self._redeem_promises(*args, order_only=order_only) 

2121 return sorted( 

2122 self.iter_nuts('waveform', *args), key=lambda nut: nut.dkey) 

2123 

2124 @filldocs 

2125 def have_waveforms( 

2126 self, obj=None, tmin=None, tmax=None, time=None, codes=None): 

2127 

2128 ''' 

2129 Check if any waveforms or waveform promises are available for given 

2130 constraints. 

2131 

2132 %(query_args)s 

2133 ''' 

2134 

2135 args = self._get_selection_args(WAVEFORM, obj, tmin, tmax, time, codes) 

2136 return bool(list( 

2137 self.iter_nuts('waveform', *args, limit=1))) \ 

2138 or bool(list( 

2139 self.iter_nuts('waveform_promise', *args, limit=1))) 

2140 

2141 @filldocs 

2142 def get_waveforms( 

2143 self, obj=None, tmin=None, tmax=None, time=None, codes=None, 

2144 uncut=False, want_incomplete=True, degap=True, maxgap=5, 

2145 maxlap=None, snap=None, include_last=False, load_data=True, 

2146 accessor_id='default', operator_params=None, order_only=False): 

2147 

2148 ''' 

2149 Get waveforms matching given constraints. 

2150 

2151 %(query_args)s 

2152 

2153 :param uncut: 

2154 Set to ``True``, to disable cutting traces to [``tmin``, ``tmax``] 

2155 and to disable degapping/deoverlapping. Returns untouched traces as 

2156 they are read from file segment. File segments are always read in 

2157 their entirety. 

2158 :type uncut: 

2159 bool 

2160 

2161 :param want_incomplete: 

2162 If ``True``, gappy/incomplete traces are included in the result. 

2163 :type want_incomplete: 

2164 bool 

2165 

2166 :param degap: 

2167 If ``True``, connect traces and remove gaps and overlaps. 

2168 :type degap: 

2169 bool 

2170 

2171 :param maxgap: 

2172 Maximum gap size in samples which is filled with interpolated 

2173 samples when ``degap`` is ``True``. 

2174 :type maxgap: 

2175 int 

2176 

2177 :param maxlap: 

2178 Maximum overlap size in samples which is removed when ``degap`` is 

2179 ``True``. 

2180 :type maxlap: 

2181 int 

2182 

2183 :param snap: 

2184 Rounding functions used when computing sample index from time 

2185 instance, for trace start and trace end, respectively. By default, 

2186 ``(round, round)`` is used. 

2187 :type snap: 

2188 tuple of 2 callables 

2189 

2190 :param include_last: 

2191 If ``True``, add one more sample to the returned traces (the sample 

2192 which would be the first sample of a query with ``tmin`` set to the 

2193 current value of ``tmax``). 

2194 :type include_last: 

2195 bool 

2196 

2197 :param load_data: 

2198 If ``True``, waveform data samples are read from files (or cache). 

2199 If ``False``, meta-information-only traces are returned (dummy 

2200 traces with no data samples). 

2201 :type load_data: 

2202 bool 

2203 

2204 :param accessor_id: 

2205 Name of consumer on who's behalf data is accessed. Used in cache 

2206 management (see :py:mod:`~pyrocko.squirrel.cache`). Used as a key 

2207 to distinguish different points of extraction for the decision of 

2208 when to release cached waveform data. Should be used when data is 

2209 alternately extracted from more than one region / selection. 

2210 :type accessor_id: 

2211 str 

2212 

2213 See :py:meth:`iter_nuts` for details on time span matching. 

2214 

2215 Loaded data is kept in memory (at least) until 

2216 :py:meth:`clear_accessor` has been called or 

2217 :py:meth:`advance_accessor` has been called two consecutive times 

2218 without data being accessed between the two calls (by this accessor). 

2219 Data may still be further kept in the memory cache if held alive by 

2220 consumers with a different ``accessor_id``. 

2221 ''' 

2222 

2223 tmin, tmax, codes = self._get_selection_args( 

2224 WAVEFORM, obj, tmin, tmax, time, codes) 

2225 

2226 self_tmin, self_tmax = self.get_time_span( 

2227 ['waveform', 'waveform_promise']) 

2228 

2229 if None in (self_tmin, self_tmax): 

2230 logger.warning( 

2231 'No waveforms available.') 

2232 return [] 

2233 

2234 tmin = tmin if tmin is not None else self_tmin 

2235 tmax = tmax if tmax is not None else self_tmax 

2236 

2237 if codes is not None and len(codes) == 1: 

2238 # TODO: fix for multiple / mixed codes 

2239 operator = self.get_operator(codes[0]) 

2240 if operator is not None: 

2241 return operator.get_waveforms( 

2242 self, codes[0], 

2243 tmin=tmin, tmax=tmax, 

2244 uncut=uncut, want_incomplete=want_incomplete, degap=degap, 

2245 maxgap=maxgap, maxlap=maxlap, snap=snap, 

2246 include_last=include_last, load_data=load_data, 

2247 accessor_id=accessor_id, params=operator_params) 

2248 

2249 nuts = self.get_waveform_nuts( 

2250 obj, tmin, tmax, time, codes, order_only=order_only) 

2251 

2252 if order_only: 

2253 return [] 

2254 

2255 if load_data: 

2256 traces = [ 

2257 self.get_content(nut, 'waveform', accessor_id) for nut in nuts] 

2258 

2259 else: 

2260 traces = [ 

2261 trace.Trace(**nut.trace_kwargs) for nut in nuts] 

2262 

2263 if uncut: 

2264 return traces 

2265 

2266 if snap is None: 

2267 snap = (round, round) 

2268 

2269 chopped = [] 

2270 for tr in traces: 

2271 if not load_data and tr.ydata is not None: 

2272 tr = tr.copy(data=False) 

2273 tr.ydata = None 

2274 

2275 try: 

2276 chopped.append(tr.chop( 

2277 tmin, tmax, 

2278 inplace=False, 

2279 snap=snap, 

2280 include_last=include_last)) 

2281 

2282 except trace.NoData: 

2283 pass 

2284 

2285 processed = self._process_chopped( 

2286 chopped, degap, maxgap, maxlap, want_incomplete, tmin, tmax) 

2287 

2288 return processed 

2289 

2290 @filldocs 

2291 def chopper_waveforms( 

2292 self, obj=None, tmin=None, tmax=None, time=None, codes=None, 

2293 tinc=None, tpad=0., 

2294 want_incomplete=True, snap_window=False, 

2295 degap=True, maxgap=5, maxlap=None, 

2296 snap=None, include_last=False, load_data=True, 

2297 accessor_id=None, clear_accessor=True, operator_params=None, 

2298 grouping=None): 

2299 

2300 ''' 

2301 Iterate window-wise over waveform archive. 

2302 

2303 %(query_args)s 

2304 

2305 :param tinc: 

2306 Time increment (window shift time) (default uses ``tmax-tmin``). 

2307 :type tinc: 

2308 timestamp 

2309 

2310 :param tpad: 

2311 Padding time appended on either side of the data window (window 

2312 overlap is ``2*tpad``). 

2313 :type tpad: 

2314 timestamp 

2315 

2316 :param want_incomplete: 

2317 If ``True``, gappy/incomplete traces are included in the result. 

2318 :type want_incomplete: 

2319 bool 

2320 

2321 :param snap_window: 

2322 If ``True``, start time windows at multiples of tinc with respect 

2323 to system time zero. 

2324 :type snap_window: 

2325 bool 

2326 

2327 :param degap: 

2328 If ``True``, connect traces and remove gaps and overlaps. 

2329 :type degap: 

2330 bool 

2331 

2332 :param maxgap: 

2333 Maximum gap size in samples which is filled with interpolated 

2334 samples when ``degap`` is ``True``. 

2335 :type maxgap: 

2336 int 

2337 

2338 :param maxlap: 

2339 Maximum overlap size in samples which is removed when ``degap`` is 

2340 ``True``. 

2341 :type maxlap: 

2342 int 

2343 

2344 :param snap: 

2345 Rounding functions used when computing sample index from time 

2346 instance, for trace start and trace end, respectively. By default, 

2347 ``(round, round)`` is used. 

2348 :type snap: 

2349 tuple of 2 callables 

2350 

2351 :param include_last: 

2352 If ``True``, add one more sample to the returned traces (the sample 

2353 which would be the first sample of a query with ``tmin`` set to the 

2354 current value of ``tmax``). 

2355 :type include_last: 

2356 bool 

2357 

2358 :param load_data: 

2359 If ``True``, waveform data samples are read from files (or cache). 

2360 If ``False``, meta-information-only traces are returned (dummy 

2361 traces with no data samples). 

2362 :type load_data: 

2363 bool 

2364 

2365 :param accessor_id: 

2366 Name of consumer on who's behalf data is accessed. Used in cache 

2367 management (see :py:mod:`~pyrocko.squirrel.cache`). Used as a key 

2368 to distinguish different points of extraction for the decision of 

2369 when to release cached waveform data. Should be used when data is 

2370 alternately extracted from more than one region / selection. 

2371 :type accessor_id: 

2372 str 

2373 

2374 :param clear_accessor: 

2375 If ``True`` (default), :py:meth:`clear_accessor` is called when the 

2376 chopper finishes. Set to ``False`` to keep loaded waveforms in 

2377 memory when the generator returns. 

2378 :type clear_accessor: 

2379 bool 

2380 

2381 :param grouping: 

2382 By default, traversal over the data is over time and all matching 

2383 traces of a time window are yielded. Using this option, it is 

2384 possible to traverse the data first by group (e.g. station or 

2385 network) and second by time. This can reduce the number of traces 

2386 in each batch and thus reduce the memory footprint of the process. 

2387 :type grouping: 

2388 :py:class:`~pyrocko.squirrel.operator.Grouping` 

2389 

2390 :yields: 

2391 A list of :py:class:`~pyrocko.trace.Trace` objects for every 

2392 extracted time window. 

2393 

2394 See :py:meth:`iter_nuts` for details on time span matching. 

2395 ''' 

2396 

2397 tmin, tmax, codes = self._get_selection_args( 

2398 WAVEFORM, obj, tmin, tmax, time, codes) 

2399 

2400 self_tmin, self_tmax = self.get_time_span( 

2401 ['waveform', 'waveform_promise']) 

2402 

2403 if None in (self_tmin, self_tmax): 

2404 logger.warning( 

2405 'Content has undefined time span. No waveforms and no ' 

2406 'waveform promises?') 

2407 return 

2408 

2409 if snap_window and tinc is not None: 

2410 tmin = tmin if tmin is not None else self_tmin 

2411 tmax = tmax if tmax is not None else self_tmax 

2412 tmin = math.floor(tmin / tinc) * tinc 

2413 tmax = math.ceil(tmax / tinc) * tinc 

2414 else: 

2415 tmin = tmin if tmin is not None else self_tmin + tpad 

2416 tmax = tmax if tmax is not None else self_tmax - tpad 

2417 

2418 tinc = tinc if tinc is not None else tmax - tmin 

2419 

2420 try: 

2421 if accessor_id is None: 

2422 accessor_id = 'chopper%i' % self._n_choppers_active 

2423 

2424 self._n_choppers_active += 1 

2425 

2426 eps = tinc * 1e-6 

2427 if tinc != 0.0: 

2428 nwin = int(((tmax - eps) - tmin) / tinc) + 1 

2429 else: 

2430 nwin = 1 

2431 

2432 if grouping is None: 

2433 codes_list = [codes] 

2434 else: 

2435 operator = Operator( 

2436 filtering=CodesPatternFiltering(codes=codes), 

2437 grouping=grouping) 

2438 

2439 available = set(self.get_codes(kind='waveform')) 

2440 available.update(self.get_codes(kind='waveform_promise')) 

2441 operator.update_mappings(sorted(available)) 

2442 

2443 codes_list = [ 

2444 codes_patterns_list(scl) 

2445 for scl in operator.iter_in_codes()] 

2446 

2447 ngroups = len(codes_list) 

2448 for igroup, scl in enumerate(codes_list): 

2449 for iwin in range(nwin): 

2450 wmin, wmax = tmin+iwin*tinc, min(tmin+(iwin+1)*tinc, tmax) 

2451 

2452 chopped = self.get_waveforms( 

2453 tmin=wmin-tpad, 

2454 tmax=wmax+tpad, 

2455 codes=scl, 

2456 snap=snap, 

2457 include_last=include_last, 

2458 load_data=load_data, 

2459 want_incomplete=want_incomplete, 

2460 degap=degap, 

2461 maxgap=maxgap, 

2462 maxlap=maxlap, 

2463 accessor_id=accessor_id, 

2464 operator_params=operator_params) 

2465 

2466 self.advance_accessor(accessor_id) 

2467 

2468 yield Batch( 

2469 tmin=wmin, 

2470 tmax=wmax, 

2471 i=iwin, 

2472 n=nwin, 

2473 igroup=igroup, 

2474 ngroups=ngroups, 

2475 traces=chopped) 

2476 

2477 finally: 

2478 self._n_choppers_active -= 1 

2479 if clear_accessor: 

2480 self.clear_accessor(accessor_id, 'waveform') 

2481 

2482 def _process_chopped( 

2483 self, chopped, degap, maxgap, maxlap, want_incomplete, tmin, tmax): 

2484 

2485 chopped.sort(key=lambda a: a.full_id) 

2486 if degap: 

2487 chopped = trace.degapper(chopped, maxgap=maxgap, maxlap=maxlap) 

2488 

2489 if not want_incomplete: 

2490 chopped_weeded = [] 

2491 for tr in chopped: 

2492 emin = tr.tmin - tmin 

2493 emax = tr.tmax + tr.deltat - tmax 

2494 if (abs(emin) <= 0.5*tr.deltat and abs(emax) <= 0.5*tr.deltat): 

2495 chopped_weeded.append(tr) 

2496 

2497 elif degap: 

2498 if (0. < emin <= 5. * tr.deltat 

2499 and -5. * tr.deltat <= emax < 0.): 

2500 

2501 tr.extend(tmin, tmax-tr.deltat, fillmethod='repeat') 

2502 chopped_weeded.append(tr) 

2503 

2504 chopped = chopped_weeded 

2505 

2506 return chopped 

2507 

2508 def _get_pyrocko_stations( 

2509 self, obj=None, tmin=None, tmax=None, time=None, codes=None): 

2510 

2511 from pyrocko import model as pmodel 

2512 

2513 if codes is not None: 

2514 codes = codes_patterns_for_kind(STATION, codes) 

2515 

2516 by_nsl = defaultdict(lambda: (list(), list())) 

2517 for station in self.get_stations(obj, tmin, tmax, time, codes): 

2518 sargs = station._get_pyrocko_station_args() 

2519 by_nsl[station.codes.nsl][0].append(sargs) 

2520 

2521 if codes is not None: 

2522 codes = [model.CodesNSLCE(c) for c in codes] 

2523 

2524 for channel in self.get_channels(obj, tmin, tmax, time, codes): 

2525 sargs = channel._get_pyrocko_station_args() 

2526 sargs_list, channels_list = by_nsl[channel.codes.nsl] 

2527 sargs_list.append(sargs) 

2528 channels_list.append(channel) 

2529 

2530 pstations = [] 

2531 nsls = list(by_nsl.keys()) 

2532 nsls.sort() 

2533 for nsl in nsls: 

2534 sargs_list, channels_list = by_nsl[nsl] 

2535 sargs = util.consistency_merge( 

2536 [('',) + x for x in sargs_list]) 

2537 

2538 by_c = defaultdict(list) 

2539 for ch in channels_list: 

2540 by_c[ch.codes.channel].append(ch._get_pyrocko_channel_args()) 

2541 

2542 chas = list(by_c.keys()) 

2543 chas.sort() 

2544 pchannels = [] 

2545 for cha in chas: 

2546 list_of_cargs = by_c[cha] 

2547 cargs = util.consistency_merge( 

2548 [('',) + x for x in list_of_cargs]) 

2549 pchannels.append(pmodel.Channel(*cargs)) 

2550 

2551 pstations.append( 

2552 pmodel.Station(*sargs, channels=pchannels)) 

2553 

2554 return pstations 

2555 

2556 @property 

2557 def pile(self): 

2558 

2559 ''' 

2560 Emulates the older :py:class:`pyrocko.pile.Pile` interface. 

2561 

2562 This property exposes a :py:class:`pyrocko.squirrel.pile.Pile` object, 

2563 which emulates most of the older :py:class:`pyrocko.pile.Pile` methods 

2564 but uses the fluffy power of the Squirrel under the hood. 

2565 

2566 This interface can be used as a drop-in replacement for piles which are 

2567 used in existing scripts and programs for efficient waveform data 

2568 access. The Squirrel-based pile scales better for large datasets. Newer 

2569 scripts should use Squirrel's native methods to avoid the emulation 

2570 overhead. 

2571 ''' 

2572 from . import pile 

2573 

2574 if self._pile is None: 

2575 self._pile = pile.Pile(self) 

2576 

2577 return self._pile 

2578 

2579 def snuffle(self): 

2580 ''' 

2581 Look at dataset in Snuffler. 

2582 ''' 

2583 self.pile.snuffle() 

2584 

2585 def _gather_codes_keys(self, kind, gather, selector): 

2586 return set( 

2587 gather(codes) 

2588 for codes in self.iter_codes(kind) 

2589 if selector is None or selector(codes)) 

2590 

2591 def __str__(self): 

2592 return str(self.get_stats()) 

2593 

2594 def get_coverage( 

2595 self, kind, tmin=None, tmax=None, codes=None, limit=None): 

2596 

2597 ''' 

2598 Get coverage information. 

2599 

2600 Get information about strips of gapless data coverage. 

2601 

2602 :param kind: 

2603 Content kind to be queried. 

2604 :type kind: 

2605 str 

2606 

2607 :param tmin: 

2608 Start time of query interval. 

2609 :type tmin: 

2610 timestamp 

2611 

2612 :param tmax: 

2613 End time of query interval. 

2614 :type tmax: 

2615 timestamp 

2616 

2617 :param codes: 

2618 If given, restrict query to given content codes patterns. 

2619 :type codes: 

2620 :py:class:`list` of :py:class:`~pyrocko.squirrel.model.Codes` 

2621 objects appropriate for the queried content type, or anything which 

2622 can be converted to such objects. 

2623 

2624 :param limit: 

2625 Limit query to return only up to a given maximum number of entries 

2626 per matching time series (without setting this option, very gappy 

2627 data could cause the query to execute for a very long time). 

2628 :type limit: 

2629 int 

2630 

2631 :returns: 

2632 Information about time spans covered by the requested time series 

2633 data. 

2634 :rtype: 

2635 :py:class:`list` of :py:class:`Coverage` objects 

2636 ''' 

2637 

2638 tmin_seconds, tmin_offset = model.tsplit(tmin) 

2639 tmax_seconds, tmax_offset = model.tsplit(tmax) 

2640 kind_id = to_kind_id(kind) 

2641 

2642 codes_info = list(self._iter_codes_info(kind=kind)) 

2643 

2644 kdata_all = [] 

2645 if codes is None: 

2646 for _, codes_entry, deltat, kind_codes_id, _ in codes_info: 

2647 kdata_all.append( 

2648 (codes_entry, kind_codes_id, codes_entry, deltat)) 

2649 

2650 else: 

2651 for codes_entry in codes: 

2652 pattern = to_codes(kind_id, codes_entry) 

2653 for _, codes_entry, deltat, kind_codes_id, _ in codes_info: 

2654 if model.match_codes(pattern, codes_entry): 

2655 kdata_all.append( 

2656 (pattern, kind_codes_id, codes_entry, deltat)) 

2657 

2658 kind_codes_ids = [x[1] for x in kdata_all] 

2659 

2660 counts_at_tmin = {} 

2661 if tmin is not None: 

2662 for nut in self.iter_nuts( 

2663 kind, tmin, tmin, kind_codes_ids=kind_codes_ids): 

2664 

2665 k = nut.codes, nut.deltat 

2666 if k not in counts_at_tmin: 

2667 counts_at_tmin[k] = 0 

2668 

2669 counts_at_tmin[k] += 1 

2670 

2671 coverages = [] 

2672 for pattern, kind_codes_id, codes_entry, deltat in kdata_all: 

2673 entry = [pattern, codes_entry, deltat, None, None, []] 

2674 for i, order in [(0, 'ASC'), (1, 'DESC')]: 

2675 sql = self._sql(''' 

2676 SELECT 

2677 time_seconds, 

2678 time_offset 

2679 FROM %(db)s.%(coverage)s 

2680 WHERE 

2681 kind_codes_id == ? 

2682 ORDER BY 

2683 kind_codes_id ''' + order + ''', 

2684 time_seconds ''' + order + ''', 

2685 time_offset ''' + order + ''' 

2686 LIMIT 1 

2687 ''') 

2688 

2689 for row in self._conn.execute(sql, [kind_codes_id]): 

2690 entry[3+i] = model.tjoin(row[0], row[1]) 

2691 

2692 if None in entry[3:5]: 

2693 continue 

2694 

2695 args = [kind_codes_id] 

2696 

2697 sql_time = '' 

2698 if tmin is not None: 

2699 # intentionally < because (== tmin) is queried from nuts 

2700 sql_time += ' AND ( ? < time_seconds ' \ 

2701 'OR ( ? == time_seconds AND ? < time_offset ) ) ' 

2702 args.extend([tmin_seconds, tmin_seconds, tmin_offset]) 

2703 

2704 if tmax is not None: 

2705 sql_time += ' AND ( time_seconds < ? ' \ 

2706 'OR ( ? == time_seconds AND time_offset <= ? ) ) ' 

2707 args.extend([tmax_seconds, tmax_seconds, tmax_offset]) 

2708 

2709 sql_limit = '' 

2710 if limit is not None: 

2711 sql_limit = ' LIMIT ?' 

2712 args.append(limit) 

2713 

2714 sql = self._sql(''' 

2715 SELECT 

2716 time_seconds, 

2717 time_offset, 

2718 step 

2719 FROM %(db)s.%(coverage)s 

2720 WHERE 

2721 kind_codes_id == ? 

2722 ''' + sql_time + ''' 

2723 ORDER BY 

2724 kind_codes_id, 

2725 time_seconds, 

2726 time_offset 

2727 ''' + sql_limit) 

2728 

2729 rows = list(self._conn.execute(sql, args)) 

2730 

2731 if limit is not None and len(rows) == limit: 

2732 entry[-1] = None 

2733 else: 

2734 counts = counts_at_tmin.get((codes_entry, deltat), 0) 

2735 tlast = None 

2736 if tmin is not None: 

2737 entry[-1].append((tmin, counts)) 

2738 tlast = tmin 

2739 

2740 for row in rows: 

2741 t = model.tjoin(row[0], row[1]) 

2742 counts += row[2] 

2743 entry[-1].append((t, counts)) 

2744 tlast = t 

2745 

2746 if tmax is not None and (tlast is None or tlast != tmax): 

2747 entry[-1].append((tmax, counts)) 

2748 

2749 coverages.append(model.Coverage.from_values(entry + [kind_id])) 

2750 

2751 return coverages 

2752 

2753 def get_stationxml( 

2754 self, obj=None, tmin=None, tmax=None, time=None, codes=None, 

2755 level='response'): 

2756 

2757 ''' 

2758 Get station/channel/response metadata in StationXML representation. 

2759 

2760 %(query_args)s 

2761 

2762 :returns: 

2763 :py:class:`~pyrocko.io.stationxml.FDSNStationXML` object. 

2764 ''' 

2765 

2766 if level not in ('network', 'station', 'channel', 'response'): 

2767 raise ValueError('Invalid level: %s' % level) 

2768 

2769 tmin, tmax, codes = self._get_selection_args( 

2770 CHANNEL, obj, tmin, tmax, time, codes) 

2771 

2772 filtering = CodesPatternFiltering(codes=codes) 

2773 

2774 nslcs = list(set( 

2775 codes.nslc for codes in 

2776 filtering.filter(self.get_codes(kind='channel')))) 

2777 

2778 from pyrocko.io import stationxml as sx 

2779 

2780 networks = [] 

2781 for net, stas in prefix_tree(nslcs): 

2782 network = sx.Network(code=net) 

2783 networks.append(network) 

2784 

2785 if level not in ('station', 'channel', 'response'): 

2786 continue 

2787 

2788 for sta, locs in stas: 

2789 stations = self.get_stations( 

2790 tmin=tmin, 

2791 tmax=tmax, 

2792 codes=(net, sta, '*'), 

2793 model='stationxml') 

2794 

2795 errors = sx.check_overlaps( 

2796 'Station', (net, sta), stations) 

2797 

2798 if errors: 

2799 raise sx.Inconsistencies( 

2800 'Inconsistencies found:\n %s' 

2801 % '\n '.join(errors)) 

2802 

2803 network.station_list.extend(stations) 

2804 

2805 if level not in ('channel', 'response'): 

2806 continue 

2807 

2808 for loc, chas in locs: 

2809 for cha, _ in chas: 

2810 channels = self.get_channels( 

2811 tmin=tmin, 

2812 tmax=tmax, 

2813 codes=(net, sta, loc, cha), 

2814 model='stationxml') 

2815 

2816 errors = sx.check_overlaps( 

2817 'Channel', (net, sta, loc, cha), channels) 

2818 

2819 if errors: 

2820 raise sx.Inconsistencies( 

2821 'Inconsistencies found:\n %s' 

2822 % '\n '.join(errors)) 

2823 

2824 for channel in channels: 

2825 station = sx.find_containing(stations, channel) 

2826 if station is not None: 

2827 station.channel_list.append(channel) 

2828 else: 

2829 raise sx.Inconsistencies( 

2830 'No station or station epoch found for ' 

2831 'channel: %s' % '.'.join( 

2832 (net, sta, loc, cha))) 

2833 

2834 if level != 'response': 

2835 continue 

2836 

2837 response_sq, response_sx = self.get_response( 

2838 codes=(net, sta, loc, cha), 

2839 tmin=channel.start_date, 

2840 tmax=channel.end_date, 

2841 model='stationxml+') 

2842 

2843 if not ( 

2844 sx.eq_open( 

2845 channel.start_date, response_sq.tmin) 

2846 and sx.eq_open( 

2847 channel.end_date, response_sq.tmax)): 

2848 

2849 raise sx.Inconsistencies( 

2850 'Response time span does not match ' 

2851 'channel time span: %s' % '.'.join( 

2852 (net, sta, loc, cha))) 

2853 

2854 channel.response = response_sx 

2855 

2856 return sx.FDSNStationXML( 

2857 source='Generated by Pyrocko Squirrel.', 

2858 network_list=networks) 

2859 

2860 def add_operator(self, op): 

2861 self._operators.append(op) 

2862 

2863 def update_operator_mappings(self): 

2864 available = self.get_codes(kind=('channel')) 

2865 

2866 for operator in self._operators: 

2867 operator.update_mappings(available, self._operator_registry) 

2868 

2869 def iter_operator_mappings(self): 

2870 for operator in self._operators: 

2871 for in_codes, out_codes in operator.iter_mappings(): 

2872 yield operator, in_codes, out_codes 

2873 

2874 def get_operator_mappings(self): 

2875 return list(self.iter_operator_mappings()) 

2876 

2877 def get_operator(self, codes): 

2878 try: 

2879 return self._operator_registry[codes][0] 

2880 except KeyError: 

2881 return None 

2882 

2883 def get_operator_group(self, codes): 

2884 try: 

2885 return self._operator_registry[codes] 

2886 except KeyError: 

2887 return None, (None, None, None) 

2888 

2889 def iter_operator_codes(self): 

2890 for _, _, out_codes in self.iter_operator_mappings(): 

2891 for codes in out_codes: 

2892 yield codes 

2893 

2894 def get_operator_codes(self): 

2895 return list(self.iter_operator_codes()) 

2896 

2897 def print_tables(self, table_names=None, stream=None): 

2898 ''' 

2899 Dump raw database tables in textual form (for debugging purposes). 

2900 

2901 :param table_names: 

2902 Names of tables to be dumped or ``None`` to dump all. 

2903 :type table_names: 

2904 :py:class:`list` of :py:class:`str` 

2905 

2906 :param stream: 

2907 Open file or ``None`` to dump to standard output. 

2908 ''' 

2909 

2910 if stream is None: 

2911 stream = sys.stdout 

2912 

2913 if isinstance(table_names, str): 

2914 table_names = [table_names] 

2915 

2916 if table_names is None: 

2917 table_names = [ 

2918 'selection_file_states', 

2919 'selection_nuts', 

2920 'selection_kind_codes_count', 

2921 'files', 'nuts', 'kind_codes', 'kind_codes_count'] 

2922 

2923 m = { 

2924 'selection_file_states': '%(db)s.%(file_states)s', 

2925 'selection_nuts': '%(db)s.%(nuts)s', 

2926 'selection_kind_codes_count': '%(db)s.%(kind_codes_count)s', 

2927 'files': 'files', 

2928 'nuts': 'nuts', 

2929 'kind_codes': 'kind_codes', 

2930 'kind_codes_count': 'kind_codes_count'} 

2931 

2932 for table_name in table_names: 

2933 self._database.print_table( 

2934 m[table_name] % self._names, stream=stream) 

2935 

2936 

class SquirrelStats(Object):
    '''
    Container to hold statistics about contents available from a Squirrel.

    See also :py:meth:`Squirrel.get_stats`.
    '''

    nfiles = Int.T(
        help='Number of files in selection.')
    nnuts = Int.T(
        help='Number of index nuts in selection.')
    codes = List.T(
        Tuple.T(content_t=String.T()),
        help='Available code sequences in selection, e.g. '
             '(agency, network, station, location) for stations nuts.')
    kinds = List.T(
        String.T(),
        help='Available content types in selection.')
    total_size = Int.T(
        help='Aggregated file size of files is selection.')
    counts = Dict.T(
        String.T(), Dict.T(Tuple.T(content_t=String.T()), Int.T()),
        help='Breakdown of how many nuts of any content type and code '
             'sequence are available in selection, ``counts[kind][codes]``.')
    time_spans = Dict.T(
        String.T(), Tuple.T(content_t=Timestamp.T()),
        help='Time spans by content type.')
    sources = List.T(
        String.T(),
        help='Descriptions of attached sources.')
    operators = List.T(
        String.T(),
        help='Descriptions of attached operators.')

    def __str__(self):
        # Total number of nuts per kind, summed over all code sequences.
        kind_counts = dict(
            (kind, sum(self.counts[kind].values())) for kind in self.kinds)

        scodes = model.codes_to_str_abbreviated(self.codes)

        ssources = '<none>' if not self.sources else '\n' + '\n'.join(
            '  ' + s for s in self.sources)

        soperators = '<none>' if not self.operators else '\n' + '\n'.join(
            '  ' + s for s in self.operators)

        def stime(t):
            # Suppress the global catch-all time limits in the output.
            return util.tts(t) if t is not None and t not in (
                model.g_tmin, model.g_tmax) else '<none>'

        def stable(rows):
            # Left-justify columns to the widest entry of each column.
            ns = [max(len(w) for w in col) for col in zip(*rows)]
            return '\n'.join(
                ' '.join(w.ljust(n) for n, w in zip(ns, row))
                for row in rows)

        def indent(s):
            return '\n'.join('  '+line for line in s.splitlines())

        stspans = '<none>' if not self.kinds else '\n' + indent(stable([(
            kind + ':',
            str(kind_counts[kind]),
            stime(self.time_spans[kind][0]),
            '-',
            stime(self.time_spans[kind][1])) for kind in sorted(self.kinds)]))

        s = '''
Number of files:               %i
Total size of known files:     %s
Number of index nuts:          %i
Available content kinds:       %s
Available codes:               %s
Sources:                       %s
Operators:                     %s''' % (
            self.nfiles,
            util.human_bytesize(self.total_size),
            self.nnuts,
            stspans, scodes, ssources, soperators)

        return s.lstrip()

3017 

3018 

3019__all__ = [ 

3020 'Squirrel', 

3021 'SquirrelStats', 

3022]