1# http://pyrocko.org - GPLv3 

2# 

3# The Pyrocko Developers, 21st Century 

4# ---|P------/S----------~Lg---------- 

5 

6import sys 

7import os 

8 

9import math 

10import logging 

11import threading 

12import queue 

13from collections import defaultdict 

14 

15from pyrocko.guts import Object, Int, List, Tuple, String, Timestamp, Dict 

16from pyrocko import util, trace 

17from pyrocko.progress import progress 

18from pyrocko.plot import nice_time_tick_inc_approx_secs 

19 

20from . import model, io, cache, dataset 

21 

22from .model import to_kind_id, WaveformOrder, to_kind, to_codes, \ 

23 STATION, CHANNEL, RESPONSE, EVENT, WAVEFORM, codes_patterns_list, \ 

24 codes_patterns_for_kind 

25from .client import fdsn, catalog 

26from .selection import Selection, filldocs 

27from .database import abspath 

28from .operators.base import Operator, CodesPatternFiltering 

29from . import client, environment, error 

30 

# Module logger; 'psq' is the pyrocko squirrel logging namespace.
logger = logging.getLogger('psq.base')

# Prefix used by guts for serialized type names of classes in this module.
guts_prefix = 'squirrel'

34 

35 

def nonef(f, xs):
    '''
    Apply ``f`` to the non-``None`` items of ``xs``.

    Returns ``None`` when no non-``None`` items remain.
    '''
    filtered = [value for value in xs if value is not None]
    if not filtered:
        return None

    return f(filtered)

42 

43 

def make_task(*args):
    '''Create a progress task which reports through this module's logger.'''
    task = progress.task(*args, logger=logger)
    return task

46 

47 

def lpick(condition, seq):
    '''
    Partition ``seq`` into ``(non_matching, matching)`` by ``condition``.
    '''
    rejected, accepted = [], []
    for element in seq:
        if condition(element):
            accepted.append(element)
        else:
            rejected.append(element)

    return rejected, accepted

54 

55 

def len_plural(obj):
    '''
    Return ``(len(obj), suffix)`` where suffix is ``'s'`` unless length is 1.

    Convenience for log messages like ``'%i file%s' % len_plural(files)``.
    '''
    n = len(obj)
    return n, '' if n == 1 else 's'

58 

59 

def blocks(tmin, tmax, deltat, nsamples_block=100000):
    '''
    Yield consecutive time blocks covering the span ``[tmin, tmax]``.

    The block length is a "nice" time increment close to
    ``deltat * nsamples_block``.
    '''
    tblock = nice_time_tick_inc_approx_secs(
        util.to_time_float(deltat * nsamples_block))

    istart = int(math.floor(tmin / tblock))
    istop = int(math.ceil(tmax / tblock))
    for i in range(istart, istop):
        yield i * tblock, (i + 1) * tblock

67 

68 

def gaps(avail, tmin, tmax):
    '''
    Compute intervals of ``[tmin, tmax]`` not covered by ``avail``.

    :param avail: list of ``(tmin_a, tmax_a)`` covered intervals.
    :returns: list of ``(tmin_g, tmax_g)`` gap intervals.
    '''
    assert tmin < tmax

    # Sweep-line: +1 when coverage begins, -1 when it ends. The query span
    # itself contributes an opening at tmin and a closing at tmax so that
    # uncovered head/tail regions are detected.
    events = [(tmax, 1), (tmin, -1)]
    for tmin_a, tmax_a in avail:
        assert tmin_a < tmax_a
        events.append((tmin_a, 1))
        events.append((tmax_a, -1))

    events.sort()

    uncovered = []
    level = 1
    tmin_g = None
    for t, step in events:
        if level == 1 and step == -1:
            tmin_g = t
        elif level == 0 and step == 1 and tmin_g is not None:
            if tmin_g != t:
                uncovered.append((tmin_g, t))

        level += step

    return uncovered

93 

94 

def order_key(order):
    '''Grouping/sorting key for a waveform order: codes and time span.'''
    return order.codes, order.tmin, order.tmax

97 

98 

99def _is_exact(pat): 

100 return not ('*' in pat or '?' in pat or ']' in pat or '[' in pat) 

101 

102 

def prefix_tree(tups):
    '''
    Build a nested prefix tree from a list of equal-length tuples.

    :returns:
        Sorted list of ``(value, subtree)`` pairs, where ``subtree`` is the
        prefix tree of the remaining tuple elements (``[]`` at the leaves).
    '''
    if not tups:
        return []

    if len(tups[0]) == 1:
        return sorted((tup[0], []) for tup in tups)

    grouped = defaultdict(list)
    for tup in tups:
        grouped[tup[0]].append(tup[1:])

    return [(key, prefix_tree(grouped[key])) for key in sorted(grouped)]

119 

120 

def match_time_span(tmin, tmax, obj):
    '''
    Check whether ``obj``'s time span intersects the query span.

    ``None`` values act as open (unbounded) interval ends.
    '''
    if obj.tmin is not None and tmax is not None and not (obj.tmin <= tmax):
        return False

    if tmin is not None and obj.tmax is not None and not (tmin < obj.tmax):
        return False

    return True

124 

125 

class Batch(object):
    '''
    Batch of waveforms from window-wise data extraction.

    Encapsulates state and results yielded for each window in window-wise
    waveform extraction with the :py:meth:`Squirrel.chopper_waveforms` method.

    *Attributes:*

    .. py:attribute:: tmin

        Start of this time window.

    .. py:attribute:: tmax

        End of this time window.

    .. py:attribute:: i

        Index of this time window in sequence.

    .. py:attribute:: n

        Total number of time windows in sequence.

    .. py:attribute:: igroup

        Index of this time window's sequence group.

    .. py:attribute:: ngroups

        Total number of sequence groups.

    .. py:attribute:: traces

        Extracted waveforms for this time window.
    '''

    def __init__(self, tmin, tmax, i, n, igroup, ngroups, traces):
        # Time window bounds.
        self.tmin = tmin
        self.tmax = tmax
        # Position of this window in the full sequence.
        self.i = i
        self.n = n
        # Position of this window's group among all groups.
        self.igroup = igroup
        self.ngroups = ngroups
        # Waveforms extracted for this window.
        self.traces = traces

172 

173 

174class Squirrel(Selection): 

175 ''' 

176 Prompt, lazy, indexing, caching, dynamic seismological dataset access. 

177 

178 :param env: 

179 Squirrel environment instance or directory path to use as starting 

180 point for its detection. By default, the current directory is used as 

181 starting point. When searching for a usable environment the directory 

182 ``'.squirrel'`` or ``'squirrel'`` in the current (or starting point) 

183 directory is used if it exists, otherwise the parent directories are 

184 searched upwards for the existence of such a directory. If no such

185 directory is found, the user's global Squirrel environment 

186 ``'$HOME/.pyrocko/squirrel'`` is used. 

187 :type env: 

188 :py:class:`~pyrocko.squirrel.environment.Environment` or 

189 :py:class:`str` 

190 

191 :param database: 

192 Database instance or path to database. By default the 

193 database found in the detected Squirrel environment is used. 

194 :type database: 

195 :py:class:`~pyrocko.squirrel.database.Database` or :py:class:`str` 

196 

197 :param cache_path: 

198 Directory path to use for data caching. By default, the ``'cache'`` 

199 directory in the detected Squirrel environment is used. 

200 :type cache_path: 

201 :py:class:`str` 

202 

203 :param persistent: 

204 If given a name, create a persistent selection. 

205 :type persistent: 

206 :py:class:`str` 

207 

208 This is the central class of the Squirrel framework. It provides a unified 

209 interface to query and access seismic waveforms, station meta-data and 

210 event information from local file collections and remote data sources. For 

211 prompt responses, a profound database setup is used under the hood. To 

212 speed up assemblage of ad-hoc data selections, files are indexed on first 

213 use and the extracted meta-data is remembered in the database for 

214 subsequent accesses. Bulk data is lazily loaded from disk and remote 

215 sources, just when requested. Once loaded, data is cached in memory to 

216 expedite typical access patterns. Files and data sources can be dynamically 

217 added to and removed from the Squirrel selection at runtime. 

218 

219 Queries are restricted to the contents of the files currently added to the 

220 Squirrel selection (usually a subset of the file meta-information 

221 collection in the database). This list of files is referred to here as the 

222 "selection". By default, temporary tables are created in the attached 

223 database to hold the names of the files in the selection as well as various 

224 indices and counters. These tables are only visible inside the application 

225 which created them and are deleted when the database connection is closed 

226 or the application exits. To create a selection which is not deleted at 

227 exit, supply a name to the ``persistent`` argument of the Squirrel 

228 constructor. Persistent selections are shared among applications using the 

229 same database. 

230 

231 **Method summary** 

232 

233 Some of the methods are implemented in :py:class:`Squirrel`'s base class 

234 :py:class:`~pyrocko.squirrel.selection.Selection`. 

235 

236 .. autosummary:: 

237 

238 ~Squirrel.add 

239 ~Squirrel.add_source 

240 ~Squirrel.add_fdsn 

241 ~Squirrel.add_catalog 

242 ~Squirrel.add_dataset 

243 ~Squirrel.add_virtual 

244 ~Squirrel.update 

245 ~Squirrel.update_waveform_promises 

246 ~Squirrel.advance_accessor 

247 ~Squirrel.clear_accessor 

248 ~Squirrel.reload 

249 ~pyrocko.squirrel.selection.Selection.iter_paths 

250 ~Squirrel.iter_nuts 

251 ~Squirrel.iter_kinds 

252 ~Squirrel.iter_deltats 

253 ~Squirrel.iter_codes 

254 ~pyrocko.squirrel.selection.Selection.get_paths 

255 ~Squirrel.get_nuts 

256 ~Squirrel.get_kinds 

257 ~Squirrel.get_deltats 

258 ~Squirrel.get_codes 

259 ~Squirrel.get_counts 

260 ~Squirrel.get_time_span 

261 ~Squirrel.get_deltat_span 

262 ~Squirrel.get_nfiles 

263 ~Squirrel.get_nnuts 

264 ~Squirrel.get_total_size 

265 ~Squirrel.get_stats 

266 ~Squirrel.get_content 

267 ~Squirrel.get_stations 

268 ~Squirrel.get_channels 

269 ~Squirrel.get_responses 

270 ~Squirrel.get_events 

271 ~Squirrel.get_waveform_nuts 

272 ~Squirrel.get_waveforms 

273 ~Squirrel.chopper_waveforms 

274 ~Squirrel.get_coverage 

275 ~Squirrel.pile 

276 ~Squirrel.snuffle 

277 ~Squirrel.glob_codes 

278 ~pyrocko.squirrel.selection.Selection.get_database 

279 ~Squirrel.print_tables 

280 ''' 

281 

    def __init__(
            self, env=None, database=None, cache_path=None, persistent=None):

        # Resolve the environment: detect from the given starting-point
        # directory (or the current directory when env is None).
        if not isinstance(env, environment.Environment):
            env = environment.get_environment(env)

        # Fall back to environment-provided defaults for anything not given
        # explicitly.
        if database is None:
            database = env.expand_path(env.database_path)

        if cache_path is None:
            cache_path = env.expand_path(env.cache_path)

        if persistent is None:
            persistent = env.persistent

        Selection.__init__(
            self, database=database, persistent=persistent)

        # Database paths are stored relative to the parent directory of the
        # environment's base path.
        self.get_database().set_basepath(os.path.dirname(env.get_basepath()))

        # Separate in-memory content caches: waveform content is managed
        # independently from all other ('default') content kinds.
        self._content_caches = {
            'waveform': cache.ContentCache(),
            'default': cache.ContentCache()}

        self._cache_path = cache_path

        # Remote data sources and registered operators.
        self._sources = []
        self._operators = []
        self._operator_registry = {}

        self._pending_orders = []

        # Lazily created pile emulation; counter of active waveform choppers.
        self._pile = None
        self._n_choppers_active = 0

        # Names of the selection-specific tables created below; 'self.name'
        # is the selection's unique name (from Selection).
        self._names.update({
            'nuts': self.name + '_nuts',
            'kind_codes_count': self.name + '_kind_codes_count',
            'coverage': self.name + '_coverage'})

        with self.transaction('create tables') as cursor:
            self._create_tables_squirrel(cursor)

324 

    def _create_tables_squirrel(self, cursor):
        '''
        Create selection-specific tables, indices and triggers (idempotent).
        '''

        # Main index of content pieces ('nuts') in the selection. Each nut
        # references an element of a file and carries its kind, codes and
        # time span. kscale buckets nuts by time span length to speed up
        # time range queries (see _timerange_sql).
        cursor.execute(self._register_table(self._sql(
            '''
                CREATE TABLE IF NOT EXISTS %(db)s.%(nuts)s (
                    nut_id integer PRIMARY KEY,
                    file_id integer,
                    file_segment integer,
                    file_element integer,
                    kind_id integer,
                    kind_codes_id integer,
                    tmin_seconds integer,
                    tmin_offset integer,
                    tmax_seconds integer,
                    tmax_offset integer,
                    kscale integer)
            ''')))

        # Per (kind, codes) counters, maintained by the inc/dec triggers
        # below.
        cursor.execute(self._register_table(self._sql(
            '''
                CREATE TABLE IF NOT EXISTS %(db)s.%(kind_codes_count)s (
                    kind_codes_id integer PRIMARY KEY,
                    count integer)
            ''')))

        cursor.execute(self._sql(
            '''
                CREATE UNIQUE INDEX IF NOT EXISTS %(db)s.%(nuts)s_file_element
                    ON %(nuts)s (file_id, file_segment, file_element)
            '''))

        cursor.execute(self._sql(
            '''
                CREATE INDEX IF NOT EXISTS %(db)s.%(nuts)s_index_file_id
                    ON %(nuts)s (file_id)
            '''))

        cursor.execute(self._sql(
            '''
                CREATE INDEX IF NOT EXISTS %(db)s.%(nuts)s_index_tmin_seconds
                    ON %(nuts)s (kind_id, tmin_seconds)
            '''))

        cursor.execute(self._sql(
            '''
                CREATE INDEX IF NOT EXISTS %(db)s.%(nuts)s_index_tmax_seconds
                    ON %(nuts)s (kind_id, tmax_seconds)
            '''))

        cursor.execute(self._sql(
            '''
                CREATE INDEX IF NOT EXISTS %(db)s.%(nuts)s_index_kscale
                    ON %(nuts)s (kind_id, kscale, tmin_seconds)
            '''))

        # Keep the nuts table consistent when files disappear from the main
        # database.
        cursor.execute(self._sql(
            '''
                CREATE TRIGGER IF NOT EXISTS %(db)s.%(nuts)s_delete_nuts
                    BEFORE DELETE ON main.files FOR EACH ROW
                    BEGIN
                      DELETE FROM %(nuts)s WHERE file_id == old.file_id;
                    END
            '''))

        # trigger only on size to make silent update of mtime possible
        cursor.execute(self._sql(
            '''
                CREATE TRIGGER IF NOT EXISTS %(db)s.%(nuts)s_delete_nuts2
                    BEFORE UPDATE OF size ON main.files FOR EACH ROW
                    BEGIN
                      DELETE FROM %(nuts)s WHERE file_id == old.file_id;
                    END
            '''))

        # Remove nuts when a file leaves the selection.
        cursor.execute(self._sql(
            '''
                CREATE TRIGGER IF NOT EXISTS
                    %(db)s.%(file_states)s_delete_files
                    BEFORE DELETE ON %(db)s.%(file_states)s FOR EACH ROW
                    BEGIN
                        DELETE FROM %(nuts)s WHERE file_id == old.file_id;
                    END
            '''))

        # Maintain per (kind, codes) nut counts on insert/delete.
        cursor.execute(self._sql(
            '''
                CREATE TRIGGER IF NOT EXISTS %(db)s.%(nuts)s_inc_kind_codes
                    BEFORE INSERT ON %(nuts)s FOR EACH ROW
                    BEGIN
                        INSERT OR IGNORE INTO %(kind_codes_count)s VALUES
                        (new.kind_codes_id, 0);
                        UPDATE %(kind_codes_count)s
                        SET count = count + 1
                        WHERE new.kind_codes_id
                            == %(kind_codes_count)s.kind_codes_id;
                    END
            '''))

        cursor.execute(self._sql(
            '''
                CREATE TRIGGER IF NOT EXISTS %(db)s.%(nuts)s_dec_kind_codes
                    BEFORE DELETE ON %(nuts)s FOR EACH ROW
                    BEGIN
                        UPDATE %(kind_codes_count)s
                        SET count = count - 1
                        WHERE old.kind_codes_id
                            == %(kind_codes_count)s.kind_codes_id;
                    END
            '''))

        # Coverage table: per (kind, codes), a sequence of time points with
        # step values; maintained below so that step accumulates +1 at span
        # starts and -1 at span ends.
        cursor.execute(self._register_table(self._sql(
            '''
                CREATE TABLE IF NOT EXISTS %(db)s.%(coverage)s (
                    kind_codes_id integer,
                    time_seconds integer,
                    time_offset integer,
                    step integer)
            ''')))

        cursor.execute(self._sql(
            '''
                CREATE UNIQUE INDEX IF NOT EXISTS %(db)s.%(coverage)s_time
                    ON %(coverage)s (kind_codes_id, time_seconds, time_offset)
            '''))

        # On nut insert: +1 step at the nut's tmin, -1 at its tmax; entries
        # whose step becomes 0 are pruned.
        cursor.execute(self._sql(
            '''
                CREATE TRIGGER IF NOT EXISTS %(db)s.%(nuts)s_add_coverage
                    AFTER INSERT ON %(nuts)s FOR EACH ROW
                    BEGIN
                        INSERT OR IGNORE INTO %(coverage)s VALUES
                        (new.kind_codes_id, new.tmin_seconds, new.tmin_offset, 0)
                        ;
                        UPDATE %(coverage)s
                        SET step = step + 1
                        WHERE new.kind_codes_id == %(coverage)s.kind_codes_id
                            AND new.tmin_seconds == %(coverage)s.time_seconds
                            AND new.tmin_offset == %(coverage)s.time_offset
                        ;
                        INSERT OR IGNORE INTO %(coverage)s VALUES
                        (new.kind_codes_id, new.tmax_seconds, new.tmax_offset, 0)
                        ;
                        UPDATE %(coverage)s
                        SET step = step - 1
                        WHERE new.kind_codes_id == %(coverage)s.kind_codes_id
                            AND new.tmax_seconds == %(coverage)s.time_seconds
                            AND new.tmax_offset == %(coverage)s.time_offset
                        ;
                        DELETE FROM %(coverage)s
                        WHERE new.kind_codes_id == %(coverage)s.kind_codes_id
                            AND new.tmin_seconds == %(coverage)s.time_seconds
                            AND new.tmin_offset == %(coverage)s.time_offset
                            AND step == 0
                        ;
                        DELETE FROM %(coverage)s
                        WHERE new.kind_codes_id == %(coverage)s.kind_codes_id
                            AND new.tmax_seconds == %(coverage)s.time_seconds
                            AND new.tmax_offset == %(coverage)s.time_offset
                            AND step == 0
                        ;
                    END
            '''))

        # On nut delete: inverse of the insert trigger above.
        cursor.execute(self._sql(
            '''
                CREATE TRIGGER IF NOT EXISTS %(db)s.%(nuts)s_remove_coverage
                    BEFORE DELETE ON %(nuts)s FOR EACH ROW
                    BEGIN
                        INSERT OR IGNORE INTO %(coverage)s VALUES
                        (old.kind_codes_id, old.tmin_seconds, old.tmin_offset, 0)
                        ;
                        UPDATE %(coverage)s
                        SET step = step - 1
                        WHERE old.kind_codes_id == %(coverage)s.kind_codes_id
                            AND old.tmin_seconds == %(coverage)s.time_seconds
                            AND old.tmin_offset == %(coverage)s.time_offset
                        ;
                        INSERT OR IGNORE INTO %(coverage)s VALUES
                        (old.kind_codes_id, old.tmax_seconds, old.tmax_offset, 0)
                        ;
                        UPDATE %(coverage)s
                        SET step = step + 1
                        WHERE old.kind_codes_id == %(coverage)s.kind_codes_id
                            AND old.tmax_seconds == %(coverage)s.time_seconds
                            AND old.tmax_offset == %(coverage)s.time_offset
                        ;
                        DELETE FROM %(coverage)s
                        WHERE old.kind_codes_id == %(coverage)s.kind_codes_id
                            AND old.tmin_seconds == %(coverage)s.time_seconds
                            AND old.tmin_offset == %(coverage)s.time_offset
                            AND step == 0
                        ;
                        DELETE FROM %(coverage)s
                        WHERE old.kind_codes_id == %(coverage)s.kind_codes_id
                            AND old.tmax_seconds == %(coverage)s.time_seconds
                            AND old.tmax_offset == %(coverage)s.time_offset
                            AND step == 0
                        ;
                    END
            '''))

525 

    def _delete(self):
        '''Delete database tables associated with this Squirrel.'''

        with self.transaction('delete tables') as cursor:
            # Drop triggers before tables; coverage-related objects carry
            # IF EXISTS as they may be absent in databases created by older
            # versions.
            for s in '''
                    DROP TRIGGER %(db)s.%(nuts)s_delete_nuts;
                    DROP TRIGGER %(db)s.%(nuts)s_delete_nuts2;
                    DROP TRIGGER %(db)s.%(file_states)s_delete_files;
                    DROP TRIGGER %(db)s.%(nuts)s_inc_kind_codes;
                    DROP TRIGGER %(db)s.%(nuts)s_dec_kind_codes;
                    DROP TABLE %(db)s.%(nuts)s;
                    DROP TABLE %(db)s.%(kind_codes_count)s;
                    DROP TRIGGER IF EXISTS %(db)s.%(nuts)s_add_coverage;
                    DROP TRIGGER IF EXISTS %(db)s.%(nuts)s_remove_coverage;
                    DROP TABLE IF EXISTS %(db)s.%(coverage)s;
                    '''.strip().splitlines():

                cursor.execute(self._sql(s))

        Selection._delete(self)

546 

    @filldocs
    def add(self,
            paths,
            kinds=None,
            format='detect',
            include=None,
            exclude=None,
            check=True):

        '''
        Add files to the selection.

        :param paths:
            Iterator yielding paths to files or directories to be added to the
            selection. Recurses into directories. If given a ``str``, it
            is treated as a single path to be added.
        :type paths:
            :py:class:`list` of :py:class:`str`

        :param kinds:
            Content types to be made available through the Squirrel selection.
            By default, all known content types are accepted.
        :type kinds:
            :py:class:`list` of :py:class:`str`

        :param format:
            File format identifier or ``'detect'`` to enable auto-detection
            (available: %(file_formats)s).
        :type format:
            str

        :param include:
            If not ``None``, files are only included if their paths match the
            given regular expression pattern.
        :type include:
            str

        :param exclude:
            If not ``None``, files are only included if their paths do not
            match the given regular expression pattern.
        :type exclude:
            str

        :param check:
            If ``True``, all file modification times are checked to see if
            cached information has to be updated (slow). If ``False``, only
            previously unknown files are indexed and cached information is used
            for known files, regardless of file state (fast, corresponds to
            Squirrel's ``--optimistic`` mode). File deletions will go
            undetected in the latter case.
        :type check:
            bool

        :Complexity:
            O(log N)
        '''

        # Allow single values where lists are expected.
        if isinstance(kinds, str):
            kinds = (kinds,)

        if isinstance(paths, str):
            paths = [paths]

        # Bitmask restricting which content kinds get indexed for the added
        # files.
        kind_mask = model.to_kind_mask(kinds)

        with progress.view():
            Selection.add(
                self, util.iter_select_files(
                    paths,
                    show_progress=False,
                    include=include,
                    exclude=exclude,
                    # 'virtual:' paths are not filesystem paths; pass them
                    # through untouched.
                    pass_through=lambda path: path.startswith('virtual:')
                ), kind_mask, format)

        self._load(check)
        self._update_nuts()

624 

    def reload(self):
        '''
        Check for modifications and reindex modified files.

        Based on file modification times.
        '''

        # Force re-checking of all files in the selection, then reindex
        # whatever changed and refresh the nuts tables.
        self._set_file_states_force_check()
        self._load(check=True)
        self._update_nuts()

635 

636 def add_virtual(self, nuts, virtual_paths=None): 

637 ''' 

638 Add content which is not backed by files. 

639 

640 :param nuts: 

641 Content pieces to be added. 

642 :type nuts: 

643 iterator yielding :py:class:`~pyrocko.squirrel.model.Nut` objects 

644 

645 :param virtual_paths: 

646 List of virtual paths to prevent creating a temporary list of the 

647 nuts while aggregating the file paths for the selection. 

648 :type virtual_paths: 

649 :py:class:`list` of :py:class:`str` 

650 

651 Stores to the main database and the selection. 

652 ''' 

653 

654 if isinstance(virtual_paths, str): 

655 virtual_paths = [virtual_paths] 

656 

657 if virtual_paths is None: 

658 if not isinstance(nuts, list): 

659 nuts = list(nuts) 

660 virtual_paths = set(nut.file_path for nut in nuts) 

661 

662 Selection.add(self, virtual_paths) 

663 self.get_database().dig(nuts) 

664 self._update_nuts() 

665 

    def add_volatile(self, nuts):
        '''
        Add in-memory content to be removed when the application exits.
        '''
        if not isinstance(nuts, list):
            nuts = list(nuts)

        # Register the nuts with the virtual I/O backend and remember their
        # paths for cleanup at exit.
        paths = list(set(nut.file_path for nut in nuts))
        io.backends.virtual.add_nuts(nuts)
        self.add_virtual(nuts, paths)
        self._volatile_paths.extend(paths)

674 

675 def add_volatile_waveforms(self, traces): 

676 ''' 

677 Add in-memory waveforms which will be removed when the app closes. 

678 ''' 

679 

680 name = model.random_name() 

681 

682 path = 'virtual:volatile:%s' % name 

683 

684 nuts = [] 

685 for itr, tr in enumerate(traces): 

686 assert tr.tmin <= tr.tmax 

687 tmin_seconds, tmin_offset = model.tsplit(tr.tmin) 

688 tmax_seconds, tmax_offset = model.tsplit( 

689 tr.tmin + tr.data_len()*tr.deltat) 

690 

691 nuts.append(model.Nut( 

692 file_path=path, 

693 file_format='virtual', 

694 file_segment=itr, 

695 file_element=0, 

696 file_mtime=0, 

697 codes=tr.codes, 

698 tmin_seconds=tmin_seconds, 

699 tmin_offset=tmin_offset, 

700 tmax_seconds=tmax_seconds, 

701 tmax_offset=tmax_offset, 

702 deltat=tr.deltat, 

703 kind_id=to_kind_id('waveform'), 

704 content=tr)) 

705 

706 self.add_volatile(nuts) 

707 return path 

708 

    def _load(self, check):
        # Index all files in the selection without retaining content
        # (content=[]). skip_unchanged avoids touching files already known
        # to be up to date; check controls whether modification times are
        # verified against the cached state.
        for _ in io.iload(
                self,
                content=[],
                skip_unchanged=True,
                check=check):
            pass

716 

    def _update_nuts(self, transaction=None):
        '''
        Copy new nuts from the global database into the selection's tables.
        '''
        transaction = transaction or self.transaction('update nuts')
        with make_task('Aggregating selection') as task, \
                transaction as cursor:

            # Report progress via the sqlite progress handler while the
            # potentially large INSERT runs.
            self._conn.set_progress_handler(task.update, 100000)
            nrows = cursor.execute(self._sql(
                '''
                    INSERT INTO %(db)s.%(nuts)s
                    SELECT NULL,
                        nuts.file_id, nuts.file_segment, nuts.file_element,
                        nuts.kind_id, nuts.kind_codes_id,
                        nuts.tmin_seconds, nuts.tmin_offset,
                        nuts.tmax_seconds, nuts.tmax_offset,
                        nuts.kscale
                    FROM %(db)s.%(file_states)s
                    INNER JOIN nuts
                        ON %(db)s.%(file_states)s.file_id == nuts.file_id
                    INNER JOIN kind_codes
                        ON nuts.kind_codes_id ==
                           kind_codes.kind_codes_id
                    WHERE %(db)s.%(file_states)s.file_state != 2
                        AND (((1 << kind_codes.kind_id)
                            & %(db)s.%(file_states)s.kind_mask) != 0)
                ''')).rowcount

            task.update(nrows)
            self._set_file_states_known(transaction)
            self._conn.set_progress_handler(None, 0)

746 

    def add_source(self, source, check=True):
        '''
        Add remote resource.

        :param source:
            Remote data access client instance.
        :type source:
            subclass of :py:class:`~pyrocko.squirrel.client.base.Source`
        '''

        self._sources.append(source)
        # check is forwarded to the source's setup; presumably it controls
        # cache validation as in add()/add_dataset() -- confirm in
        # client.base.Source.setup.
        source.setup(self, check=check)

759 

760 def add_fdsn(self, *args, **kwargs): 

761 ''' 

762 Add FDSN site for transparent remote data access. 

763 

764 Arguments are passed to 

765 :py:class:`~pyrocko.squirrel.client.fdsn.FDSNSource`. 

766 ''' 

767 

768 self.add_source(fdsn.FDSNSource(*args, **kwargs)) 

769 

770 def add_catalog(self, *args, **kwargs): 

771 ''' 

772 Add online catalog for transparent event data access. 

773 

774 Arguments are passed to 

775 :py:class:`~pyrocko.squirrel.client.catalog.CatalogSource`. 

776 ''' 

777 

778 self.add_source(catalog.CatalogSource(*args, **kwargs)) 

779 

780 def add_dataset(self, ds, check=True): 

781 ''' 

782 Read dataset description from file and add its contents. 

783 

784 :param ds: 

785 Path to dataset description file or dataset description object 

786 . See :py:mod:`~pyrocko.squirrel.dataset`. 

787 :type ds: 

788 :py:class:`str` or :py:class:`~pyrocko.squirrel.dataset.Dataset` 

789 

790 :param check: 

791 If ``True``, all file modification times are checked to see if 

792 cached information has to be updated (slow). If ``False``, only 

793 previously unknown files are indexed and cached information is used 

794 for known files, regardless of file state (fast, corrresponds to 

795 Squirrel's ``--optimistic`` mode). File deletions will go 

796 undetected in the latter case. 

797 :type check: 

798 bool 

799 ''' 

800 if isinstance(ds, str): 

801 ds = dataset.read_dataset(ds) 

802 

803 ds.setup(self, check=check) 

804 

805 def _get_selection_args( 

806 self, kind_id, 

807 obj=None, tmin=None, tmax=None, time=None, codes=None): 

808 

809 if codes is not None: 

810 codes = codes_patterns_for_kind(kind_id, codes) 

811 

812 if time is not None: 

813 tmin = time 

814 tmax = time 

815 

816 if obj is not None: 

817 tmin = tmin if tmin is not None else obj.tmin 

818 tmax = tmax if tmax is not None else obj.tmax 

819 codes = codes if codes is not None else codes_patterns_for_kind( 

820 kind_id, obj.codes) 

821 

822 return tmin, tmax, codes 

823 

824 def _get_selection_args_str(self, *args, **kwargs): 

825 

826 tmin, tmax, codes = self._get_selection_args(*args, **kwargs) 

827 return 'tmin: %s, tmax: %s, codes: %s' % ( 

828 util.time_to_str(tmin) if tmin is not None else 'none', 

829 util.time_to_str(tmax) if tmax is not None else 'none', 

830 ','.join(str(entry) for entry in codes)) 

831 

832 def _selection_args_to_kwargs( 

833 self, obj=None, tmin=None, tmax=None, time=None, codes=None): 

834 

835 return dict(obj=obj, tmin=tmin, tmax=tmax, time=time, codes=codes) 

836 

    def _timerange_sql(self, tmin, tmax, kind, cond, args, naiv):
        '''
        Append SQL conditions for a time range query to ``cond``/``args``.

        Uses the kscale bucketing of the nuts table to bound the search on
        ``tmin_seconds``. With ``naiv``, a simple unbounded condition is
        used instead (slow, for testing).
        '''

        tmin_seconds, tmin_offset = model.tsplit(tmin)
        tmax_seconds, tmax_offset = model.tsplit(tmax)
        if naiv:
            cond.append('%(db)s.%(nuts)s.tmin_seconds <= ?')
            args.append(tmax_seconds)
        else:
            tscale_edges = model.tscale_edges
            tmin_cond = []
            for kscale in range(tscale_edges.size + 1):
                if kscale != tscale_edges.size:
                    # Nuts in bucket kscale have spans bounded by tscale, so
                    # only nuts with tmin in [tmin - tscale, tmax] can
                    # intersect the query window (+/-1 for offset rounding).
                    tscale = int(tscale_edges[kscale])
                    tmin_cond.append('''
                        (%(db)s.%(nuts)s.kind_id = ?
                         AND %(db)s.%(nuts)s.kscale == ?
                         AND %(db)s.%(nuts)s.tmin_seconds BETWEEN ? AND ?)
                    ''')
                    args.extend(
                        (to_kind_id(kind), kscale,
                         tmin_seconds - tscale - 1, tmax_seconds + 1))

                else:
                    # Last bucket has no upper span bound; only the upper
                    # tmin limit applies.
                    tmin_cond.append('''
                        (%(db)s.%(nuts)s.kind_id == ?
                         AND %(db)s.%(nuts)s.kscale == ?
                         AND %(db)s.%(nuts)s.tmin_seconds <= ?)
                    ''')

                    args.extend(
                        (to_kind_id(kind), kscale, tmax_seconds + 1))
            if tmin_cond:
                cond.append(' ( ' + ' OR '.join(tmin_cond) + ' ) ')

            cond.append('%(db)s.%(nuts)s.tmax_seconds >= ?')
            args.append(tmin_seconds)

873 

    def _codes_match_sql(self, positive, kind_id, codes, cond, args):
        '''
        Append SQL conditions matching codes patterns to ``cond``/``args``.

        :param positive:
            If ``False``, the combined condition is negated (used for
            ``codes_exclude``).
        '''
        pats = codes_patterns_for_kind(kind_id, codes)
        if pats is None:
            return

        # Patterns without wildcard characters can use a fast IN lookup;
        # the rest need GLOB matching.
        pats_exact = []
        pats_nonexact = []
        for pat in pats:
            spat = pat.safe_str
            (pats_exact if _is_exact(spat) else pats_nonexact).append(spat)

        codes_cond = []
        if pats_exact:
            codes_cond.append(' ( kind_codes.codes IN ( %s ) ) ' % ', '.join(
                '?'*len(pats_exact)))

            args.extend(pats_exact)

        if pats_nonexact:
            codes_cond.append(' ( %s ) ' % ' OR '.join(
                ('kind_codes.codes GLOB ?',) * len(pats_nonexact)))

            args.extend(pats_nonexact)

        if codes_cond:
            cond.append('%s ( %s )' % (
                'NOT' if not positive else '',
                ' OR '.join(codes_cond)))

902 

903 def iter_nuts( 

904 self, kind=None, tmin=None, tmax=None, codes=None, 

905 codes_exclude=None, sample_rate_min=None, sample_rate_max=None, 

906 naiv=False, kind_codes_ids=None, path=None, limit=None): 

907 

908 ''' 

909 Iterate over content entities matching given constraints. 

910 

911 :param kind: 

912 Content kind (or kinds) to extract. 

913 :type kind: 

914 :py:class:`str`, :py:class:`list` of :py:class:`str` 

915 

916 :param tmin: 

917 Start time of query interval. 

918 :type tmin: 

919 timestamp 

920 

921 :param tmax: 

922 End time of query interval. 

923 :type tmax: 

924 timestamp 

925 

926 :param codes: 

927 List of code patterns to query. 

928 :type codes: 

929 :py:class:`list` of :py:class:`~pyrocko.squirrel.model.Codes` 

930 objects appropriate for the queried content type, or anything which 

931 can be converted to such objects. 

932 

933 :param naiv: 

934 Bypass time span lookup through indices (slow, for testing). 

935 :type naiv: 

936 :py:class:`bool` 

937 

938 :param kind_codes_ids: 

939 Kind-codes IDs of contents to be retrieved (internal use). 

940 :type kind_codes_ids: 

941 :py:class:`list` of :py:class:`int` 

942 

943 :yields: 

944 :py:class:`~pyrocko.squirrel.model.Nut` objects representing the 

945 intersecting content. 

946 

947 :complexity: 

948 O(log N) for the time selection part due to heavy use of database 

949 indices. 

950 

951 Query time span is treated as a half-open interval ``[tmin, tmax)``. 

952 However, if ``tmin`` equals ``tmax``, the edge logics are modified to 

953 closed-interval so that content intersecting with the time instant ``t 

954 = tmin = tmax`` is returned (otherwise nothing would be returned as 

955 ``[t, t)`` never matches anything). 

956 

957 Time spans of content entities to be matched are also treated as half 

958 open intervals, e.g. content span ``[0, 1)`` is matched by query span 

959 ``[0, 1)`` but not by ``[-1, 0)`` or ``[1, 2)``. Also here, logics are 

960 modified to closed-interval when the content time span is an empty 

961 interval, i.e. to indicate a time instant. E.g. time instant 0 is 

962 matched by ``[0, 1)`` but not by ``[-1, 0)`` or ``[1, 2)``. 

963 ''' 

964 

965 if not isinstance(kind, str): 

966 if kind is None: 

967 kind = model.g_content_kinds 

968 for kind_ in kind: 

969 for nut in self.iter_nuts(kind_, tmin, tmax, codes): 

970 yield nut 

971 

972 return 

973 

974 kind_id = to_kind_id(kind) 

975 

976 cond = [] 

977 args = [] 

978 if tmin is not None or tmax is not None: 

979 assert kind is not None 

980 if tmin is None: 

981 tmin = self.get_time_span()[0] 

982 if tmax is None: 

983 tmax = self.get_time_span()[1] + 1.0 

984 

985 self._timerange_sql(tmin, tmax, kind, cond, args, naiv) 

986 

987 cond.append('kind_codes.kind_id == ?') 

988 args.append(kind_id) 

989 

990 if codes is not None: 

991 self._codes_match_sql(True, kind_id, codes, cond, args) 

992 

993 if codes_exclude is not None: 

994 self._codes_match_sql(False, kind_id, codes_exclude, cond, args) 

995 

996 if sample_rate_min is not None: 

997 cond.append('kind_codes.deltat <= ?') 

998 args.append(1.0/sample_rate_min) 

999 

1000 if sample_rate_max is not None: 

1001 cond.append('? <= kind_codes.deltat') 

1002 args.append(1.0/sample_rate_max) 

1003 

1004 if kind_codes_ids is not None: 

1005 cond.append( 

1006 ' ( kind_codes.kind_codes_id IN ( %s ) ) ' % ', '.join( 

1007 '?'*len(kind_codes_ids))) 

1008 

1009 args.extend(kind_codes_ids) 

1010 

1011 db = self.get_database() 

1012 if path is not None: 

1013 cond.append('files.path == ?') 

1014 args.append(db.relpath(abspath(path))) 

1015 

1016 sql = (''' 

1017 SELECT 

1018 files.path, 

1019 files.format, 

1020 files.mtime, 

1021 files.size, 

1022 %(db)s.%(nuts)s.file_segment, 

1023 %(db)s.%(nuts)s.file_element, 

1024 kind_codes.kind_id, 

1025 kind_codes.codes, 

1026 %(db)s.%(nuts)s.tmin_seconds, 

1027 %(db)s.%(nuts)s.tmin_offset, 

1028 %(db)s.%(nuts)s.tmax_seconds, 

1029 %(db)s.%(nuts)s.tmax_offset, 

1030 kind_codes.deltat 

1031 FROM files 

1032 INNER JOIN %(db)s.%(nuts)s 

1033 ON files.file_id == %(db)s.%(nuts)s.file_id 

1034 INNER JOIN kind_codes 

1035 ON %(db)s.%(nuts)s.kind_codes_id == kind_codes.kind_codes_id 

1036 ''') 

1037 

1038 if cond: 

1039 sql += ''' WHERE ''' + ' AND '.join(cond) 

1040 

1041 if limit is not None: 

1042 sql += ''' LIMIT %i''' % limit 

1043 

1044 sql = self._sql(sql) 

1045 if tmin is None and tmax is None: 

1046 for row in self._conn.execute(sql, args): 

1047 row = (db.abspath(row[0]),) + row[1:] 

1048 nut = model.Nut(values_nocheck=row) 

1049 yield nut 

1050 else: 

1051 assert tmin is not None and tmax is not None 

1052 if tmin == tmax: 

1053 for row in self._conn.execute(sql, args): 

1054 row = (db.abspath(row[0]),) + row[1:] 

1055 nut = model.Nut(values_nocheck=row) 

1056 if (nut.tmin <= tmin < nut.tmax) \ 

1057 or (nut.tmin == nut.tmax and tmin == nut.tmin): 

1058 

1059 yield nut 

1060 else: 

1061 for row in self._conn.execute(sql, args): 

1062 row = (db.abspath(row[0]),) + row[1:] 

1063 nut = model.Nut(values_nocheck=row) 

1064 if (tmin < nut.tmax and nut.tmin < tmax) \ 

1065 or (nut.tmin == nut.tmax 

1066 and tmin <= nut.tmin < tmax): 

1067 

1068 yield nut 

1069 

1070 def get_nuts(self, *args, **kwargs): 

1071 ''' 

1072 Get content entities matching given constraints. 

1073 

1074 Like :py:meth:`iter_nuts` but returns results as a list. 

1075 ''' 

1076 

1077 return list(self.iter_nuts(*args, **kwargs)) 

1078 

    def _split_nuts(
            self, kind, tmin=None, tmax=None, codes=None, path=None):

        # Cut all nuts of the given kind overlapping the span [tmin, tmax):
        # the overlapping part is removed and any remainders outside the span
        # are re-inserted as new nuts. The operation is applied both to the
        # live selection and to the global database (see loop below).

        kind_id = to_kind_id(kind)
        tmin_seconds, tmin_offset = model.tsplit(tmin)
        tmax_seconds, tmax_offset = model.tsplit(tmax)

        # Template substitutions targeting the global database's main.nuts
        # table; the selection's own tables are addressed via self._sql.
        names_main_nuts = dict(self._names)
        names_main_nuts.update(db='main', nuts='nuts')

        db = self.get_database()

        def main_nuts(s):
            return s % names_main_nuts

        with self.transaction('split nuts') as cursor:
            # modify selection and main
            for sql_subst in [
                    self._sql, main_nuts]:

                cond = []
                args = []

                self._timerange_sql(tmin, tmax, kind, cond, args, False)

                if codes is not None:
                    self._codes_match_sql(True, kind_id, codes, cond, args)

                if path is not None:
                    cond.append('files.path == ?')
                    args.append(db.relpath(abspath(path)))

                sql = sql_subst('''
                    SELECT
                        %(db)s.%(nuts)s.nut_id,
                        %(db)s.%(nuts)s.tmin_seconds,
                        %(db)s.%(nuts)s.tmin_offset,
                        %(db)s.%(nuts)s.tmax_seconds,
                        %(db)s.%(nuts)s.tmax_offset,
                        kind_codes.deltat
                    FROM files
                    INNER JOIN %(db)s.%(nuts)s
                        ON files.file_id == %(db)s.%(nuts)s.file_id
                    INNER JOIN kind_codes
                        ON %(db)s.%(nuts)s.kind_codes_id == kind_codes.kind_codes_id
                    WHERE ''' + ' AND '.join(cond))  # noqa

                insert = []
                delete = []
                for row in cursor.execute(sql, args):
                    nut_id, nut_tmin_seconds, nut_tmin_offset, \
                        nut_tmax_seconds, nut_tmax_offset, nut_deltat = row

                    nut_tmin = model.tjoin(
                        nut_tmin_seconds, nut_tmin_offset)
                    nut_tmax = model.tjoin(
                        nut_tmax_seconds, nut_tmax_offset)

                    if nut_tmin < tmax and tmin < nut_tmax:
                        # Nut overlaps [tmin, tmax): keep left and/or right
                        # remainders (if any) and drop the original entry.
                        if nut_tmin < tmin:
                            insert.append((
                                nut_tmin_seconds, nut_tmin_offset,
                                tmin_seconds, tmin_offset,
                                model.tscale_to_kscale(
                                    tmin_seconds - nut_tmin_seconds),
                                nut_id))

                        if tmax < nut_tmax:
                            insert.append((
                                tmax_seconds, tmax_offset,
                                nut_tmax_seconds, nut_tmax_offset,
                                model.tscale_to_kscale(
                                    nut_tmax_seconds - tmax_seconds),
                                nut_id))

                        delete.append((nut_id,))

                # Re-insert remainders, cloning all non-time columns from the
                # original nut row before it is deleted.
                sql_add = '''
                    INSERT INTO %(db)s.%(nuts)s (
                            file_id, file_segment, file_element, kind_id,
                            kind_codes_id, tmin_seconds, tmin_offset,
                            tmax_seconds, tmax_offset, kscale )
                        SELECT
                            file_id, file_segment, file_element,
                            kind_id, kind_codes_id, ?, ?, ?, ?, ?
                        FROM %(db)s.%(nuts)s
                        WHERE nut_id == ?
                '''
                cursor.executemany(sql_subst(sql_add), insert)

                sql_delete = '''
                    DELETE FROM %(db)s.%(nuts)s WHERE nut_id == ?
                '''
                cursor.executemany(sql_subst(sql_delete), delete)

1173 

    def get_time_span(self, kinds=None, tight=True, dummy_limits=True):
        '''
        Get time interval over all content in selection.

        :param kinds:
            If not ``None``, restrict query to given content kinds.
        :type kinds:
            list of str

        :param tight:
            If ``True``, ignore contents with open (unset) time limits when
            aggregating. If ``False``, any open limit makes the corresponding
            global limit open as well.
        :type tight:
            bool

        :param dummy_limits:
            If ``True``, replace open limits with the global placeholder
            limits instead of returning ``None``.
        :type dummy_limits:
            bool

        :complexity:
            O(1), independent of the number of nuts.

        :returns:
            ``(tmin, tmax)``, combined time interval of queried content kinds.
        '''

        # Two-step min/max: first find the extreme integer seconds value,
        # then the extreme sub-second offset among rows sharing that value.
        sql_min = self._sql('''
            SELECT MIN(tmin_seconds), MIN(tmin_offset)
            FROM %(db)s.%(nuts)s
            WHERE kind_id == ?
                AND tmin_seconds == (
                    SELECT MIN(tmin_seconds)
                    FROM %(db)s.%(nuts)s
                    WHERE kind_id == ?)
        ''')

        sql_max = self._sql('''
            SELECT MAX(tmax_seconds), MAX(tmax_offset)
            FROM %(db)s.%(nuts)s
            WHERE kind_id == ?
                AND tmax_seconds == (
                    SELECT MAX(tmax_seconds)
                    FROM %(db)s.%(nuts)s
                    WHERE kind_id == ?)
        ''')

        gtmin = None
        gtmax = None

        # Allow a single kind to be given as a plain string.
        if isinstance(kinds, str):
            kinds = [kinds]

        if kinds is None:
            kind_ids = model.g_content_kind_ids
        else:
            kind_ids = model.to_kind_ids(kinds)

        tmins = []
        tmaxs = []
        for kind_id in kind_ids:
            for tmin_seconds, tmin_offset in self._conn.execute(
                    sql_min, (kind_id, kind_id)):
                tmins.append(model.tjoin(tmin_seconds, tmin_offset))

            for (tmax_seconds, tmax_offset) in self._conn.execute(
                    sql_max, (kind_id, kind_id)):
                tmaxs.append(model.tjoin(tmax_seconds, tmax_offset))

        # Placeholder limits (g_tmin/g_tmax) mark open intervals; map them
        # to None before aggregating.
        tmins = [tmin if tmin != model.g_tmin else None for tmin in tmins]
        tmaxs = [tmax if tmax != model.g_tmax else None for tmax in tmaxs]

        if tight:
            gtmin = nonef(min, tmins)
            gtmax = nonef(max, tmaxs)
        else:
            # Any open limit propagates to the combined result.
            gtmin = None if None in tmins else nonef(min, tmins)
            gtmax = None if None in tmaxs else nonef(max, tmaxs)

        if dummy_limits:
            if gtmin is None:
                gtmin = model.g_tmin
            if gtmax is None:
                gtmax = model.g_tmax

        return gtmin, gtmax

1249 

1250 def has(self, kinds): 

1251 ''' 

1252 Check availability of given content kinds. 

1253 

1254 :param kinds: 

1255 Content kinds to query. 

1256 :type kind: 

1257 list of str 

1258 

1259 :returns: 

1260 ``True`` if any of the queried content kinds is available 

1261 in the selection. 

1262 ''' 

1263 self_tmin, self_tmax = self.get_time_span( 

1264 kinds, dummy_limits=False) 

1265 

1266 return None not in (self_tmin, self_tmax) 

1267 

1268 def get_deltat_span(self, kind): 

1269 ''' 

1270 Get min and max sampling interval of all content of given kind. 

1271 

1272 :param kind: 

1273 Content kind 

1274 :type kind: 

1275 str 

1276 

1277 :returns: ``(deltat_min, deltat_max)`` 

1278 ''' 

1279 

1280 deltats = [ 

1281 deltat for deltat in self.get_deltats(kind) 

1282 if deltat is not None] 

1283 

1284 if deltats: 

1285 return min(deltats), max(deltats) 

1286 else: 

1287 return None, None 

1288 

1289 def iter_kinds(self, codes=None): 

1290 ''' 

1291 Iterate over content types available in selection. 

1292 

1293 :param codes: 

1294 If given, get kinds only for selected codes identifier. 

1295 Only a single identifier may be given here and no pattern matching 

1296 is done, currently. 

1297 :type codes: 

1298 :py:class:`~pyrocko.squirrel.model.Codes` 

1299 

1300 :yields: 

1301 Available content kinds as :py:class:`str`. 

1302 

1303 :complexity: 

1304 O(1), independent of number of nuts. 

1305 ''' 

1306 

1307 return self._database._iter_kinds( 

1308 codes=codes, 

1309 kind_codes_count='%(db)s.%(kind_codes_count)s' % self._names) 

1310 

1311 def iter_deltats(self, kind=None): 

1312 ''' 

1313 Iterate over sampling intervals available in selection. 

1314 

1315 :param kind: 

1316 If given, get sampling intervals only for a given content type. 

1317 :type kind: 

1318 str 

1319 

1320 :yields: 

1321 :py:class:`float` values. 

1322 

1323 :complexity: 

1324 O(1), independent of number of nuts. 

1325 ''' 

1326 return self._database._iter_deltats( 

1327 kind=kind, 

1328 kind_codes_count='%(db)s.%(kind_codes_count)s' % self._names) 

1329 

1330 def iter_codes(self, kind=None): 

1331 ''' 

1332 Iterate over content identifier code sequences available in selection. 

1333 

1334 :param kind: 

1335 If given, get codes only for a given content type. 

1336 :type kind: 

1337 str 

1338 

1339 :yields: 

1340 :py:class:`tuple` of :py:class:`str` 

1341 

1342 :complexity: 

1343 O(1), independent of number of nuts. 

1344 ''' 

1345 return self._database._iter_codes( 

1346 kind=kind, 

1347 kind_codes_count='%(db)s.%(kind_codes_count)s' % self._names) 

1348 

1349 def _iter_codes_info(self, kind=None, codes=None): 

1350 ''' 

1351 Iterate over number of occurrences of any (kind, codes) combination. 

1352 

1353 :param kind: 

1354 If given, get counts only for selected content type. 

1355 :type kind: 

1356 str 

1357 

1358 :yields: 

1359 Tuples of the form ``(kind, codes, deltat, kind_codes_id, count)``. 

1360 

1361 :complexity: 

1362 O(1), independent of number of nuts. 

1363 ''' 

1364 return self._database._iter_codes_info( 

1365 kind=kind, 

1366 codes=codes, 

1367 kind_codes_count='%(db)s.%(kind_codes_count)s' % self._names) 

1368 

1369 def get_kinds(self, codes=None): 

1370 ''' 

1371 Get content types available in selection. 

1372 

1373 :param codes: 

1374 If given, get kinds only for selected codes identifier. 

1375 Only a single identifier may be given here and no pattern matching 

1376 is done, currently. 

1377 :type codes: 

1378 :py:class:`~pyrocko.squirrel.model.Codes` 

1379 

1380 :returns: 

1381 Sorted list of available content types. 

1382 :rtype: 

1383 py:class:`list` of :py:class:`str` 

1384 

1385 :complexity: 

1386 O(1), independent of number of nuts. 

1387 

1388 ''' 

1389 return sorted(list(self.iter_kinds(codes=codes))) 

1390 

1391 def get_deltats(self, kind=None): 

1392 ''' 

1393 Get sampling intervals available in selection. 

1394 

1395 :param kind: 

1396 If given, get sampling intervals only for selected content type. 

1397 :type kind: 

1398 str 

1399 

1400 :complexity: 

1401 O(1), independent of number of nuts. 

1402 

1403 :returns: Sorted list of available sampling intervals. 

1404 ''' 

1405 return sorted(list(self.iter_deltats(kind=kind))) 

1406 

1407 def get_codes(self, kind=None): 

1408 ''' 

1409 Get identifier code sequences available in selection. 

1410 

1411 :param kind: 

1412 If given, get codes only for selected content type. 

1413 :type kind: 

1414 str 

1415 

1416 :complexity: 

1417 O(1), independent of number of nuts. 

1418 

1419 :returns: Sorted list of available codes as tuples of strings. 

1420 ''' 

1421 return sorted(list(self.iter_codes(kind=kind))) 

1422 

1423 def get_counts(self, kind=None): 

1424 ''' 

1425 Get number of occurrences of any (kind, codes) combination. 

1426 

1427 :param kind: 

1428 If given, get codes only for selected content type. 

1429 :type kind: 

1430 str 

1431 

1432 :complexity: 

1433 O(1), independent of number of nuts. 

1434 

1435 :returns: ``dict`` with ``counts[kind][codes]`` or ``counts[codes]`` 

1436 if kind is not ``None`` 

1437 ''' 

1438 d = {} 

1439 for kind_id, codes, _, _, count in self._iter_codes_info(kind=kind): 

1440 if kind_id not in d: 

1441 v = d[kind_id] = {} 

1442 else: 

1443 v = d[kind_id] 

1444 

1445 if codes not in v: 

1446 v[codes] = 0 

1447 

1448 v[codes] += count 

1449 

1450 if kind is not None: 

1451 return d[to_kind_id(kind)] 

1452 else: 

1453 return dict((to_kind(kind_id), v) for (kind_id, v) in d.items()) 

1454 

1455 def glob_codes(self, kind, codes): 

1456 ''' 

1457 Find codes matching given patterns. 

1458 

1459 :param kind: 

1460 Content kind to be queried. 

1461 :type kind: 

1462 str 

1463 

1464 :param codes: 

1465 List of code patterns to query. 

1466 :type codes: 

1467 :py:class:`list` of :py:class:`~pyrocko.squirrel.model.Codes` 

1468 objects appropriate for the queried content type, or anything which 

1469 can be converted to such objects. 

1470 

1471 :returns: 

1472 List of matches of the form ``[kind_codes_id, codes, deltat]``. 

1473 ''' 

1474 

1475 kind_id = to_kind_id(kind) 

1476 args = [kind_id] 

1477 pats = codes_patterns_for_kind(kind_id, codes) 

1478 

1479 if pats: 

1480 codes_cond = 'AND ( %s ) ' % ' OR '.join( 

1481 ('kind_codes.codes GLOB ?',) * len(pats)) 

1482 

1483 args.extend(pat.safe_str for pat in pats) 

1484 else: 

1485 codes_cond = '' 

1486 

1487 sql = self._sql(''' 

1488 SELECT kind_codes_id, codes, deltat FROM kind_codes 

1489 WHERE 

1490 kind_id == ? ''' + codes_cond) 

1491 

1492 return list(map(list, self._conn.execute(sql, args))) 

1493 

1494 def update(self, constraint=None, **kwargs): 

1495 ''' 

1496 Update or partially update channel and event inventories. 

1497 

1498 :param constraint: 

1499 Selection of times or areas to be brought up to date. 

1500 :type constraint: 

1501 :py:class:`~pyrocko.squirrel.client.base.Constraint` 

1502 

1503 :param \\*\\*kwargs: 

1504 Shortcut for setting ``constraint=Constraint(**kwargs)``. 

1505 

1506 This function triggers all attached remote sources, to check for 

1507 updates in the meta-data. The sources will only submit queries when 

1508 their expiration date has passed, or if the selection spans into 

1509 previously unseen times or areas. 

1510 ''' 

1511 

1512 if constraint is None: 

1513 constraint = client.Constraint(**kwargs) 

1514 

1515 for source in self._sources: 

1516 source.update_channel_inventory(self, constraint) 

1517 source.update_event_inventory(self, constraint) 

1518 

1519 def update_waveform_promises(self, constraint=None, **kwargs): 

1520 ''' 

1521 Permit downloading of remote waveforms. 

1522 

1523 :param constraint: 

1524 Remote waveforms compatible with the given constraint are enabled 

1525 for download. 

1526 :type constraint: 

1527 :py:class:`~pyrocko.squirrel.client.base.Constraint` 

1528 

1529 :param \\*\\*kwargs: 

1530 Shortcut for setting ``constraint=Constraint(**kwargs)``. 

1531 

1532 Calling this method permits Squirrel to download waveforms from remote 

1533 sources when processing subsequent waveform requests. This works by 

1534 inserting so called waveform promises into the database. It will look 

1535 into the available channels for each remote source and create a promise 

1536 for each channel compatible with the given constraint. If the promise 

1537 then matches in a waveform request, Squirrel tries to download the 

1538 waveform. If the download is successful, the downloaded waveform is 

1539 added to the Squirrel and the promise is deleted. If the download 

1540 fails, the promise is kept if the reason of failure looks like being 

1541 temporary, e.g. because of a network failure. If the cause of failure 

1542 however seems to be permanent, the promise is deleted so that no 

1543 further attempts are made to download a waveform which might not be 

1544 available from that server at all. To force re-scheduling after a 

1545 permanent failure, call :py:meth:`update_waveform_promises` 

1546 yet another time. 

1547 ''' 

1548 

1549 if constraint is None: 

1550 constraint = client.Constraint(**kwargs) 

1551 

1552 for source in self._sources: 

1553 source.update_waveform_promises(self, constraint) 

1554 

1555 def remove_waveform_promises(self, from_database='selection'): 

1556 ''' 

1557 Remove waveform promises from live selection or global database. 

1558 

1559 Calling this function removes all waveform promises provided by the 

1560 attached sources. 

1561 

1562 :param from_database: 

1563 Remove from live selection ``'selection'`` or global database 

1564 ``'global'``. 

1565 ''' 

1566 for source in self._sources: 

1567 source.remove_waveform_promises(self, from_database=from_database) 

1568 

1569 def update_responses(self, constraint=None, **kwargs): 

1570 if constraint is None: 

1571 constraint = client.Constraint(**kwargs) 

1572 

1573 for source in self._sources: 

1574 source.update_response_inventory(self, constraint) 

1575 

1576 def get_nfiles(self): 

1577 ''' 

1578 Get number of files in selection. 

1579 ''' 

1580 

1581 sql = self._sql('''SELECT COUNT(*) FROM %(db)s.%(file_states)s''') 

1582 for row in self._conn.execute(sql): 

1583 return row[0] 

1584 

1585 def get_nnuts(self): 

1586 ''' 

1587 Get number of nuts in selection. 

1588 ''' 

1589 

1590 sql = self._sql('''SELECT COUNT(*) FROM %(db)s.%(nuts)s''') 

1591 for row in self._conn.execute(sql): 

1592 return row[0] 

1593 

1594 def get_total_size(self): 

1595 ''' 

1596 Get aggregated file size available in selection. 

1597 ''' 

1598 

1599 sql = self._sql(''' 

1600 SELECT SUM(files.size) FROM %(db)s.%(file_states)s 

1601 INNER JOIN files 

1602 ON %(db)s.%(file_states)s.file_id = files.file_id 

1603 ''') 

1604 

1605 for row in self._conn.execute(sql): 

1606 return row[0] or 0 

1607 

1608 def get_stats(self): 

1609 ''' 

1610 Get statistics on contents available through this selection. 

1611 ''' 

1612 

1613 kinds = self.get_kinds() 

1614 time_spans = {} 

1615 for kind in kinds: 

1616 time_spans[kind] = self.get_time_span([kind]) 

1617 

1618 return SquirrelStats( 

1619 nfiles=self.get_nfiles(), 

1620 nnuts=self.get_nnuts(), 

1621 kinds=kinds, 

1622 codes=self.get_codes(), 

1623 total_size=self.get_total_size(), 

1624 counts=self.get_counts(), 

1625 time_spans=time_spans, 

1626 sources=[s.describe() for s in self._sources], 

1627 operators=[op.describe() for op in self._operators]) 

1628 

1629 @filldocs 

1630 def check( 

1631 self, obj=None, tmin=None, tmax=None, time=None, codes=None, 

1632 ignore=[]): 

1633 ''' 

1634 Check for common data/metadata problems. 

1635 

1636 %(query_args)s 

1637 

1638 :param ignore: 

1639 Problem types to be ignored. 

1640 :type ignore: 

1641 :class:`list` of :class:`str` 

1642 (:py:class:`~pyrocko.squirrel.check.SquirrelCheckProblemType`) 

1643 

1644 :returns: 

1645 :py:class:`~pyrocko.squirrel.check.SquirrelCheck` object 

1646 containing the results of the check. 

1647 

1648 See :py:func:`~pyrocko.squirrel.check.do_check`. 

1649 ''' 

1650 

1651 from .check import do_check 

1652 tmin, tmax, codes = self._get_selection_args( 

1653 CHANNEL, obj, tmin, tmax, time, codes) 

1654 

1655 return do_check(self, tmin=tmin, tmax=tmax, codes=codes, ignore=ignore) 

1656 

1657 def get_content( 

1658 self, 

1659 nut, 

1660 cache_id='default', 

1661 accessor_id='default', 

1662 show_progress=False, 

1663 model='squirrel'): 

1664 

1665 ''' 

1666 Get and possibly load full content for a given index entry from file. 

1667 

1668 Loads the actual content objects (channel, station, waveform, ...) from 

1669 file. For efficiency, sibling content (all stuff in the same file 

1670 segment) will also be loaded as a side effect. The loaded contents are 

1671 cached in the Squirrel object. 

1672 ''' 

1673 

1674 content_cache = self._content_caches[cache_id] 

1675 if not content_cache.has(nut): 

1676 

1677 for nut_loaded in io.iload( 

1678 nut.file_path, 

1679 segment=nut.file_segment, 

1680 format=nut.file_format, 

1681 database=self._database, 

1682 update_selection=self, 

1683 show_progress=show_progress): 

1684 

1685 content_cache.put(nut_loaded) 

1686 

1687 try: 

1688 return content_cache.get(nut, accessor_id, model) 

1689 

1690 except KeyError: 

1691 raise error.NotAvailable( 

1692 'Unable to retrieve content: %s, %s, %s, %s' % nut.key) 

1693 

1694 def advance_accessor(self, accessor_id='default', cache_id=None): 

1695 ''' 

1696 Notify memory caches about consumer moving to a new data batch. 

1697 

1698 :param accessor_id: 

1699 Name of accessing consumer to be advanced. 

1700 :type accessor_id: 

1701 str 

1702 

1703 :param cache_id: 

1704 Name of cache to for which the accessor should be advanced. By 

1705 default the named accessor is advanced in all registered caches. 

1706 By default, two caches named ``'default'`` and ``'waveform'`` are 

1707 available. 

1708 :type cache_id: 

1709 str 

1710 

1711 See :py:class:`~pyrocko.squirrel.cache.ContentCache` for details on how 

1712 Squirrel's memory caching works and can be tuned. Default behaviour is 

1713 to release data when it has not been used in the latest data 

1714 window/batch. If the accessor is never advanced, data is cached 

1715 indefinitely - which is often desired e.g. for station meta-data. 

1716 Methods for consecutive data traversal, like 

1717 :py:meth:`chopper_waveforms` automatically advance and clear 

1718 their accessor. 

1719 ''' 

1720 for cache_ in ( 

1721 self._content_caches.keys() 

1722 if cache_id is None 

1723 else [cache_id]): 

1724 

1725 self._content_caches[cache_].advance_accessor(accessor_id) 

1726 

1727 def clear_accessor(self, accessor_id, cache_id=None): 

1728 ''' 

1729 Notify memory caches about a consumer having finished. 

1730 

1731 :param accessor_id: 

1732 Name of accessor to be cleared. 

1733 :type accessor_id: 

1734 str 

1735 

1736 :param cache_id: 

1737 Name of cache for which the accessor should be cleared. By default 

1738 the named accessor is cleared from all registered caches. By 

1739 default, two caches named ``'default'`` and ``'waveform'`` are 

1740 available. 

1741 :type cache_id: 

1742 str 

1743 

1744 Calling this method clears all references to cache entries held by the 

1745 named accessor. Cache entries are then freed if not referenced by any 

1746 other accessor. 

1747 ''' 

1748 

1749 for cache_ in ( 

1750 self._content_caches.keys() 

1751 if cache_id is None 

1752 else [cache_id]): 

1753 

1754 self._content_caches[cache_].clear_accessor(accessor_id) 

1755 

1756 def get_cache_stats(self, cache_id): 

1757 return self._content_caches[cache_id].get_stats() 

1758 

1759 @filldocs 

1760 def get_stations( 

1761 self, obj=None, tmin=None, tmax=None, time=None, codes=None, 

1762 model='squirrel'): 

1763 

1764 ''' 

1765 Get stations matching given constraints. 

1766 

1767 %(query_args)s 

1768 

1769 :param model: 

1770 Select object model for returned values: ``'squirrel'`` to get 

1771 Squirrel station objects or ``'pyrocko'`` to get Pyrocko station 

1772 objects with channel information attached. 

1773 :type model: 

1774 str 

1775 

1776 :returns: 

1777 List of :py:class:`pyrocko.squirrel.Station 

1778 <pyrocko.squirrel.model.Station>` objects by default or list of 

1779 :py:class:`pyrocko.model.Station <pyrocko.model.station.Station>` 

1780 objects if ``model='pyrocko'`` is requested. 

1781 

1782 See :py:meth:`iter_nuts` for details on time span matching. 

1783 ''' 

1784 

1785 if model == 'pyrocko': 

1786 return self._get_pyrocko_stations(obj, tmin, tmax, time, codes) 

1787 elif model in ('squirrel', 'stationxml', 'stationxml+'): 

1788 args = self._get_selection_args( 

1789 STATION, obj, tmin, tmax, time, codes) 

1790 

1791 nuts = sorted( 

1792 self.iter_nuts('station', *args), key=lambda nut: nut.dkey) 

1793 

1794 return [self.get_content(nut, model=model) for nut in nuts] 

1795 else: 

1796 raise ValueError('Invalid station model: %s' % model) 

1797 

1798 @filldocs 

1799 def get_channels( 

1800 self, obj=None, tmin=None, tmax=None, time=None, codes=None, 

1801 model='squirrel'): 

1802 

1803 ''' 

1804 Get channels matching given constraints. 

1805 

1806 %(query_args)s 

1807 

1808 :returns: 

1809 List of :py:class:`~pyrocko.squirrel.model.Channel` objects. 

1810 

1811 See :py:meth:`iter_nuts` for details on time span matching. 

1812 ''' 

1813 

1814 args = self._get_selection_args( 

1815 CHANNEL, obj, tmin, tmax, time, codes) 

1816 

1817 nuts = sorted( 

1818 self.iter_nuts('channel', *args), key=lambda nut: nut.dkey) 

1819 

1820 return [self.get_content(nut, model=model) for nut in nuts] 

1821 

    @filldocs
    def get_sensors(
            self, obj=None, tmin=None, tmax=None, time=None, codes=None):

        '''
        Get sensors matching given constraints.

        %(query_args)s

        :returns:
            List of :py:class:`~pyrocko.squirrel.model.Sensor` objects.

        See :py:meth:`iter_nuts` for details on time span matching.
        '''

        tmin, tmax, codes = self._get_selection_args(
            CHANNEL, obj, tmin, tmax, time, codes)

        if codes is not None:
            # Widen the channel pattern's last character (the component
            # letter) to '?', so that all components belonging to a sensor
            # are picked up, not just the one given.
            codes = codes_patterns_list(
                (entry.replace(channel=entry.channel[:-1] + '?')
                 if entry.channel != '*' else entry)
                for entry in codes)

        nuts = sorted(
            self.iter_nuts(
                'channel', tmin, tmax, codes), key=lambda nut: nut.dkey)

        # Group channels into sensors; drop sensors whose combined epoch does
        # not intersect the queried time span.
        return [
            sensor for sensor in model.Sensor.from_channels(
                self.get_content(nut) for nut in nuts)
            if match_time_span(tmin, tmax, sensor)]

1854 

1855 @filldocs 

1856 def get_responses( 

1857 self, obj=None, tmin=None, tmax=None, time=None, codes=None, 

1858 model='squirrel'): 

1859 

1860 ''' 

1861 Get instrument responses matching given constraints. 

1862 

1863 %(query_args)s 

1864 

1865 :param model: 

1866 Select data model for returned objects. Choices: ``'squirrel'``, 

1867 ``'stationxml'``, ``'stationxml+'``. See return value description. 

1868 :type model: 

1869 str 

1870 

1871 :returns: 

1872 List of :py:class:`~pyrocko.squirrel.model.Response` if ``model == 

1873 'squirrel'`` or list of :py:class:`~pyrocko.io.fdsn.FDSNStationXML` 

1874 if ``model == 'stationxml'`` or list of 

1875 (:py:class:`~pyrocko.squirrel.model.Response`, 

1876 :py:class:`~pyrocko.io.fdsn.FDSNStationXML`) if ``model == 

1877 'stationxml+'``. 

1878 

1879 See :py:meth:`iter_nuts` for details on time span matching. 

1880 ''' 

1881 

1882 args = self._get_selection_args( 

1883 RESPONSE, obj, tmin, tmax, time, codes) 

1884 

1885 nuts = sorted( 

1886 self.iter_nuts('response', *args), key=lambda nut: nut.dkey) 

1887 

1888 return [self.get_content(nut, model=model) for nut in nuts] 

1889 

1890 @filldocs 

1891 def get_response( 

1892 self, obj=None, tmin=None, tmax=None, time=None, codes=None, 

1893 model='squirrel', on_duplicate='raise'): 

1894 

1895 ''' 

1896 Get instrument response matching given constraints. 

1897 

1898 %(query_args)s 

1899 

1900 :param model: 

1901 Select data model for returned object. Choices: ``'squirrel'``, 

1902 ``'stationxml'``, ``'stationxml+'``. See return value description. 

1903 :type model: 

1904 str 

1905 

1906 :param on_duplicate: 

1907 Determines how duplicates/multiple matching responses are handled. 

1908 Choices: ``'raise'`` - raise 

1909 :py:exc:`~pyrocko.squirrel.error.Duplicate`, ``'warn'`` - emit a 

1910 warning and return first match, ``'ignore'`` - silently return 

1911 first match. 

1912 :type on_duplicate: 

1913 str 

1914 

1915 :returns: 

1916 :py:class:`~pyrocko.squirrel.model.Response` if 

1917 ``model == 'squirrel'`` or 

1918 :py:class:`~pyrocko.io.fdsn.FDSNStationXML` if ``model == 

1919 'stationxml'`` or 

1920 (:py:class:`~pyrocko.squirrel.model.Response`, 

1921 :py:class:`~pyrocko.io.fdsn.FDSNStationXML`) if ``model == 

1922 'stationxml+'``. 

1923 

1924 Same as :py:meth:`get_responses` but returning exactly one response. 

1925 Raises :py:exc:`~pyrocko.squirrel.error.NotAvailable` if none is 

1926 available. Duplicates are handled according to the ``on_duplicate`` 

1927 argument. 

1928 

1929 See :py:meth:`iter_nuts` for details on time span matching. 

1930 ''' 

1931 

1932 if model == 'stationxml': 

1933 model_ = 'stationxml+' 

1934 else: 

1935 model_ = model 

1936 

1937 responses = self.get_responses( 

1938 obj, tmin, tmax, time, codes, model=model_) 

1939 if len(responses) == 0: 

1940 raise error.NotAvailable( 

1941 'No instrument response available (%s).' 

1942 % self._get_selection_args_str( 

1943 RESPONSE, obj, tmin, tmax, time, codes)) 

1944 

1945 elif len(responses) > 1: 

1946 

1947 if on_duplicate in ('raise', 'warn'): 

1948 if model_ == 'squirrel': 

1949 resps_sq = responses 

1950 elif model_ == 'stationxml+': 

1951 resps_sq = [resp[0] for resp in responses] 

1952 else: 

1953 raise ValueError('Invalid response model: %s' % model) 

1954 

1955 rinfo = ':\n' + '\n'.join( 

1956 ' ' + resp.summary for resp in resps_sq) 

1957 

1958 message = \ 

1959 'Multiple instrument responses matching given ' \ 

1960 'constraints (%s)%s%s' % ( 

1961 self._get_selection_args_str( 

1962 RESPONSE, obj, tmin, tmax, time, codes), 

1963 ' -> using first' if on_duplicate == 'warn' else '', 

1964 rinfo) 

1965 

1966 if on_duplicate == 'raise': 

1967 raise error.Duplicate(message) 

1968 

1969 elif on_duplicate == 'warn': 

1970 logger.warning(message) 

1971 

1972 elif on_duplicate == 'ignore': 

1973 pass 

1974 

1975 else: 

1976 ValueError( 

1977 'Invalid argument for on_duplicate: %s' % on_duplicate) 

1978 

1979 if model == 'stationxml': 

1980 return responses[0][1] 

1981 else: 

1982 return responses[0] 

1983 

1984 @filldocs 

1985 def get_events( 

1986 self, obj=None, tmin=None, tmax=None, time=None, codes=None): 

1987 

1988 ''' 

1989 Get events matching given constraints. 

1990 

1991 %(query_args)s 

1992 

1993 :returns: 

1994 List of :py:class:`~pyrocko.model.event.Event` objects. 

1995 

1996 See :py:meth:`iter_nuts` for details on time span matching. 

1997 ''' 

1998 

1999 args = self._get_selection_args(EVENT, obj, tmin, tmax, time, codes) 

2000 nuts = sorted( 

2001 self.iter_nuts('event', *args), key=lambda nut: nut.dkey) 

2002 

2003 return [self.get_content(nut) for nut in nuts] 

2004 

2005 def _redeem_promises(self, *args, order_only=False): 

2006 

2007 def split_promise(order): 

2008 self._split_nuts( 

2009 'waveform_promise', 

2010 order.tmin, order.tmax, 

2011 codes=order.codes, 

2012 path=order.source_id) 

2013 

2014 tmin, tmax = args[:2] 

2015 

2016 waveforms = list(self.iter_nuts('waveform', *args)) 

2017 promises = list(self.iter_nuts('waveform_promise', *args)) 

2018 

2019 codes_to_avail = defaultdict(list) 

2020 for nut in waveforms: 

2021 codes_to_avail[nut.codes].append((nut.tmin, nut.tmax)) 

2022 

2023 def tts(x): 

2024 if isinstance(x, tuple): 

2025 return tuple(tts(e) for e in x) 

2026 elif isinstance(x, list): 

2027 return list(tts(e) for e in x) 

2028 else: 

2029 return util.time_to_str(x) 

2030 

2031 orders = [] 

2032 for promise in promises: 

2033 waveforms_avail = codes_to_avail[promise.codes] 

2034 for block_tmin, block_tmax in blocks( 

2035 max(tmin, promise.tmin), 

2036 min(tmax, promise.tmax), 

2037 promise.deltat): 

2038 

2039 orders.append( 

2040 WaveformOrder( 

2041 source_id=promise.file_path, 

2042 codes=promise.codes, 

2043 tmin=block_tmin, 

2044 tmax=block_tmax, 

2045 deltat=promise.deltat, 

2046 gaps=gaps(waveforms_avail, block_tmin, block_tmax))) 

2047 

2048 orders_noop, orders = lpick(lambda order: order.gaps, orders) 

2049 

2050 order_keys_noop = set(order_key(order) for order in orders_noop) 

2051 if len(order_keys_noop) != 0 or len(orders_noop) != 0: 

2052 logger.info( 

2053 'Waveform orders already satisified with cached/local data: ' 

2054 '%i (%i)' % (len(order_keys_noop), len(orders_noop))) 

2055 

2056 for order in orders_noop: 

2057 split_promise(order) 

2058 

2059 if order_only: 

2060 if orders: 

2061 self._pending_orders.extend(orders) 

2062 logger.info( 

2063 'Enqueuing %i waveform order%s.' 

2064 % len_plural(orders)) 

2065 return 

2066 else: 

2067 if self._pending_orders: 

2068 orders.extend(self._pending_orders) 

2069 logger.info( 

2070 'Adding %i previously enqueued order%s.' 

2071 % len_plural(self._pending_orders)) 

2072 

2073 self._pending_orders = [] 

2074 

2075 source_ids = [] 

2076 sources = {} 

2077 for source in self._sources: 

2078 if isinstance(source, fdsn.FDSNSource): 

2079 source_ids.append(source._source_id) 

2080 sources[source._source_id] = source 

2081 

2082 source_priority = dict( 

2083 (source_id, i) for (i, source_id) in enumerate(source_ids)) 

2084 

2085 order_groups = defaultdict(list) 

2086 for order in orders: 

2087 order_groups[order_key(order)].append(order) 

2088 

2089 for k, order_group in order_groups.items(): 

2090 order_group.sort( 

2091 key=lambda order: source_priority[order.source_id]) 

2092 

2093 n_order_groups = len(order_groups) 

2094 

2095 if len(order_groups) != 0 or len(orders) != 0: 

2096 logger.info( 

2097 'Waveform orders standing for download: %i (%i)' 

2098 % (len(order_groups), len(orders))) 

2099 

2100 task = make_task('Waveform orders processed', n_order_groups) 

2101 else: 

2102 task = None 

2103 

2104 def release_order_group(order): 

2105 okey = order_key(order) 

2106 for followup in order_groups[okey]: 

2107 split_promise(followup) 

2108 

2109 del order_groups[okey] 

2110 

2111 if task: 

2112 task.update(n_order_groups - len(order_groups)) 

2113 

2114 def noop(order): 

2115 pass 

2116 

2117 def success(order): 

2118 release_order_group(order) 

2119 split_promise(order) 

2120 

2121 def batch_add(paths): 

2122 self.add(paths) 

2123 

2124 calls = queue.Queue() 

2125 

2126 def enqueue(f): 

2127 def wrapper(*args): 

2128 calls.put((f, args)) 

2129 

2130 return wrapper 

2131 

2132 while order_groups: 

2133 

2134 orders_now = [] 

2135 empty = [] 

2136 for k, order_group in order_groups.items(): 

2137 try: 

2138 orders_now.append(order_group.pop(0)) 

2139 except IndexError: 

2140 empty.append(k) 

2141 

2142 for k in empty: 

2143 del order_groups[k] 

2144 

2145 by_source_id = defaultdict(list) 

2146 for order in orders_now: 

2147 by_source_id[order.source_id].append(order) 

2148 

2149 threads = [] 

2150 for source_id in by_source_id: 

2151 def download(): 

2152 try: 

2153 sources[source_id].download_waveforms( 

2154 by_source_id[source_id], 

2155 success=enqueue(success), 

2156 error_permanent=enqueue(split_promise), 

2157 error_temporary=noop, 

2158 batch_add=enqueue(batch_add)) 

2159 

2160 finally: 

2161 calls.put(None) 

2162 

2163 thread = threading.Thread(target=download) 

2164 thread.start() 

2165 threads.append(thread) 

2166 

2167 ndone = 0 

2168 while ndone < len(threads): 

2169 ret = calls.get() 

2170 if ret is None: 

2171 ndone += 1 

2172 else: 

2173 ret[0](*ret[1]) 

2174 

2175 for thread in threads: 

2176 thread.join() 

2177 

2178 if task: 

2179 task.update(n_order_groups - len(order_groups)) 

2180 

2181 if task: 

2182 task.done() 

2183 

2184 @filldocs 

2185 def get_waveform_nuts( 

2186 self, obj=None, tmin=None, tmax=None, time=None, codes=None, 

2187 codes_exclude=None, sample_rate_min=None, sample_rate_max=None, 

2188 order_only=False): 

2189 

2190 ''' 

2191 Get waveform content entities matching given constraints. 

2192 

2193 %(query_args)s 

2194 

2195 Like :py:meth:`get_nuts` with ``kind='waveform'`` but additionally 

2196 resolves matching waveform promises (downloads waveforms from remote 

2197 sources). 

2198 

2199 See :py:meth:`iter_nuts` for details on time span matching. 

2200 ''' 

2201 

2202 args = self._get_selection_args(WAVEFORM, obj, tmin, tmax, time, codes) 

2203 

2204 self._redeem_promises( 

2205 *args, 

2206 codes_exclude, 

2207 sample_rate_min, 

2208 sample_rate_max, 

2209 order_only=order_only) 

2210 

2211 nuts = sorted( 

2212 self.iter_nuts('waveform', *args), key=lambda nut: nut.dkey) 

2213 

2214 return nuts 

2215 

2216 @filldocs 

2217 def have_waveforms( 

2218 self, obj=None, tmin=None, tmax=None, time=None, codes=None): 

2219 

2220 ''' 

2221 Check if any waveforms or waveform promises are available for given 

2222 constraints. 

2223 

2224 %(query_args)s 

2225 ''' 

2226 

2227 args = self._get_selection_args(WAVEFORM, obj, tmin, tmax, time, codes) 

2228 return bool(list( 

2229 self.iter_nuts('waveform', *args, limit=1))) \ 

2230 or bool(list( 

2231 self.iter_nuts('waveform_promise', *args, limit=1))) 

2232 

    @filldocs
    def get_waveforms(
            self, obj=None, tmin=None, tmax=None, time=None, codes=None,
            codes_exclude=None, sample_rate_min=None, sample_rate_max=None,
            uncut=False, want_incomplete=True, degap=True,
            maxgap=5, maxlap=None, snap=None, include_last=False,
            load_data=True, accessor_id='default', operator_params=None,
            order_only=False, channel_priorities=None):

        '''
        Get waveforms matching given constraints.

        %(query_args)s

        :param sample_rate_min:
            Consider only waveforms with a sampling rate equal to or greater
            than the given value [Hz].
        :type sample_rate_min:
            float

        :param sample_rate_max:
            Consider only waveforms with a sampling rate equal to or less than
            the given value [Hz].
        :type sample_rate_max:
            float

        :param uncut:
            Set to ``True``, to disable cutting traces to [``tmin``, ``tmax``]
            and to disable degapping/deoverlapping. Returns untouched traces as
            they are read from file segment. File segments are always read in
            their entirety.
        :type uncut:
            bool

        :param want_incomplete:
            If ``True``, gappy/incomplete traces are included in the result.
        :type want_incomplete:
            bool

        :param degap:
            If ``True``, connect traces and remove gaps and overlaps.
        :type degap:
            bool

        :param maxgap:
            Maximum gap size in samples which is filled with interpolated
            samples when ``degap`` is ``True``.
        :type maxgap:
            int

        :param maxlap:
            Maximum overlap size in samples which is removed when ``degap`` is
            ``True``.
        :type maxlap:
            int

        :param snap:
            Rounding functions used when computing sample index from time
            instance, for trace start and trace end, respectively. By default,
            ``(round, round)`` is used.
        :type snap:
            tuple of 2 callables

        :param include_last:
            If ``True``, add one more sample to the returned traces (the sample
            which would be the first sample of a query with ``tmin`` set to the
            current value of ``tmax``).
        :type include_last:
            bool

        :param load_data:
            If ``True``, waveform data samples are read from files (or cache).
            If ``False``, meta-information-only traces are returned (dummy
            traces with no data samples).
        :type load_data:
            bool

        :param accessor_id:
            Name of consumer on whose behalf data is accessed. Used in cache
            management (see :py:mod:`~pyrocko.squirrel.cache`). Used as a key
            to distinguish different points of extraction for the decision of
            when to release cached waveform data. Should be used when data is
            alternately extracted from more than one region / selection.
        :type accessor_id:
            str

        :param channel_priorities:
            List of band/instrument code combinations to try. For example,
            giving ``['HH', 'BH']`` would first try to get ``HH?`` channels and
            then fallback to ``BH?`` if these are not available. The first
            matching waveforms are returned. Use in combination with
            ``sample_rate_min`` and ``sample_rate_max`` to constrain the sample
            rate.
        :type channel_priorities:
            list of str

        See :py:meth:`iter_nuts` for details on time span matching.

        Loaded data is kept in memory (at least) until
        :py:meth:`clear_accessor` has been called or
        :py:meth:`advance_accessor` has been called two consecutive times
        without data being accessed between the two calls (by this accessor).
        Data may still be further kept in the memory cache if held alive by
        consumers with a different ``accessor_id``.
        '''

        tmin, tmax, codes = self._get_selection_args(
            WAVEFORM, obj, tmin, tmax, time, codes)

        # Channel-priority mode delegates to a helper which calls back into
        # this method once per band/instrument combination.
        if channel_priorities is not None:
            return self._get_waveforms_prioritized(
                tmin=tmin, tmax=tmax, codes=codes, codes_exclude=codes_exclude,
                sample_rate_min=sample_rate_min,
                sample_rate_max=sample_rate_max,
                uncut=uncut, want_incomplete=want_incomplete, degap=degap,
                maxgap=maxgap, maxlap=maxlap, snap=snap,
                include_last=include_last, load_data=load_data,
                accessor_id=accessor_id, operator_params=operator_params,
                order_only=order_only, channel_priorities=channel_priorities)

        self_tmin, self_tmax = self.get_time_span(
            ['waveform', 'waveform_promise'])

        if None in (self_tmin, self_tmax):
            logger.warning(
                'No waveforms available.')
            return []

        # Open-ended queries default to the full available time span.
        tmin = tmin if tmin is not None else self_tmin
        tmax = tmax if tmax is not None else self_tmax

        if codes is not None and len(codes) == 1:
            # TODO: fix for multiple / mixed codes
            # NOTE(review): this operator shortcut does not apply
            # codes_exclude, sample_rate_min/max or order_only — confirm
            # whether that is intended.
            operator = self.get_operator(codes[0])
            if operator is not None:
                return operator.get_waveforms(
                    self, codes[0],
                    tmin=tmin, tmax=tmax,
                    uncut=uncut, want_incomplete=want_incomplete, degap=degap,
                    maxgap=maxgap, maxlap=maxlap, snap=snap,
                    include_last=include_last, load_data=load_data,
                    accessor_id=accessor_id, params=operator_params)

        # Resolves waveform promises (remote downloads) as a side effect.
        nuts = self.get_waveform_nuts(
            obj, tmin, tmax, time, codes, codes_exclude, sample_rate_min,
            sample_rate_max, order_only=order_only)

        if order_only:
            return []

        if load_data:
            traces = [
                self.get_content(nut, 'waveform', accessor_id) for nut in nuts]

        else:
            # Meta-information-only dummy traces.
            traces = [
                trace.Trace(**nut.trace_kwargs) for nut in nuts]

        if uncut:
            return traces

        if snap is None:
            snap = (round, round)

        chopped = []
        for tr in traces:
            # In meta-only mode, drop any data samples attached to traces
            # obtained from the cache.
            if not load_data and tr.ydata is not None:
                tr = tr.copy(data=False)
                tr.ydata = None

            try:
                chopped.append(tr.chop(
                    tmin, tmax,
                    inplace=False,
                    snap=snap,
                    include_last=include_last))

            except trace.NoData:
                pass

        processed = self._process_chopped(
            chopped, degap, maxgap, maxlap, want_incomplete, tmin, tmax)

        return processed

2418 def _get_waveforms_prioritized( 

2419 self, tmin=None, tmax=None, codes=None, codes_exclude=None, 

2420 channel_priorities=None, **kwargs): 

2421 

2422 trs_all = [] 

2423 codes_have = set() 

2424 for channel in channel_priorities: 

2425 assert len(channel) == 2 

2426 if codes is not None: 

2427 codes_now = [ 

2428 codes_.replace(channel=channel+'?') for codes_ in codes] 

2429 else: 

2430 codes_now = model.CodesNSLCE('*', '*', '*', channel+'?') 

2431 

2432 codes_exclude_now = list(set( 

2433 codes_.replace(channel=channel+codes_.channel[-1]) 

2434 for codes_ in codes_have)) 

2435 

2436 if codes_exclude: 

2437 codes_exclude_now.extend(codes_exclude) 

2438 

2439 trs = self.get_waveforms( 

2440 tmin=tmin, 

2441 tmax=tmax, 

2442 codes=codes_now, 

2443 codes_exclude=codes_exclude_now, 

2444 **kwargs) 

2445 

2446 codes_have.update(set(tr.codes for tr in trs)) 

2447 trs_all.extend(trs) 

2448 

2449 return trs_all 

2450 

    @filldocs
    def chopper_waveforms(
            self, obj=None, tmin=None, tmax=None, time=None, codes=None,
            codes_exclude=None, sample_rate_min=None, sample_rate_max=None,
            tinc=None, tpad=0.,
            want_incomplete=True, snap_window=False,
            degap=True, maxgap=5, maxlap=None,
            snap=None, include_last=False, load_data=True,
            accessor_id=None, clear_accessor=True, operator_params=None,
            grouping=None, channel_priorities=None):

        '''
        Iterate window-wise over waveform archive.

        %(query_args)s

        :param tinc:
            Time increment (window shift time) (default uses ``tmax-tmin``).
        :type tinc:
            timestamp

        :param tpad:
            Padding time appended on either side of the data window (window
            overlap is ``2*tpad``).
        :type tpad:
            timestamp

        :param want_incomplete:
            If ``True``, gappy/incomplete traces are included in the result.
        :type want_incomplete:
            bool

        :param snap_window:
            If ``True``, start time windows at multiples of tinc with respect
            to system time zero.
        :type snap_window:
            bool

        :param degap:
            If ``True``, connect traces and remove gaps and overlaps.
        :type degap:
            bool

        :param maxgap:
            Maximum gap size in samples which is filled with interpolated
            samples when ``degap`` is ``True``.
        :type maxgap:
            int

        :param maxlap:
            Maximum overlap size in samples which is removed when ``degap`` is
            ``True``.
        :type maxlap:
            int

        :param snap:
            Rounding functions used when computing sample index from time
            instance, for trace start and trace end, respectively. By default,
            ``(round, round)`` is used.
        :type snap:
            tuple of 2 callables

        :param include_last:
            If ``True``, add one more sample to the returned traces (the sample
            which would be the first sample of a query with ``tmin`` set to the
            current value of ``tmax``).
        :type include_last:
            bool

        :param load_data:
            If ``True``, waveform data samples are read from files (or cache).
            If ``False``, meta-information-only traces are returned (dummy
            traces with no data samples).
        :type load_data:
            bool

        :param accessor_id:
            Name of consumer on whose behalf data is accessed. Used in cache
            management (see :py:mod:`~pyrocko.squirrel.cache`). Used as a key
            to distinguish different points of extraction for the decision of
            when to release cached waveform data. Should be used when data is
            alternately extracted from more than one region / selection.
        :type accessor_id:
            str

        :param clear_accessor:
            If ``True`` (default), :py:meth:`clear_accessor` is called when the
            chopper finishes. Set to ``False`` to keep loaded waveforms in
            memory when the generator returns.
        :type clear_accessor:
            bool

        :param grouping:
            By default, traversal over the data is over time and all matching
            traces of a time window are yielded. Using this option, it is
            possible to traverse the data first by group (e.g. station or
            network) and second by time. This can reduce the number of traces
            in each batch and thus reduce the memory footprint of the process.
        :type grouping:
            :py:class:`~pyrocko.squirrel.operator.Grouping`

        :yields:
            A :py:class:`Batch` object for every extracted time window,
            carrying the list of :py:class:`~pyrocko.trace.Trace` objects in
            its ``traces`` attribute.

        See :py:meth:`iter_nuts` for details on time span matching.
        '''

        tmin, tmax, codes = self._get_selection_args(
            WAVEFORM, obj, tmin, tmax, time, codes)

        self_tmin, self_tmax = self.get_time_span(
            ['waveform', 'waveform_promise'])

        # Without any waveforms or promises there is nothing to iterate.
        if None in (self_tmin, self_tmax):
            logger.warning(
                'Content has undefined time span. No waveforms and no '
                'waveform promises?')
            return

        if snap_window and tinc is not None:
            # Align window boundaries to multiples of tinc (system time
            # zero reference).
            tmin = tmin if tmin is not None else self_tmin
            tmax = tmax if tmax is not None else self_tmax
            tmin = math.floor(tmin / tinc) * tinc
            tmax = math.ceil(tmax / tinc) * tinc
        else:
            tmin = tmin if tmin is not None else self_tmin + tpad
            tmax = tmax if tmax is not None else self_tmax - tpad

        tinc = tinc if tinc is not None else tmax - tmin

        try:
            if accessor_id is None:
                accessor_id = 'chopper%i' % self._n_choppers_active

            self._n_choppers_active += 1

            # Number of windows needed to cover [tmin, tmax); eps guards
            # against an extra window from floating point rounding.
            eps = tinc * 1e-6
            if tinc != 0.0:
                nwin = int(((tmax - eps) - tmin) / tinc) + 1
            else:
                nwin = 1

            if grouping is None:
                codes_list = [codes]
            else:
                # Traverse by group first: build one codes pattern list per
                # group from the available waveform codes.
                operator = Operator(
                    filtering=CodesPatternFiltering(codes=codes),
                    grouping=grouping)

                available = set(self.get_codes(kind='waveform'))
                available.update(self.get_codes(kind='waveform_promise'))
                operator.update_mappings(sorted(available))

                codes_list = [
                    codes_patterns_list(scl)
                    for scl in operator.iter_in_codes()]

            ngroups = len(codes_list)
            for igroup, scl in enumerate(codes_list):
                for iwin in range(nwin):
                    # Last window is clipped at tmax.
                    wmin, wmax = tmin+iwin*tinc, min(tmin+(iwin+1)*tinc, tmax)

                    chopped = self.get_waveforms(
                        tmin=wmin-tpad,
                        tmax=wmax+tpad,
                        codes=scl,
                        codes_exclude=codes_exclude,
                        sample_rate_min=sample_rate_min,
                        sample_rate_max=sample_rate_max,
                        snap=snap,
                        include_last=include_last,
                        load_data=load_data,
                        want_incomplete=want_incomplete,
                        degap=degap,
                        maxgap=maxgap,
                        maxlap=maxlap,
                        accessor_id=accessor_id,
                        operator_params=operator_params,
                        channel_priorities=channel_priorities)

                    # Lets the cache release data not accessed in the
                    # previous window.
                    self.advance_accessor(accessor_id)

                    yield Batch(
                        tmin=wmin,
                        tmax=wmax,
                        i=iwin,
                        n=nwin,
                        igroup=igroup,
                        ngroups=ngroups,
                        traces=chopped)

        finally:
            # Runs also when the consumer abandons the generator early.
            self._n_choppers_active -= 1
            if clear_accessor:
                self.clear_accessor(accessor_id, 'waveform')

2648 def _process_chopped( 

2649 self, chopped, degap, maxgap, maxlap, want_incomplete, tmin, tmax): 

2650 

2651 chopped.sort(key=lambda a: a.full_id) 

2652 if degap: 

2653 chopped = trace.degapper(chopped, maxgap=maxgap, maxlap=maxlap) 

2654 

2655 if not want_incomplete: 

2656 chopped_weeded = [] 

2657 for tr in chopped: 

2658 emin = tr.tmin - tmin 

2659 emax = tr.tmax + tr.deltat - tmax 

2660 if (abs(emin) <= 0.5*tr.deltat and abs(emax) <= 0.5*tr.deltat): 

2661 chopped_weeded.append(tr) 

2662 

2663 elif degap: 

2664 if (0. < emin <= 5. * tr.deltat 

2665 and -5. * tr.deltat <= emax < 0.): 

2666 

2667 tr.extend(tmin, tmax-tr.deltat, fillmethod='repeat') 

2668 chopped_weeded.append(tr) 

2669 

2670 chopped = chopped_weeded 

2671 

2672 return chopped 

2673 

2674 def _get_pyrocko_stations( 

2675 self, obj=None, tmin=None, tmax=None, time=None, codes=None): 

2676 

2677 from pyrocko import model as pmodel 

2678 

2679 if codes is not None: 

2680 codes = codes_patterns_for_kind(STATION, codes) 

2681 

2682 by_nsl = defaultdict(lambda: (list(), list())) 

2683 for station in self.get_stations(obj, tmin, tmax, time, codes): 

2684 sargs = station._get_pyrocko_station_args() 

2685 by_nsl[station.codes.nsl][0].append(sargs) 

2686 

2687 if codes is not None: 

2688 codes = [model.CodesNSLCE(c) for c in codes] 

2689 

2690 for channel in self.get_channels(obj, tmin, tmax, time, codes): 

2691 sargs = channel._get_pyrocko_station_args() 

2692 sargs_list, channels_list = by_nsl[channel.codes.nsl] 

2693 sargs_list.append(sargs) 

2694 channels_list.append(channel) 

2695 

2696 pstations = [] 

2697 nsls = list(by_nsl.keys()) 

2698 nsls.sort() 

2699 for nsl in nsls: 

2700 sargs_list, channels_list = by_nsl[nsl] 

2701 sargs = util.consistency_merge( 

2702 [('',) + x for x in sargs_list]) 

2703 

2704 by_c = defaultdict(list) 

2705 for ch in channels_list: 

2706 by_c[ch.codes.channel].append(ch._get_pyrocko_channel_args()) 

2707 

2708 chas = list(by_c.keys()) 

2709 chas.sort() 

2710 pchannels = [] 

2711 for cha in chas: 

2712 list_of_cargs = by_c[cha] 

2713 cargs = util.consistency_merge( 

2714 [('',) + x for x in list_of_cargs]) 

2715 pchannels.append(pmodel.Channel(*cargs)) 

2716 

2717 pstations.append( 

2718 pmodel.Station(*sargs, channels=pchannels)) 

2719 

2720 return pstations 

2721 

2722 @property 

2723 def pile(self): 

2724 

2725 ''' 

2726 Emulates the older :py:class:`pyrocko.pile.Pile` interface. 

2727 

2728 This property exposes a :py:class:`pyrocko.squirrel.pile.Pile` object, 

2729 which emulates most of the older :py:class:`pyrocko.pile.Pile` methods 

2730 but uses the fluffy power of the Squirrel under the hood. 

2731 

2732 This interface can be used as a drop-in replacement for piles which are 

2733 used in existing scripts and programs for efficient waveform data 

2734 access. The Squirrel-based pile scales better for large datasets. Newer 

2735 scripts should use Squirrel's native methods to avoid the emulation 

2736 overhead. 

2737 ''' 

2738 from . import pile 

2739 

2740 if self._pile is None: 

2741 self._pile = pile.Pile(self) 

2742 

2743 return self._pile 

2744 

2745 def snuffle(self): 

2746 ''' 

2747 Look at dataset in Snuffler. 

2748 ''' 

2749 self.pile.snuffle() 

2750 

2751 def _gather_codes_keys(self, kind, gather, selector): 

2752 return set( 

2753 gather(codes) 

2754 for codes in self.iter_codes(kind) 

2755 if selector is None or selector(codes)) 

2756 

2757 def __str__(self): 

2758 return str(self.get_stats()) 

2759 

    def get_coverage(
            self, kind, tmin=None, tmax=None, codes=None, limit=None):

        '''
        Get coverage information.

        Get information about strips of gapless data coverage.

        :param kind:
            Content kind to be queried.
        :type kind:
            str

        :param tmin:
            Start time of query interval.
        :type tmin:
            timestamp

        :param tmax:
            End time of query interval.
        :type tmax:
            timestamp

        :param codes:
            If given, restrict query to given content codes patterns.
        :type codes:
            :py:class:`list` of :py:class:`~pyrocko.squirrel.model.Codes`
            objects appropriate for the queried content type, or anything which
            can be converted to such objects.

        :param limit:
            Limit query to return only up to a given maximum number of entries
            per matching time series (without setting this option, very gappy
            data could cause the query to execute for a very long time).
        :type limit:
            int

        :returns:
            Information about time spans covered by the requested time series
            data.
        :rtype:
            :py:class:`list` of :py:class:`Coverage` objects
        '''

        # Times are stored split into integer seconds and sub-second offset.
        tmin_seconds, tmin_offset = model.tsplit(tmin)
        tmax_seconds, tmax_offset = model.tsplit(tmax)
        kind_id = to_kind_id(kind)

        codes_info = list(self._iter_codes_info(kind=kind))

        # Entries to process: (pattern, kind_codes_id, codes, deltat).
        kdata_all = []
        if codes is None:
            for _, codes_entry, deltat, kind_codes_id, _ in codes_info:
                kdata_all.append(
                    (codes_entry, kind_codes_id, codes_entry, deltat))

        else:
            # Match every requested pattern against the available codes.
            for codes_entry in codes:
                pattern = to_codes(kind_id, codes_entry)
                for _, codes_entry, deltat, kind_codes_id, _ in codes_info:
                    if model.match_codes(pattern, codes_entry):
                        kdata_all.append(
                            (pattern, kind_codes_id, codes_entry, deltat))

        kind_codes_ids = [x[1] for x in kdata_all]

        # Number of entries already covering the instant tmin, per
        # (codes, deltat), used to seed the running coverage count.
        counts_at_tmin = {}
        if tmin is not None:
            for nut in self.iter_nuts(
                    kind, tmin, tmin, kind_codes_ids=kind_codes_ids):

                k = nut.codes, nut.deltat
                if k not in counts_at_tmin:
                    counts_at_tmin[k] = 0

                counts_at_tmin[k] += 1

        coverages = []
        for pattern, kind_codes_id, codes_entry, deltat in kdata_all:
            # entry: [pattern, codes, deltat, tmin_avail, tmax_avail,
            #         list of (time, count) change points (or None)]
            entry = [pattern, codes_entry, deltat, None, None, []]

            # First and last coverage change point (ASC / DESC order).
            for i, order in [(0, 'ASC'), (1, 'DESC')]:
                sql = self._sql('''
                    SELECT
                        time_seconds,
                        time_offset
                    FROM %(db)s.%(coverage)s
                    WHERE
                        kind_codes_id == ?
                    ORDER BY
                        kind_codes_id ''' + order + ''',
                        time_seconds ''' + order + ''',
                        time_offset ''' + order + '''
                    LIMIT 1
                ''')

                for row in self._conn.execute(sql, [kind_codes_id]):
                    entry[3+i] = model.tjoin(row[0], row[1])

            # No coverage rows at all for this time series.
            if None in entry[3:5]:
                continue

            args = [kind_codes_id]

            sql_time = ''
            if tmin is not None:
                # intentionally < because (== tmin) is queried from nuts
                sql_time += ' AND ( ? < time_seconds ' \
                    'OR ( ? == time_seconds AND ? < time_offset ) ) '
                args.extend([tmin_seconds, tmin_seconds, tmin_offset])

            if tmax is not None:
                sql_time += ' AND ( time_seconds < ? ' \
                    'OR ( ? == time_seconds AND time_offset <= ? ) ) '
                args.extend([tmax_seconds, tmax_seconds, tmax_offset])

            sql_limit = ''
            if limit is not None:
                sql_limit = ' LIMIT ?'
                args.append(limit)

            sql = self._sql('''
                SELECT
                    time_seconds,
                    time_offset,
                    step
                FROM %(db)s.%(coverage)s
                WHERE
                    kind_codes_id == ?
                    ''' + sql_time + '''
                ORDER BY
                    kind_codes_id,
                    time_seconds,
                    time_offset
            ''' + sql_limit)

            rows = list(self._conn.execute(sql, args))

            if limit is not None and len(rows) == limit:
                # Truncated result: signal "too gappy" with None instead of
                # an incomplete change-point list.
                entry[-1] = None
            else:
                # Accumulate the running coverage count over the change
                # points; `step` is the per-row count increment.
                counts = counts_at_tmin.get((codes_entry, deltat), 0)
                tlast = None
                if tmin is not None:
                    entry[-1].append((tmin, counts))
                    tlast = tmin

                for row in rows:
                    t = model.tjoin(row[0], row[1])
                    counts += row[2]
                    entry[-1].append((t, counts))
                    tlast = t

                if tmax is not None and (tlast is None or tlast != tmax):
                    entry[-1].append((tmax, counts))

            coverages.append(model.Coverage.from_values(entry + [kind_id]))

        return coverages

    def get_stationxml(
            self, obj=None, tmin=None, tmax=None, time=None, codes=None,
            level='response', on_error='raise'):

        '''
        Get station/channel/response metadata in StationXML representation.

        %(query_args)s

        :returns:
            :py:class:`~pyrocko.io.stationxml.FDSNStationXML` object.
        '''

        # NOTE(review): the docstring contains the %(query_args)s placeholder
        # but this method is not decorated with @filldocs in the visible
        # source — confirm whether the decorator is missing.

        if level not in ('network', 'station', 'channel', 'response'):
            raise ValueError('Invalid level: %s' % level)

        # NOTE(review): `handle_error` is only defined below for on_error in
        # ('ignore', 'warn', 'raise'); any other value leads to a NameError
        # at the first reported problem — consider validating up front.

        tmin, tmax, codes = self._get_selection_args(
            CHANNEL, obj, tmin, tmax, time, codes)

        def tts(t):
            # Short time formatter for warning messages.
            if t is None:
                return '<none>'
            else:
                return util.tts(t, format='%Y-%m-%d %H:%M:%S')

        if on_error == 'ignore':
            def handle_error(exc):
                pass

        elif on_error == 'warn':
            def handle_error(exc):
                logger.warning(str(exc))

        elif on_error == 'raise':
            def handle_error(exc):
                raise exc

        def use_first(node_type_name, codes, k, group):
            # Resolve duplicate epochs by picking the first entry.
            if on_error == 'warn':
                logger.warning(
                    'Duplicates for %s %s, %s - %s -> using first' % (
                        node_type_name,
                        '.'.join(codes),
                        tts(k[0]), tts(k[1])))

            return group[0]

        def deduplicate(node_type_name, codes, nodes):
            # Group nodes by (start_date, end_date) epoch and keep one per
            # group.
            groups = defaultdict(list)
            for node in nodes:
                k = (node.start_date, node.end_date)
                groups[k].append(node)

            return [
                use_first(node_type_name, codes, k, group)
                for (k, group) in groups.items()]

        filtering = CodesPatternFiltering(codes=codes)

        # Distinct NSLC tuples of all matching channels.
        nslcs = list(set(
            codes.nslc for codes in
            filtering.filter(self.get_codes(kind='channel'))))

        from pyrocko.io import stationxml as sx

        # Walk the network -> station -> location -> channel hierarchy,
        # descending only as deep as `level` requires.
        networks = []
        for net, stas in prefix_tree(nslcs):
            network = sx.Network(code=net)
            networks.append(network)

            if level not in ('station', 'channel', 'response'):
                continue

            for sta, locs in stas:
                stations = self.get_stations(
                    tmin=tmin,
                    tmax=tmax,
                    codes=(net, sta, '*'),
                    model='stationxml')

                if on_error != 'raise':
                    stations = deduplicate(
                        'Station', (net, sta), stations)

                errors = sx.check_overlaps(
                    'Station', (net, sta), stations)

                if errors:
                    handle_error(error.Duplicate(
                        'Overlapping/duplicate station info:\n %s'
                        % '\n '.join(errors)))

                network.station_list.extend(stations)

                if level not in ('channel', 'response'):
                    continue

                for loc, chas in locs:
                    for cha, _ in chas:
                        channels = self.get_channels(
                            tmin=tmin,
                            tmax=tmax,
                            codes=(net, sta, loc, cha),
                            model='stationxml')

                        if on_error != 'raise':
                            channels = deduplicate(
                                'Channel', (net, sta, loc, cha), channels)

                        errors = sx.check_overlaps(
                            'Channel', (net, sta, loc, cha), channels)

                        if errors:
                            handle_error(error.Duplicate(
                                'Overlapping/duplicate channel info:\n %s'
                                % '\n '.join(errors)))

                        for channel in channels:
                            # Attach each channel epoch to the station epoch
                            # containing it.
                            station = sx.find_containing(stations, channel)
                            if station is not None:
                                station.channel_list.append(channel)
                            else:
                                handle_error(error.NotAvailable(
                                    'No station or station epoch found '
                                    'for channel: %s' % '.'.join(
                                        (net, sta, loc, cha))))

                                continue

                            if level != 'response':
                                continue

                            try:
                                response_sq, response_sx = self.get_response(
                                    codes=(net, sta, loc, cha),
                                    tmin=channel.start_date,
                                    tmax=channel.end_date,
                                    model='stationxml+',
                                    on_duplicate=on_error)

                            except error.NotAvailable as e:
                                handle_error(e)
                                continue

                            # Response epoch must cover the channel epoch.
                            if not (
                                    sx.eq_open(
                                        channel.start_date, response_sq.tmin)
                                    and sx.eq_open(
                                        channel.end_date, response_sq.tmax)):

                                handle_error(error.Inconsistencies(
                                    'Response time span does not match '
                                    'channel time span: %s' % '.'.join(
                                        (net, sta, loc, cha))))

                            channel.response = response_sx

        return sx.FDSNStationXML(
            source='Generated by Pyrocko Squirrel.',
            network_list=networks)

3080 def add_operator(self, op): 

3081 self._operators.append(op) 

3082 

3083 def update_operator_mappings(self): 

3084 available = self.get_codes(kind=('channel')) 

3085 

3086 for operator in self._operators: 

3087 operator.update_mappings(available, self._operator_registry) 

3088 

3089 def iter_operator_mappings(self): 

3090 for operator in self._operators: 

3091 for in_codes, out_codes in operator.iter_mappings(): 

3092 yield operator, in_codes, out_codes 

3093 

3094 def get_operator_mappings(self): 

3095 return list(self.iter_operator_mappings()) 

3096 

3097 def get_operator(self, codes): 

3098 try: 

3099 return self._operator_registry[codes][0] 

3100 except KeyError: 

3101 return None 

3102 

3103 def get_operator_group(self, codes): 

3104 try: 

3105 return self._operator_registry[codes] 

3106 except KeyError: 

3107 return None, (None, None, None) 

3108 

3109 def iter_operator_codes(self): 

3110 for _, _, out_codes in self.iter_operator_mappings(): 

3111 for codes in out_codes: 

3112 yield codes 

3113 

3114 def get_operator_codes(self): 

3115 return list(self.iter_operator_codes()) 

3116 

3117 def print_tables(self, table_names=None, stream=None): 

3118 ''' 

3119 Dump raw database tables in textual form (for debugging purposes). 

3120 

3121 :param table_names: 

3122 Names of tables to be dumped or ``None`` to dump all. 

3123 :type table_names: 

3124 :py:class:`list` of :py:class:`str` 

3125 

3126 :param stream: 

3127 Open file or ``None`` to dump to standard output. 

3128 ''' 

3129 

3130 if stream is None: 

3131 stream = sys.stdout 

3132 

3133 if isinstance(table_names, str): 

3134 table_names = [table_names] 

3135 

3136 if table_names is None: 

3137 table_names = [ 

3138 'selection_file_states', 

3139 'selection_nuts', 

3140 'selection_kind_codes_count', 

3141 'files', 'nuts', 'kind_codes', 'kind_codes_count'] 

3142 

3143 m = { 

3144 'selection_file_states': '%(db)s.%(file_states)s', 

3145 'selection_nuts': '%(db)s.%(nuts)s', 

3146 'selection_kind_codes_count': '%(db)s.%(kind_codes_count)s', 

3147 'files': 'files', 

3148 'nuts': 'nuts', 

3149 'kind_codes': 'kind_codes', 

3150 'kind_codes_count': 'kind_codes_count'} 

3151 

3152 for table_name in table_names: 

3153 self._database.print_table( 

3154 m[table_name] % self._names, stream=stream) 

3155 

3156 

class SquirrelStats(Object):
    '''
    Container to hold statistics about contents available from a Squirrel.

    See also :py:meth:`Squirrel.get_stats`.
    '''

    nfiles = Int.T(
        help='Number of files in selection.')
    nnuts = Int.T(
        help='Number of index nuts in selection.')
    codes = List.T(
        Tuple.T(content_t=String.T()),
        help='Available code sequences in selection, e.g. '
             '(agency, network, station, location) for stations nuts.')
    kinds = List.T(
        String.T(),
        help='Available content types in selection.')
    total_size = Int.T(
        help='Aggregated file size of files in selection.')
    counts = Dict.T(
        String.T(), Dict.T(Tuple.T(content_t=String.T()), Int.T()),
        help='Breakdown of how many nuts of any content type and code '
             'sequence are available in selection, ``counts[kind][codes]``.')
    time_spans = Dict.T(
        String.T(), Tuple.T(content_t=Timestamp.T()),
        help='Time spans by content type.')
    sources = List.T(
        String.T(),
        help='Descriptions of attached sources.')
    operators = List.T(
        String.T(),
        help='Descriptions of attached operators.')

    def __str__(self):
        # Total number of nuts per kind, summed over all code sequences.
        kind_counts = dict(
            (kind, sum(self.counts[kind].values())) for kind in self.kinds)

        scodes = model.codes_to_str_abbreviated(self.codes)

        ssources = '<none>' if not self.sources else '\n' + '\n'.join(
            '  ' + s for s in self.sources)

        soperators = '<none>' if not self.operators else '\n' + '\n'.join(
            '  ' + s for s in self.operators)

        def stime(t):
            # Format a timestamp, hiding the "open interval" sentinels.
            return util.tts(t) if t is not None and t not in (
                model.g_tmin, model.g_tmax) else '<none>'

        def stable(rows):
            # Render rows as a left-aligned, column-padded table.
            ns = [max(len(w) for w in col) for col in zip(*rows)]
            return '\n'.join(
                ' '.join(w.ljust(n) for n, w in zip(ns, row))
                for row in rows)

        def indent(s):
            return '\n'.join('  '+line for line in s.splitlines())

        # One line per content kind: name, count and covered time span.
        stspans = '<none>' if not self.kinds else '\n' + indent(stable([(
            kind + ':',
            str(kind_counts[kind]),
            stime(self.time_spans[kind][0]),
            '-',
            stime(self.time_spans[kind][1])) for kind in sorted(self.kinds)]))

        s = '''
Number of files:               %i
Total size of known files:     %s
Number of index nuts:          %i
Available content kinds:       %s
Available codes:               %s
Sources:                       %s
Operators:                     %s''' % (
            self.nfiles,
            util.human_bytesize(self.total_size),
            self.nnuts,
            stspans, scodes, ssources, soperators)

        return s.lstrip()

3237 

3238 

# Public names exported when this module is used with ``from ... import *``.
__all__ = [
    'Squirrel',
    'SquirrelStats',
]