1# http://pyrocko.org - GPLv3 

2# 

3# The Pyrocko Developers, 21st Century 

4# ---|P------/S----------~Lg---------- 

5 

6import sys 

7import os 

8 

9import math 

10import logging 

11import threading 

12import queue 

13from collections import defaultdict 

14 

15from pyrocko.guts import Object, Int, List, Tuple, String, Timestamp, Dict 

16from pyrocko import util, trace 

17from pyrocko.progress import progress 

18from pyrocko.plot import nice_time_tick_inc_approx_secs 

19 

20from . import model, io, cache, dataset 

21 

22from .model import to_kind_id, WaveformOrder, to_kind, to_codes, \ 

23 STATION, CHANNEL, RESPONSE, EVENT, WAVEFORM, codes_patterns_list, \ 

24 codes_patterns_for_kind 

25from .client import fdsn, catalog 

26from .selection import Selection, filldocs 

27from .database import abspath 

28from .operators.base import Operator, CodesPatternFiltering 

29from . import client, environment, error 

30 

# Module-wide logger; 'psq.base' is this module's node in the Squirrel
# logger hierarchy.
logger = logging.getLogger('psq.base')

# Prefix used for (de)serialization of guts objects defined in this module.
guts_prefix = 'squirrel'

34 

35 

def nonef(f, xs):
    '''
    Apply ``f`` to the non-``None`` items of ``xs``.

    :returns: ``f`` applied to the filtered list, or ``None`` when no
        non-``None`` items remain.
    '''
    filtered = [item for item in xs if item is not None]
    return f(filtered) if filtered else None

42 

43 

def make_task(*args):
    '''
    Create a :py:mod:`pyrocko.progress` task reporting through this
    module's logger.
    '''
    task = progress.task(*args, logger=logger)
    return task

46 

47 

def lpick(condition, seq):
    '''
    Partition ``seq`` into two lists by ``condition``.

    :returns: tuple ``(nomatch, match)`` where ``match`` collects the
        elements for which ``condition`` is true, ``nomatch`` the rest.
    '''
    nomatch, match = [], []
    for element in seq:
        (match if condition(element) else nomatch).append(element)

    return (nomatch, match)

54 

55 

def len_plural(obj):
    '''
    Return ``(len(obj), suffix)`` where suffix is ``'s'`` unless the
    length is exactly one. Handy for log messages:
    ``'%i file%s' % len_plural(files)``.
    '''
    n = len(obj)
    return n, 's' if n != 1 else ''

58 

59 

def blocks(tmin, tmax, deltat, nsamples_block=100000):
    '''
    Yield consecutive time block spans covering ``[tmin, tmax]``.

    The block length is a "nice" time increment close to
    ``deltat * nsamples_block``.
    '''
    tblock = nice_time_tick_inc_approx_secs(
        util.to_time_float(deltat * nsamples_block))

    ifirst = int(math.floor(tmin / tblock))
    ilast = int(math.ceil(tmax / tblock))
    for i in range(ifirst, ilast):
        yield i * tblock, (i + 1) * tblock

67 

68 

def gaps(avail, tmin, tmax):
    '''
    Compute intervals in ``[tmin, tmax]`` not covered by ``avail``.

    :param avail: coverage spans as ``(tmin_a, tmax_a)`` pairs.
    :returns: list of ``(tmin_g, tmax_g)`` gap spans.
    '''
    assert tmin < tmax

    # Sweep-line events: +1 opens coverage, -1 closes it. The query span
    # contributes an opening at tmax and a closing at tmin so that gaps at
    # the edges are detected with the same logic.
    events = [(tmax, 1), (tmin, -1)]
    for tmin_a, tmax_a in avail:
        assert tmin_a < tmax_a
        events.append((tmin_a, 1))
        events.append((tmax_a, -1))

    events.sort()

    uncovered = []
    level = 1
    t_start = None
    for t, step in events:
        if level == 1 and step == -1:
            t_start = t
        elif level == 0 and step == 1 and t_start is not None:
            if t_start != t:
                uncovered.append((t_start, t))

        level += step

    return uncovered

93 

94 

def order_key(order):
    '''
    Key function to sort or group waveform orders by codes and time span.
    '''
    return order.codes, order.tmin, order.tmax

97 

98 

99def _is_exact(pat): 

100 return not ('*' in pat or '?' in pat or ']' in pat or '[' in pat) 

101 

102 

def prefix_tree(tups):
    '''
    Build a nested prefix tree from equal-length tuples.

    Each node is a ``(value, children)`` pair, children sorted by value.
    Leaves carry an empty children list.
    '''
    if not tups:
        return []

    # Base case: single-element tuples become sorted leaves.
    if len(tups[0]) == 1:
        return sorted((t[0], []) for t in tups)

    grouped = defaultdict(list)
    for t in tups:
        grouped[t[0]].append(t[1:])

    return [(key, prefix_tree(grouped[key])) for key in sorted(grouped)]

119 

120 

def match_time_span(tmin, tmax, obj):
    '''
    Check whether ``obj``'s time span intersects the query span.

    ``None`` bounds (on either side) act as open ends and always match.
    The query end is inclusive against ``obj.tmin`` but ``obj.tmax`` is
    exclusive against the query start.
    '''
    if obj.tmin is not None and tmax is not None and not (obj.tmin <= tmax):
        return False

    if tmin is not None and obj.tmax is not None and not (tmin < obj.tmax):
        return False

    return True

124 

125 

class Batch(object):
    '''
    Batch of waveforms from window-wise data extraction.

    Instances of this class encapsulate the state and results yielded for
    each window in window-wise waveform extraction with the
    :py:meth:`Squirrel.chopper_waveforms` method.

    *Attributes:*

    .. py:attribute:: tmin

        Start of this time window.

    .. py:attribute:: tmax

        End of this time window.

    .. py:attribute:: i

        Index of this time window in sequence.

    .. py:attribute:: n

        Total number of time windows in sequence.

    .. py:attribute:: igroup

        Index of this time window's sequence group.

    .. py:attribute:: ngroups

        Total number of sequence groups.

    .. py:attribute:: traces

        Extracted waveforms for this time window.
    '''

    def __init__(self, tmin, tmax, i, n, igroup, ngroups, traces):
        self.tmin, self.tmax = tmin, tmax
        self.i, self.n = i, n
        self.igroup, self.ngroups = igroup, ngroups
        self.traces = traces

172 

173 

174class Squirrel(Selection): 

175 ''' 

176 Prompt, lazy, indexing, caching, dynamic seismological dataset access. 

177 

178 :param env: 

179 Squirrel environment instance or directory path to use as starting 

180 point for its detection. By default, the current directory is used as 

181 starting point. When searching for a usable environment the directory 

182 ``'.squirrel'`` or ``'squirrel'`` in the current (or starting point) 

183 directory is used if it exists, otherwise the parent directories are 

        searched upwards for the existence of such a directory. If no such

185 directory is found, the user's global Squirrel environment 

186 ``'$HOME/.pyrocko/squirrel'`` is used. 

187 :type env: 

188 :py:class:`~pyrocko.squirrel.environment.Environment` or 

189 :py:class:`str` 

190 

191 :param database: 

192 Database instance or path to database. By default the 

193 database found in the detected Squirrel environment is used. 

194 :type database: 

195 :py:class:`~pyrocko.squirrel.database.Database` or :py:class:`str` 

196 

197 :param cache_path: 

198 Directory path to use for data caching. By default, the ``'cache'`` 

199 directory in the detected Squirrel environment is used. 

200 :type cache_path: 

201 :py:class:`str` 

202 

203 :param persistent: 

204 If given a name, create a persistent selection. 

205 :type persistent: 

206 :py:class:`str` 

207 

208 This is the central class of the Squirrel framework. It provides a unified 

209 interface to query and access seismic waveforms, station meta-data and 

210 event information from local file collections and remote data sources. For 

211 prompt responses, a profound database setup is used under the hood. To 

212 speed up assemblage of ad-hoc data selections, files are indexed on first 

213 use and the extracted meta-data is remembered in the database for 

214 subsequent accesses. Bulk data is lazily loaded from disk and remote 

215 sources, just when requested. Once loaded, data is cached in memory to 

216 expedite typical access patterns. Files and data sources can be dynamically 

217 added to and removed from the Squirrel selection at runtime. 

218 

219 Queries are restricted to the contents of the files currently added to the 

220 Squirrel selection (usually a subset of the file meta-information 

221 collection in the database). This list of files is referred to here as the 

222 "selection". By default, temporary tables are created in the attached 

223 database to hold the names of the files in the selection as well as various 

224 indices and counters. These tables are only visible inside the application 

225 which created them and are deleted when the database connection is closed 

226 or the application exits. To create a selection which is not deleted at 

227 exit, supply a name to the ``persistent`` argument of the Squirrel 

228 constructor. Persistent selections are shared among applications using the 

229 same database. 

230 

231 **Method summary** 

232 

233 Some of the methods are implemented in :py:class:`Squirrel`'s base class 

234 :py:class:`~pyrocko.squirrel.selection.Selection`. 

235 

236 .. autosummary:: 

237 

238 ~Squirrel.add 

239 ~Squirrel.add_source 

240 ~Squirrel.add_fdsn 

241 ~Squirrel.add_catalog 

242 ~Squirrel.add_dataset 

243 ~Squirrel.add_virtual 

244 ~Squirrel.update 

245 ~Squirrel.update_waveform_promises 

246 ~Squirrel.advance_accessor 

247 ~Squirrel.clear_accessor 

248 ~Squirrel.reload 

249 ~pyrocko.squirrel.selection.Selection.iter_paths 

250 ~Squirrel.iter_nuts 

251 ~Squirrel.iter_kinds 

252 ~Squirrel.iter_deltats 

253 ~Squirrel.iter_codes 

254 ~pyrocko.squirrel.selection.Selection.get_paths 

255 ~Squirrel.get_nuts 

256 ~Squirrel.get_kinds 

257 ~Squirrel.get_deltats 

258 ~Squirrel.get_codes 

259 ~Squirrel.get_counts 

260 ~Squirrel.get_time_span 

261 ~Squirrel.get_deltat_span 

262 ~Squirrel.get_nfiles 

263 ~Squirrel.get_nnuts 

264 ~Squirrel.get_total_size 

265 ~Squirrel.get_stats 

266 ~Squirrel.get_content 

267 ~Squirrel.get_stations 

268 ~Squirrel.get_channels 

269 ~Squirrel.get_responses 

270 ~Squirrel.get_events 

271 ~Squirrel.get_waveform_nuts 

272 ~Squirrel.get_waveforms 

273 ~Squirrel.chopper_waveforms 

274 ~Squirrel.get_coverage 

275 ~Squirrel.pile 

276 ~Squirrel.snuffle 

277 ~Squirrel.glob_codes 

278 ~pyrocko.squirrel.selection.Selection.get_database 

279 ~Squirrel.print_tables 

280 ''' 

281 

    def __init__(
            self, env=None, database=None, cache_path=None, persistent=None):

        # Resolve the environment: accept a ready Environment instance or
        # let get_environment() detect one from a path (or the cwd if None).
        if not isinstance(env, environment.Environment):
            env = environment.get_environment(env)

        # For anything not given explicitly, fall back to the environment's
        # configured defaults.
        if database is None:
            database = env.expand_path(env.database_path)

        if cache_path is None:
            cache_path = env.expand_path(env.cache_path)

        if persistent is None:
            persistent = env.persistent

        Selection.__init__(
            self, database=database, persistent=persistent)

        # Paths stored in the database are made relative to the parent of
        # the environment's base path.
        self.get_database().set_basepath(os.path.dirname(env.get_basepath()))

        # Separate in-memory content caches; waveforms get their own cache,
        # everything else shares 'default'.
        self._content_caches = {
            'waveform': cache.ContentCache(),
            'default': cache.ContentCache()}

        self._cache_path = cache_path

        self._sources = []  # remote access clients added via add_source()
        self._operators = []
        self._operator_registry = {}

        self._pending_orders = []  # waveform orders not yet resolved

        self._pile = None  # created on demand (see pile() in the summary)
        self._n_choppers_active = 0

        # Names of this selection's private tables; used by _sql() for
        # substitution in the statements of _create_tables_squirrel().
        self._names.update({
            'nuts': self.name + '_nuts',
            'kind_codes_count': self.name + '_kind_codes_count',
            'coverage': self.name + '_coverage'})

        with self.transaction('create tables') as cursor:
            self._create_tables_squirrel(cursor)

324 

    def _create_tables_squirrel(self, cursor):
        # Create this selection's tables, indices and triggers. Table names
        # are substituted by self._sql() using self._names (set up in
        # __init__). The triggers keep the aggregated kind_codes_count and
        # coverage tables consistent with the nuts table so no explicit
        # bookkeeping is needed on the Python side.

        # One row per indexed content snippet ("nut") in the selection.
        cursor.execute(self._register_table(self._sql(
            '''
                CREATE TABLE IF NOT EXISTS %(db)s.%(nuts)s (
                    nut_id integer PRIMARY KEY,
                    file_id integer,
                    file_segment integer,
                    file_element integer,
                    kind_id integer,
                    kind_codes_id integer,
                    tmin_seconds integer,
                    tmin_offset integer,
                    tmax_seconds integer,
                    tmax_offset integer,
                    kscale integer)
            ''')))

        # Number of nuts per (kind, codes) combination, maintained by the
        # inc/dec triggers below.
        cursor.execute(self._register_table(self._sql(
            '''
                CREATE TABLE IF NOT EXISTS %(db)s.%(kind_codes_count)s (
                    kind_codes_id integer PRIMARY KEY,
                    count integer)
            ''')))

        # A nut is uniquely identified by its position within its file.
        cursor.execute(self._sql(
            '''
                CREATE UNIQUE INDEX IF NOT EXISTS %(db)s.%(nuts)s_file_element
                    ON %(nuts)s (file_id, file_segment, file_element)
            '''))

        cursor.execute(self._sql(
            '''
                CREATE INDEX IF NOT EXISTS %(db)s.%(nuts)s_index_file_id
                    ON %(nuts)s (file_id)
            '''))

        # Time span lookup indices, exploited by _timerange_sql() together
        # with the kscale column.
        cursor.execute(self._sql(
            '''
                CREATE INDEX IF NOT EXISTS %(db)s.%(nuts)s_index_tmin_seconds
                    ON %(nuts)s (kind_id, tmin_seconds)
            '''))

        cursor.execute(self._sql(
            '''
                CREATE INDEX IF NOT EXISTS %(db)s.%(nuts)s_index_tmax_seconds
                    ON %(nuts)s (kind_id, tmax_seconds)
            '''))

        cursor.execute(self._sql(
            '''
                CREATE INDEX IF NOT EXISTS %(db)s.%(nuts)s_index_kscale
                    ON %(nuts)s (kind_id, kscale, tmin_seconds)
            '''))

        # Cascade deletion of files (main database) to their nuts.
        cursor.execute(self._sql(
            '''
                CREATE TRIGGER IF NOT EXISTS %(db)s.%(nuts)s_delete_nuts
                    BEFORE DELETE ON main.files FOR EACH ROW
                    BEGIN
                        DELETE FROM %(nuts)s WHERE file_id == old.file_id;
                    END
            '''))

        # trigger only on size to make silent update of mtime possible
        cursor.execute(self._sql(
            '''
                CREATE TRIGGER IF NOT EXISTS %(db)s.%(nuts)s_delete_nuts2
                    BEFORE UPDATE OF size ON main.files FOR EACH ROW
                    BEGIN
                        DELETE FROM %(nuts)s WHERE file_id == old.file_id;
                    END
            '''))

        # Cascade removal of files from the selection to their nuts.
        cursor.execute(self._sql(
            '''
                CREATE TRIGGER IF NOT EXISTS
                    %(db)s.%(file_states)s_delete_files
                    BEFORE DELETE ON %(db)s.%(file_states)s FOR EACH ROW
                    BEGIN
                        DELETE FROM %(nuts)s WHERE file_id == old.file_id;
                    END
            '''))

        # Keep kind_codes_count in sync: bump on nut insertion ...
        cursor.execute(self._sql(
            '''
                CREATE TRIGGER IF NOT EXISTS %(db)s.%(nuts)s_inc_kind_codes
                    BEFORE INSERT ON %(nuts)s FOR EACH ROW
                    BEGIN
                        INSERT OR IGNORE INTO %(kind_codes_count)s VALUES
                            (new.kind_codes_id, 0);
                        UPDATE %(kind_codes_count)s
                        SET count = count + 1
                        WHERE new.kind_codes_id
                            == %(kind_codes_count)s.kind_codes_id;
                    END
            '''))

        # ... and decrement on nut deletion.
        cursor.execute(self._sql(
            '''
                CREATE TRIGGER IF NOT EXISTS %(db)s.%(nuts)s_dec_kind_codes
                    BEFORE DELETE ON %(nuts)s FOR EACH ROW
                    BEGIN
                        UPDATE %(kind_codes_count)s
                        SET count = count - 1
                        WHERE old.kind_codes_id
                            == %(kind_codes_count)s.kind_codes_id;
                    END
            '''))

        # Sparse coverage representation: per (kind, codes), `step` gives
        # the change of the number of overlapping nuts at the given time.
        cursor.execute(self._register_table(self._sql(
            '''
                CREATE TABLE IF NOT EXISTS %(db)s.%(coverage)s (
                    kind_codes_id integer,
                    time_seconds integer,
                    time_offset integer,
                    step integer)
            ''')))

        cursor.execute(self._sql(
            '''
                CREATE UNIQUE INDEX IF NOT EXISTS %(db)s.%(coverage)s_time
                    ON %(coverage)s (kind_codes_id, time_seconds, time_offset)
            '''))

        # Maintain coverage on nut insertion: +1 step at the nut's tmin,
        # -1 at its tmax; entries whose step ends up 0 are pruned.
        cursor.execute(self._sql(
            '''
                CREATE TRIGGER IF NOT EXISTS %(db)s.%(nuts)s_add_coverage
                    AFTER INSERT ON %(nuts)s FOR EACH ROW
                    BEGIN
                        INSERT OR IGNORE INTO %(coverage)s VALUES
                        (new.kind_codes_id, new.tmin_seconds, new.tmin_offset, 0)
                        ;
                        UPDATE %(coverage)s
                        SET step = step + 1
                        WHERE new.kind_codes_id == %(coverage)s.kind_codes_id
                            AND new.tmin_seconds == %(coverage)s.time_seconds
                            AND new.tmin_offset == %(coverage)s.time_offset
                        ;
                        INSERT OR IGNORE INTO %(coverage)s VALUES
                        (new.kind_codes_id, new.tmax_seconds, new.tmax_offset, 0)
                        ;
                        UPDATE %(coverage)s
                        SET step = step - 1
                        WHERE new.kind_codes_id == %(coverage)s.kind_codes_id
                            AND new.tmax_seconds == %(coverage)s.time_seconds
                            AND new.tmax_offset == %(coverage)s.time_offset
                        ;
                        DELETE FROM %(coverage)s
                        WHERE new.kind_codes_id == %(coverage)s.kind_codes_id
                            AND new.tmin_seconds == %(coverage)s.time_seconds
                            AND new.tmin_offset == %(coverage)s.time_offset
                            AND step == 0
                        ;
                        DELETE FROM %(coverage)s
                        WHERE new.kind_codes_id == %(coverage)s.kind_codes_id
                            AND new.tmax_seconds == %(coverage)s.time_seconds
                            AND new.tmax_offset == %(coverage)s.time_offset
                            AND step == 0
                        ;
                    END
            '''))

        # Inverse bookkeeping on nut deletion (signs swapped, old.* refs).
        cursor.execute(self._sql(
            '''
                CREATE TRIGGER IF NOT EXISTS %(db)s.%(nuts)s_remove_coverage
                    BEFORE DELETE ON %(nuts)s FOR EACH ROW
                    BEGIN
                        INSERT OR IGNORE INTO %(coverage)s VALUES
                        (old.kind_codes_id, old.tmin_seconds, old.tmin_offset, 0)
                        ;
                        UPDATE %(coverage)s
                        SET step = step - 1
                        WHERE old.kind_codes_id == %(coverage)s.kind_codes_id
                            AND old.tmin_seconds == %(coverage)s.time_seconds
                            AND old.tmin_offset == %(coverage)s.time_offset
                        ;
                        INSERT OR IGNORE INTO %(coverage)s VALUES
                        (old.kind_codes_id, old.tmax_seconds, old.tmax_offset, 0)
                        ;
                        UPDATE %(coverage)s
                        SET step = step + 1
                        WHERE old.kind_codes_id == %(coverage)s.kind_codes_id
                            AND old.tmax_seconds == %(coverage)s.time_seconds
                            AND old.tmax_offset == %(coverage)s.time_offset
                        ;
                        DELETE FROM %(coverage)s
                        WHERE old.kind_codes_id == %(coverage)s.kind_codes_id
                            AND old.tmin_seconds == %(coverage)s.time_seconds
                            AND old.tmin_offset == %(coverage)s.time_offset
                            AND step == 0
                        ;
                        DELETE FROM %(coverage)s
                        WHERE old.kind_codes_id == %(coverage)s.kind_codes_id
                            AND old.tmax_seconds == %(coverage)s.time_seconds
                            AND old.tmax_offset == %(coverage)s.time_offset
                            AND step == 0
                        ;
                    END
            '''))

525 

    def _delete(self):
        '''Delete database tables associated with this Squirrel.'''

        with self.transaction('delete tables') as cursor:
            # Drop triggers before the tables they reference. The
            # coverage-related objects use IF EXISTS — presumably because
            # they may be absent in selections created by older versions;
            # confirm against the schema history.
            for s in '''
                    DROP TRIGGER %(db)s.%(nuts)s_delete_nuts;
                    DROP TRIGGER %(db)s.%(nuts)s_delete_nuts2;
                    DROP TRIGGER %(db)s.%(file_states)s_delete_files;
                    DROP TRIGGER %(db)s.%(nuts)s_inc_kind_codes;
                    DROP TRIGGER %(db)s.%(nuts)s_dec_kind_codes;
                    DROP TABLE %(db)s.%(nuts)s;
                    DROP TABLE %(db)s.%(kind_codes_count)s;
                    DROP TRIGGER IF EXISTS %(db)s.%(nuts)s_add_coverage;
                    DROP TRIGGER IF EXISTS %(db)s.%(nuts)s_remove_coverage;
                    DROP TABLE IF EXISTS %(db)s.%(coverage)s;
                    '''.strip().splitlines():

                cursor.execute(self._sql(s))

        Selection._delete(self)

546 

    @filldocs
    def add(self,
            paths,
            kinds=None,
            format='detect',
            include=None,
            exclude=None,
            check=True):

        '''
        Add files to the selection.

        :param paths:
            Iterator yielding paths to files or directories to be added to the
            selection. Recurses into directories. If given a ``str``, it
            is treated as a single path to be added.
        :type paths:
            :py:class:`list` of :py:class:`str`

        :param kinds:
            Content types to be made available through the Squirrel selection.
            By default, all known content types are accepted.
        :type kinds:
            :py:class:`list` of :py:class:`str`

        :param format:
            File format identifier or ``'detect'`` to enable auto-detection
            (available: %(file_formats)s).
        :type format:
            str

        :param include:
            If not ``None``, files are only included if their paths match the
            given regular expression pattern.
        :type include:
            str

        :param exclude:
            If not ``None``, files are only included if their paths do not
            match the given regular expression pattern.
        :type exclude:
            str

        :param check:
            If ``True``, all file modification times are checked to see if
            cached information has to be updated (slow). If ``False``, only
            previously unknown files are indexed and cached information is used
            for known files, regardless of file state (fast, corresponds to
            Squirrel's ``--optimistic`` mode). File deletions will go
            undetected in the latter case.
        :type check:
            bool

        :Complexity:
            O(log N)
        '''

        if isinstance(kinds, str):
            kinds = (kinds,)

        if isinstance(paths, str):
            paths = [paths]

        kind_mask = model.to_kind_mask(kinds)

        with progress.view():
            # Virtual paths are handed through untouched (no filesystem
            # traversal for them).
            Selection.add(
                self, util.iter_select_files(
                    paths,
                    show_progress=False,
                    include=include,
                    exclude=exclude,
                    pass_through=lambda path: path.startswith('virtual:')
                ), kind_mask, format)

        # Index new/changed files, then pull their nuts into the selection.
        self._load(check)
        self._update_nuts()

624 

    def reload(self):
        '''
        Check for modifications and reindex modified files.

        Based on file modification times.
        '''

        # Force re-checking of every file in the selection, then run the
        # usual indexing pipeline with checking enabled.
        self._set_file_states_force_check()
        self._load(check=True)
        self._update_nuts()

635 

636 def add_virtual(self, nuts, virtual_paths=None): 

637 ''' 

638 Add content which is not backed by files. 

639 

640 :param nuts: 

641 Content pieces to be added. 

642 :type nuts: 

643 iterator yielding :py:class:`~pyrocko.squirrel.model.Nut` objects 

644 

645 :param virtual_paths: 

646 List of virtual paths to prevent creating a temporary list of the 

647 nuts while aggregating the file paths for the selection. 

648 :type virtual_paths: 

649 :py:class:`list` of :py:class:`str` 

650 

651 Stores to the main database and the selection. 

652 ''' 

653 

654 if isinstance(virtual_paths, str): 

655 virtual_paths = [virtual_paths] 

656 

657 if virtual_paths is None: 

658 if not isinstance(nuts, list): 

659 nuts = list(nuts) 

660 virtual_paths = set(nut.file_path for nut in nuts) 

661 

662 Selection.add(self, virtual_paths) 

663 self.get_database().dig(nuts) 

664 self._update_nuts() 

665 

    def add_volatile(self, nuts):
        # Add in-memory content under virtual paths and remember those
        # paths in self._volatile_paths (their cleanup happens elsewhere,
        # outside this excerpt).
        if not isinstance(nuts, list):
            nuts = list(nuts)

        paths = list(set(nut.file_path for nut in nuts))
        # Register the nuts with the virtual IO backend before adding them
        # to the selection.
        io.backends.virtual.add_nuts(nuts)
        self.add_virtual(nuts, paths)
        self._volatile_paths.extend(paths)

674 

    def add_volatile_waveforms(self, traces):
        '''
        Add in-memory waveforms which will be removed when the app closes.

        :returns:
            The virtual path under which the traces have been registered.
        '''

        name = model.random_name()

        # All traces share one randomly named virtual path; each trace
        # becomes its own file segment below.
        path = 'virtual:volatile:%s' % name

        nuts = []
        for itr, tr in enumerate(traces):
            assert tr.tmin <= tr.tmax
            tmin_seconds, tmin_offset = model.tsplit(tr.tmin)
            # End time is derived from the sample count and sampling
            # interval rather than taken from tr.tmax.
            tmax_seconds, tmax_offset = model.tsplit(
                tr.tmin + tr.data_len()*tr.deltat)

            nuts.append(model.Nut(
                file_path=path,
                file_format='virtual',
                file_segment=itr,
                file_element=0,
                file_mtime=0,
                codes=tr.codes,
                tmin_seconds=tmin_seconds,
                tmin_offset=tmin_offset,
                tmax_seconds=tmax_seconds,
                tmax_offset=tmax_offset,
                deltat=tr.deltat,
                kind_id=to_kind_id('waveform'),
                content=tr))

        self.add_volatile(nuts)
        return path

708 

    def _load(self, check):
        # Index (or re-index) the selection's files by draining the iload
        # iterator; content=[] requests no content groups, so only the
        # indexing side effects are of interest here.
        for _ in io.iload(
                self,
                content=[],
                skip_unchanged=True,
                check=check):
            pass

716 

    def _update_nuts(self, transaction=None):
        # Copy nuts of the selection's files from the global nuts table
        # into this selection's private nuts table, respecting each file's
        # kind mask.
        transaction = transaction or self.transaction('update nuts')
        with make_task('Aggregating selection') as task, \
                transaction as cursor:

            # Report SQLite VM progress through the task (every 100k steps).
            self._conn.set_progress_handler(task.update, 100000)
            nrows = cursor.execute(self._sql(
                '''
                    INSERT INTO %(db)s.%(nuts)s
                    SELECT NULL,
                        nuts.file_id, nuts.file_segment, nuts.file_element,
                        nuts.kind_id, nuts.kind_codes_id,
                        nuts.tmin_seconds, nuts.tmin_offset,
                        nuts.tmax_seconds, nuts.tmax_offset,
                        nuts.kscale
                    FROM %(db)s.%(file_states)s
                    INNER JOIN nuts
                        ON %(db)s.%(file_states)s.file_id == nuts.file_id
                    INNER JOIN kind_codes
                        ON nuts.kind_codes_id ==
                            kind_codes.kind_codes_id
                    WHERE %(db)s.%(file_states)s.file_state != 2
                        AND (((1 << kind_codes.kind_id)
                            & %(db)s.%(file_states)s.kind_mask) != 0)
                ''')).rowcount
            # NOTE(review): file_state 2 appears to mean "already known" —
            # confirm against the state constants in selection.py.

            task.update(nrows)
            self._set_file_states_known(transaction)
            self._conn.set_progress_handler(None, 0)

746 

    def add_source(self, source, check=True):
        '''
        Add remote resource.

        :param source:
            Remote data access client instance.
        :type source:
            subclass of :py:class:`~pyrocko.squirrel.client.base.Source`

        :param check:
            Passed on to the source's ``setup`` method.
        :type check:
            bool
        '''

        # Register first, then let the source hook itself up to this
        # Squirrel instance.
        self._sources.append(source)
        source.setup(self, check=check)

759 

760 def add_fdsn(self, *args, **kwargs): 

761 ''' 

762 Add FDSN site for transparent remote data access. 

763 

764 Arguments are passed to 

765 :py:class:`~pyrocko.squirrel.client.fdsn.FDSNSource`. 

766 ''' 

767 

768 self.add_source(fdsn.FDSNSource(*args, **kwargs)) 

769 

770 def add_catalog(self, *args, **kwargs): 

771 ''' 

772 Add online catalog for transparent event data access. 

773 

774 Arguments are passed to 

775 :py:class:`~pyrocko.squirrel.client.catalog.CatalogSource`. 

776 ''' 

777 

778 self.add_source(catalog.CatalogSource(*args, **kwargs)) 

779 

    def add_dataset(self, ds, check=True):
        '''
        Read dataset description from file and add its contents.

        :param ds:
            Path to dataset description file or dataset description object.
            See :py:mod:`~pyrocko.squirrel.dataset`.
        :type ds:
            :py:class:`str` or :py:class:`~pyrocko.squirrel.dataset.Dataset`

        :param check:
            If ``True``, all file modification times are checked to see if
            cached information has to be updated (slow). If ``False``, only
            previously unknown files are indexed and cached information is used
            for known files, regardless of file state (fast, corresponds to
            Squirrel's ``--optimistic`` mode). File deletions will go
            undetected in the latter case.
        :type check:
            bool
        '''
        if isinstance(ds, str):
            ds = dataset.read_dataset(ds)

        # The dataset wires its sources/files into this Squirrel instance.
        ds.setup(self, check=check)

804 

805 def _get_selection_args( 

806 self, kind_id, 

807 obj=None, tmin=None, tmax=None, time=None, codes=None): 

808 

809 if codes is not None: 

810 codes = codes_patterns_for_kind(kind_id, codes) 

811 

812 if time is not None: 

813 tmin = time 

814 tmax = time 

815 

816 if obj is not None: 

817 tmin = tmin if tmin is not None else obj.tmin 

818 tmax = tmax if tmax is not None else obj.tmax 

819 codes = codes if codes is not None else codes_patterns_for_kind( 

820 kind_id, obj.codes) 

821 

822 return tmin, tmax, codes 

823 

824 def _get_selection_args_str(self, *args, **kwargs): 

825 

826 tmin, tmax, codes = self._get_selection_args(*args, **kwargs) 

827 return 'tmin: %s, tmax: %s, codes: %s' % ( 

828 util.time_to_str(tmin) if tmin is not None else 'none', 

829 util.time_to_str(tmax) if tmax is not None else 'none', 

830 ','.join(str(entry) for entry in codes)) 

831 

832 def _selection_args_to_kwargs( 

833 self, obj=None, tmin=None, tmax=None, time=None, codes=None): 

834 

835 return dict(obj=obj, tmin=tmin, tmax=tmax, time=time, codes=codes) 

836 

    def _timerange_sql(self, tmin, tmax, kind, cond, args, naiv):
        # Append SQL conditions (and their bind values) to cond/args,
        # restricting nuts of `kind` to those possibly intersecting
        # [tmin, tmax]. The non-naiv variant exploits the kscale column:
        # it appears to bin nuts by the magnitude of their time span (see
        # model.tscale_edges — confirm in model.py), so per bin the nut's
        # tmin is confined to a bounded interval and the
        # (kind_id, kscale, tmin_seconds) index can be used.

        tmin_seconds, tmin_offset = model.tsplit(tmin)
        tmax_seconds, tmax_offset = model.tsplit(tmax)
        if naiv:
            # Reference implementation for testing: plain bound on tmin.
            cond.append('%(db)s.%(nuts)s.tmin_seconds <= ?')
            args.append(tmax_seconds)
        else:
            tscale_edges = model.tscale_edges
            tmin_cond = []
            for kscale in range(tscale_edges.size + 1):
                if kscale != tscale_edges.size:
                    # Nuts in this bin span at most `tscale` seconds, so
                    # their tmin must lie in the padded query interval.
                    # NOTE(review): the single '=' below is valid SQLite
                    # but inconsistent with the '==' used elsewhere.
                    tscale = int(tscale_edges[kscale])
                    tmin_cond.append('''
                        (%(db)s.%(nuts)s.kind_id = ?
                         AND %(db)s.%(nuts)s.kscale == ?
                         AND %(db)s.%(nuts)s.tmin_seconds BETWEEN ? AND ?)
                    ''')
                    args.extend(
                        (to_kind_id(kind), kscale,
                         tmin_seconds - tscale - 1, tmax_seconds + 1))

                else:
                    # Last bin collects unbounded spans: only an upper
                    # bound on tmin applies.
                    tmin_cond.append('''
                        (%(db)s.%(nuts)s.kind_id == ?
                         AND %(db)s.%(nuts)s.kscale == ?
                         AND %(db)s.%(nuts)s.tmin_seconds <= ?)
                    ''')

                    args.extend(
                        (to_kind_id(kind), kscale, tmax_seconds + 1))
            if tmin_cond:
                cond.append(' ( ' + ' OR '.join(tmin_cond) + ' ) ')

        # Common lower bound: the nut must not end before the query starts.
        cond.append('%(db)s.%(nuts)s.tmax_seconds >= ?')
        args.append(tmin_seconds)

873 

    def _codes_match_sql(self, kind_id, codes, cond, args):
        # Translate codes patterns into SQL conditions on kind_codes.codes,
        # appending to cond/args. Exact patterns are folded into a single
        # IN (...) test; patterns with wildcards use GLOB matching.
        pats = codes_patterns_for_kind(kind_id, codes)
        if pats is None:
            return

        pats_exact = []
        pats_nonexact = []
        for pat in pats:
            spat = pat.safe_str
            (pats_exact if _is_exact(spat) else pats_nonexact).append(spat)

        cond_exact = None
        if pats_exact:
            cond_exact = ' ( kind_codes.codes IN ( %s ) ) ' % ', '.join(
                '?'*len(pats_exact))

            args.extend(pats_exact)

        cond_nonexact = None
        if pats_nonexact:
            cond_nonexact = ' ( %s ) ' % ' OR '.join(
                ('kind_codes.codes GLOB ?',) * len(pats_nonexact))

            args.extend(pats_nonexact)

        # Combine whichever of the two condition groups are present.
        if cond_exact and cond_nonexact:
            cond.append(' ( %s OR %s ) ' % (cond_exact, cond_nonexact))

        elif cond_exact:
            cond.append(cond_exact)

        elif cond_nonexact:
            cond.append(cond_nonexact)

907 

    def iter_nuts(
            self, kind=None, tmin=None, tmax=None, codes=None, naiv=False,
            kind_codes_ids=None, path=None, limit=None):

        '''
        Iterate over content entities matching given constraints.

        :param kind:
            Content kind (or kinds) to extract.
        :type kind:
            :py:class:`str`, :py:class:`list` of :py:class:`str`

        :param tmin:
            Start time of query interval.
        :type tmin:
            timestamp

        :param tmax:
            End time of query interval.
        :type tmax:
            timestamp

        :param codes:
            List of code patterns to query.
        :type codes:
            :py:class:`list` of :py:class:`~pyrocko.squirrel.model.Codes`
            objects appropriate for the queried content type, or anything which
            can be converted to such objects.

        :param naiv:
            Bypass time span lookup through indices (slow, for testing).
        :type naiv:
            :py:class:`bool`

        :param kind_codes_ids:
            Kind-codes IDs of contents to be retrieved (internal use).
        :type kind_codes_ids:
            :py:class:`list` of :py:class:`int`

        :param path:
            If given, restrict the query to nuts from this file path.
        :type path:
            :py:class:`str`

        :param limit:
            If given, maximum number of rows fetched from the database.
        :type limit:
            :py:class:`int`

        :yields:
            :py:class:`~pyrocko.squirrel.model.Nut` objects representing the
            intersecting content.

        :complexity:
            O(log N) for the time selection part due to heavy use of database
            indices.

        Query time span is treated as a half-open interval ``[tmin, tmax)``.
        However, if ``tmin`` equals ``tmax``, the edge logics are modified to
        closed-interval so that content intersecting with the time instant ``t
        = tmin = tmax`` is returned (otherwise nothing would be returned as
        ``[t, t)`` never matches anything).

        Time spans of content entities to be matched are also treated as half
        open intervals, e.g. content span ``[0, 1)`` is matched by query span
        ``[0, 1)`` but not by ``[-1, 0)`` or ``[1, 2)``. Also here, logics are
        modified to closed-interval when the content time span is an empty
        interval, i.e. to indicate a time instant. E.g. time instant 0 is
        matched by ``[0, 1)`` but not by ``[-1, 0)`` or ``[1, 2)``.
        '''

        # Multiple kinds: recurse once per kind.
        # NOTE(review): the recursion forwards only tmin/tmax/codes — the
        # naiv, kind_codes_ids, path and limit arguments are dropped here;
        # confirm whether that is intended.
        if not isinstance(kind, str):
            if kind is None:
                kind = model.g_content_kinds
            for kind_ in kind:
                for nut in self.iter_nuts(kind_, tmin, tmax, codes):
                    yield nut

            return

        kind_id = to_kind_id(kind)

        cond = []
        args = []
        if tmin is not None or tmax is not None:
            assert kind is not None
            # Open-ended queries are clamped to the available time span;
            # +1.0 keeps the half-open upper edge inclusive of the last
            # content.
            if tmin is None:
                tmin = self.get_time_span()[0]
            if tmax is None:
                tmax = self.get_time_span()[1] + 1.0

            self._timerange_sql(tmin, tmax, kind, cond, args, naiv)

        cond.append('kind_codes.kind_id == ?')
        args.append(kind_id)

        if codes is not None:
            self._codes_match_sql(kind_id, codes, cond, args)

        if kind_codes_ids is not None:
            cond.append(
                ' ( kind_codes.kind_codes_id IN ( %s ) ) ' % ', '.join(
                    '?'*len(kind_codes_ids)))

            args.extend(kind_codes_ids)

        db = self.get_database()
        if path is not None:
            cond.append('files.path == ?')
            args.append(db.relpath(abspath(path)))

        sql = ('''
            SELECT
                files.path,
                files.format,
                files.mtime,
                files.size,
                %(db)s.%(nuts)s.file_segment,
                %(db)s.%(nuts)s.file_element,
                kind_codes.kind_id,
                kind_codes.codes,
                %(db)s.%(nuts)s.tmin_seconds,
                %(db)s.%(nuts)s.tmin_offset,
                %(db)s.%(nuts)s.tmax_seconds,
                %(db)s.%(nuts)s.tmax_offset,
                kind_codes.deltat
            FROM files
            INNER JOIN %(db)s.%(nuts)s
                ON files.file_id == %(db)s.%(nuts)s.file_id
            INNER JOIN kind_codes
                ON %(db)s.%(nuts)s.kind_codes_id == kind_codes.kind_codes_id
            ''')

        if cond:
            sql += ''' WHERE ''' + ' AND '.join(cond)

        if limit is not None:
            sql += ''' LIMIT %i''' % limit

        sql = self._sql(sql)
        if tmin is None and tmax is None:
            # No time constraint: yield everything the SQL matched.
            for row in self._conn.execute(sql, args):
                row = (db.abspath(row[0]),) + row[1:]
                nut = model.Nut(values_nocheck=row)
                yield nut
        else:
            assert tmin is not None and tmax is not None
            if tmin == tmax:
                # Time-instant query: closed-interval edge logics (see
                # docstring).
                for row in self._conn.execute(sql, args):
                    row = (db.abspath(row[0]),) + row[1:]
                    nut = model.Nut(values_nocheck=row)
                    if (nut.tmin <= tmin < nut.tmax) \
                            or (nut.tmin == nut.tmax and tmin == nut.tmin):

                        yield nut
            else:
                # Regular half-open interval intersection; empty content
                # spans (time instants) are matched closed-interval.
                for row in self._conn.execute(sql, args):
                    row = (db.abspath(row[0]),) + row[1:]
                    nut = model.Nut(values_nocheck=row)
                    if (tmin < nut.tmax and nut.tmin < tmax) \
                            or (nut.tmin == nut.tmax
                                and tmin <= nut.tmin < tmax):

                        yield nut

1062 

1063 def get_nuts(self, *args, **kwargs): 

1064 ''' 

1065 Get content entities matching given constraints. 

1066 

1067 Like :py:meth:`iter_nuts` but returns results as a list. 

1068 ''' 

1069 

1070 return list(self.iter_nuts(*args, **kwargs)) 

1071 

    def _split_nuts(
            self, kind, tmin=None, tmax=None, codes=None, path=None):
        '''
        Cut a time window out of matching index entries (nuts).

        Index entries of content ``kind`` overlapping ``[tmin, tmax)`` (and
        optionally restricted by ``codes`` and file ``path``) are deleted.
        Where an entry sticks out of the window, replacement entries covering
        only the non-overlapping remainder(s) are inserted. The same update
        is applied to the live selection's nuts table and to the global nuts
        table in the main database.
        '''

        kind_id = to_kind_id(kind)
        tmin_seconds, tmin_offset = model.tsplit(tmin)
        tmax_seconds, tmax_offset = model.tsplit(tmax)

        # Name substitutions targeting the global nuts table in 'main'
        # (as opposed to self._sql which targets the selection's tables).
        names_main_nuts = dict(self._names)
        names_main_nuts.update(db='main', nuts='nuts')

        db = self.get_database()

        def main_nuts(s):
            # Substitute table names for the global (main) nuts table.
            return s % names_main_nuts

        with self.transaction('split nuts') as cursor:
            # modify selection and main
            for sql_subst in [
                    self._sql, main_nuts]:

                cond = []
                args = []

                self._timerange_sql(tmin, tmax, kind, cond, args, False)

                if codes is not None:
                    self._codes_match_sql(kind_id, codes, cond, args)

                if path is not None:
                    cond.append('files.path == ?')
                    args.append(db.relpath(abspath(path)))

                sql = sql_subst('''
                    SELECT
                        %(db)s.%(nuts)s.nut_id,
                        %(db)s.%(nuts)s.tmin_seconds,
                        %(db)s.%(nuts)s.tmin_offset,
                        %(db)s.%(nuts)s.tmax_seconds,
                        %(db)s.%(nuts)s.tmax_offset,
                        kind_codes.deltat
                    FROM files
                    INNER JOIN %(db)s.%(nuts)s
                        ON files.file_id == %(db)s.%(nuts)s.file_id
                    INNER JOIN kind_codes
                        ON %(db)s.%(nuts)s.kind_codes_id == kind_codes.kind_codes_id
                    WHERE ''' + ' AND '.join(cond))  # noqa

                insert = []
                delete = []
                for row in cursor.execute(sql, args):
                    nut_id, nut_tmin_seconds, nut_tmin_offset, \
                        nut_tmax_seconds, nut_tmax_offset, nut_deltat = row

                    nut_tmin = model.tjoin(
                        nut_tmin_seconds, nut_tmin_offset)
                    nut_tmax = model.tjoin(
                        nut_tmax_seconds, nut_tmax_offset)

                    if nut_tmin < tmax and tmin < nut_tmax:
                        # Nut overlaps the window: keep the part sticking
                        # out to the left of the window, if any ...
                        if nut_tmin < tmin:
                            insert.append((
                                nut_tmin_seconds, nut_tmin_offset,
                                tmin_seconds, tmin_offset,
                                model.tscale_to_kscale(
                                    tmin_seconds - nut_tmin_seconds),
                                nut_id))

                        # ... and the part sticking out to the right.
                        if tmax < nut_tmax:
                            insert.append((
                                tmax_seconds, tmax_offset,
                                nut_tmax_seconds, nut_tmax_offset,
                                model.tscale_to_kscale(
                                    nut_tmax_seconds - tmax_seconds),
                                nut_id))

                        # Original (overlapping) entry is removed.
                        delete.append((nut_id,))

                # Insert the trimmed replacement entries, copying all
                # non-time attributes from the original row by nut_id.
                sql_add = '''
                    INSERT INTO %(db)s.%(nuts)s (
                            file_id, file_segment, file_element, kind_id,
                            kind_codes_id, tmin_seconds, tmin_offset,
                            tmax_seconds, tmax_offset, kscale )
                        SELECT
                            file_id, file_segment, file_element,
                            kind_id, kind_codes_id, ?, ?, ?, ?, ?
                        FROM %(db)s.%(nuts)s
                        WHERE nut_id == ?
                '''
                cursor.executemany(sql_subst(sql_add), insert)

                sql_delete = '''
                    DELETE FROM %(db)s.%(nuts)s WHERE nut_id == ?
                '''
                cursor.executemany(sql_subst(sql_delete), delete)

1166 

    def get_time_span(self, kinds=None, tight=True, dummy_limits=True):
        '''
        Get time interval over all content in selection.

        :param kinds:
            If not ``None``, restrict query to given content kinds.
        :type kinds:
            list of str

        :param tight:
            If ``True``, gather the extreme limits over all known values.
            If ``False``, return ``None`` for a limit as soon as any queried
            content kind has an open (unknown) limit on that side.
        :type tight:
            bool

        :param dummy_limits:
            If ``True``, replace unknown limits by the global dummy values
            ``model.g_tmin`` / ``model.g_tmax`` instead of returning ``None``.
        :type dummy_limits:
            bool

        :complexity:
            O(1), independent of the number of nuts.

        :returns:
            ``(tmin, tmax)``, combined time interval of queried content kinds.
        '''

        # Two-step MIN: restrict to rows sharing the minimal tmin_seconds,
        # then refine with the sub-second tmin_offset.
        sql_min = self._sql('''
            SELECT MIN(tmin_seconds), MIN(tmin_offset)
            FROM %(db)s.%(nuts)s
            WHERE kind_id == ?
                AND tmin_seconds == (
                    SELECT MIN(tmin_seconds)
                    FROM %(db)s.%(nuts)s
                    WHERE kind_id == ?)
        ''')

        # Analogous two-step MAX for the upper limit.
        sql_max = self._sql('''
            SELECT MAX(tmax_seconds), MAX(tmax_offset)
            FROM %(db)s.%(nuts)s
            WHERE kind_id == ?
                AND tmax_seconds == (
                    SELECT MAX(tmax_seconds)
                    FROM %(db)s.%(nuts)s
                    WHERE kind_id == ?)
        ''')

        gtmin = None
        gtmax = None

        if isinstance(kinds, str):
            kinds = [kinds]

        if kinds is None:
            kind_ids = model.g_content_kind_ids
        else:
            kind_ids = model.to_kind_ids(kinds)

        tmins = []
        tmaxs = []
        for kind_id in kind_ids:
            for tmin_seconds, tmin_offset in self._conn.execute(
                    sql_min, (kind_id, kind_id)):
                tmins.append(model.tjoin(tmin_seconds, tmin_offset))

            for (tmax_seconds, tmax_offset) in self._conn.execute(
                    sql_max, (kind_id, kind_id)):
                tmaxs.append(model.tjoin(tmax_seconds, tmax_offset))

        # Sentinel values stored in the database mean "limit unknown/open";
        # map them to None before aggregating.
        tmins = [tmin if tmin != model.g_tmin else None for tmin in tmins]
        tmaxs = [tmax if tmax != model.g_tmax else None for tmax in tmaxs]

        if tight:
            gtmin = nonef(min, tmins)
            gtmax = nonef(max, tmaxs)
        else:
            gtmin = None if None in tmins else nonef(min, tmins)
            gtmax = None if None in tmaxs else nonef(max, tmaxs)

        if dummy_limits:
            if gtmin is None:
                gtmin = model.g_tmin
            if gtmax is None:
                gtmax = model.g_tmax

        return gtmin, gtmax

1242 

1243 def has(self, kinds): 

1244 ''' 

1245 Check availability of given content kinds. 

1246 

1247 :param kinds: 

1248 Content kinds to query. 

1249 :type kind: 

1250 list of str 

1251 

1252 :returns: 

1253 ``True`` if any of the queried content kinds is available 

1254 in the selection. 

1255 ''' 

1256 self_tmin, self_tmax = self.get_time_span( 

1257 kinds, dummy_limits=False) 

1258 

1259 return None not in (self_tmin, self_tmax) 

1260 

1261 def get_deltat_span(self, kind): 

1262 ''' 

1263 Get min and max sampling interval of all content of given kind. 

1264 

1265 :param kind: 

1266 Content kind 

1267 :type kind: 

1268 str 

1269 

1270 :returns: ``(deltat_min, deltat_max)`` 

1271 ''' 

1272 

1273 deltats = [ 

1274 deltat for deltat in self.get_deltats(kind) 

1275 if deltat is not None] 

1276 

1277 if deltats: 

1278 return min(deltats), max(deltats) 

1279 else: 

1280 return None, None 

1281 

1282 def iter_kinds(self, codes=None): 

1283 ''' 

1284 Iterate over content types available in selection. 

1285 

1286 :param codes: 

1287 If given, get kinds only for selected codes identifier. 

1288 Only a single identifier may be given here and no pattern matching 

1289 is done, currently. 

1290 :type codes: 

1291 :py:class:`~pyrocko.squirrel.model.Codes` 

1292 

1293 :yields: 

1294 Available content kinds as :py:class:`str`. 

1295 

1296 :complexity: 

1297 O(1), independent of number of nuts. 

1298 ''' 

1299 

1300 return self._database._iter_kinds( 

1301 codes=codes, 

1302 kind_codes_count='%(db)s.%(kind_codes_count)s' % self._names) 

1303 

1304 def iter_deltats(self, kind=None): 

1305 ''' 

1306 Iterate over sampling intervals available in selection. 

1307 

1308 :param kind: 

1309 If given, get sampling intervals only for a given content type. 

1310 :type kind: 

1311 str 

1312 

1313 :yields: 

1314 :py:class:`float` values. 

1315 

1316 :complexity: 

1317 O(1), independent of number of nuts. 

1318 ''' 

1319 return self._database._iter_deltats( 

1320 kind=kind, 

1321 kind_codes_count='%(db)s.%(kind_codes_count)s' % self._names) 

1322 

1323 def iter_codes(self, kind=None): 

1324 ''' 

1325 Iterate over content identifier code sequences available in selection. 

1326 

1327 :param kind: 

1328 If given, get codes only for a given content type. 

1329 :type kind: 

1330 str 

1331 

1332 :yields: 

1333 :py:class:`tuple` of :py:class:`str` 

1334 

1335 :complexity: 

1336 O(1), independent of number of nuts. 

1337 ''' 

1338 return self._database._iter_codes( 

1339 kind=kind, 

1340 kind_codes_count='%(db)s.%(kind_codes_count)s' % self._names) 

1341 

1342 def _iter_codes_info(self, kind=None, codes=None): 

1343 ''' 

1344 Iterate over number of occurrences of any (kind, codes) combination. 

1345 

1346 :param kind: 

1347 If given, get counts only for selected content type. 

1348 :type kind: 

1349 str 

1350 

1351 :yields: 

1352 Tuples of the form ``(kind, codes, deltat, kind_codes_id, count)``. 

1353 

1354 :complexity: 

1355 O(1), independent of number of nuts. 

1356 ''' 

1357 return self._database._iter_codes_info( 

1358 kind=kind, 

1359 codes=codes, 

1360 kind_codes_count='%(db)s.%(kind_codes_count)s' % self._names) 

1361 

1362 def get_kinds(self, codes=None): 

1363 ''' 

1364 Get content types available in selection. 

1365 

1366 :param codes: 

1367 If given, get kinds only for selected codes identifier. 

1368 Only a single identifier may be given here and no pattern matching 

1369 is done, currently. 

1370 :type codes: 

1371 :py:class:`~pyrocko.squirrel.model.Codes` 

1372 

1373 :returns: 

1374 Sorted list of available content types. 

1375 :rtype: 

1376 py:class:`list` of :py:class:`str` 

1377 

1378 :complexity: 

1379 O(1), independent of number of nuts. 

1380 

1381 ''' 

1382 return sorted(list(self.iter_kinds(codes=codes))) 

1383 

1384 def get_deltats(self, kind=None): 

1385 ''' 

1386 Get sampling intervals available in selection. 

1387 

1388 :param kind: 

1389 If given, get sampling intervals only for selected content type. 

1390 :type kind: 

1391 str 

1392 

1393 :complexity: 

1394 O(1), independent of number of nuts. 

1395 

1396 :returns: Sorted list of available sampling intervals. 

1397 ''' 

1398 return sorted(list(self.iter_deltats(kind=kind))) 

1399 

1400 def get_codes(self, kind=None): 

1401 ''' 

1402 Get identifier code sequences available in selection. 

1403 

1404 :param kind: 

1405 If given, get codes only for selected content type. 

1406 :type kind: 

1407 str 

1408 

1409 :complexity: 

1410 O(1), independent of number of nuts. 

1411 

1412 :returns: Sorted list of available codes as tuples of strings. 

1413 ''' 

1414 return sorted(list(self.iter_codes(kind=kind))) 

1415 

1416 def get_counts(self, kind=None): 

1417 ''' 

1418 Get number of occurrences of any (kind, codes) combination. 

1419 

1420 :param kind: 

1421 If given, get codes only for selected content type. 

1422 :type kind: 

1423 str 

1424 

1425 :complexity: 

1426 O(1), independent of number of nuts. 

1427 

1428 :returns: ``dict`` with ``counts[kind][codes]`` or ``counts[codes]`` 

1429 if kind is not ``None`` 

1430 ''' 

1431 d = {} 

1432 for kind_id, codes, _, _, count in self._iter_codes_info(kind=kind): 

1433 if kind_id not in d: 

1434 v = d[kind_id] = {} 

1435 else: 

1436 v = d[kind_id] 

1437 

1438 if codes not in v: 

1439 v[codes] = 0 

1440 

1441 v[codes] += count 

1442 

1443 if kind is not None: 

1444 return d[to_kind_id(kind)] 

1445 else: 

1446 return dict((to_kind(kind_id), v) for (kind_id, v) in d.items()) 

1447 

1448 def glob_codes(self, kind, codes): 

1449 ''' 

1450 Find codes matching given patterns. 

1451 

1452 :param kind: 

1453 Content kind to be queried. 

1454 :type kind: 

1455 str 

1456 

1457 :param codes: 

1458 List of code patterns to query. 

1459 :type codes: 

1460 :py:class:`list` of :py:class:`~pyrocko.squirrel.model.Codes` 

1461 objects appropriate for the queried content type, or anything which 

1462 can be converted to such objects. 

1463 

1464 :returns: 

1465 List of matches of the form ``[kind_codes_id, codes, deltat]``. 

1466 ''' 

1467 

1468 kind_id = to_kind_id(kind) 

1469 args = [kind_id] 

1470 pats = codes_patterns_for_kind(kind_id, codes) 

1471 

1472 if pats: 

1473 codes_cond = 'AND ( %s ) ' % ' OR '.join( 

1474 ('kind_codes.codes GLOB ?',) * len(pats)) 

1475 

1476 args.extend(pat.safe_str for pat in pats) 

1477 else: 

1478 codes_cond = '' 

1479 

1480 sql = self._sql(''' 

1481 SELECT kind_codes_id, codes, deltat FROM kind_codes 

1482 WHERE 

1483 kind_id == ? ''' + codes_cond) 

1484 

1485 return list(map(list, self._conn.execute(sql, args))) 

1486 

1487 def update(self, constraint=None, **kwargs): 

1488 ''' 

1489 Update or partially update channel and event inventories. 

1490 

1491 :param constraint: 

1492 Selection of times or areas to be brought up to date. 

1493 :type constraint: 

1494 :py:class:`~pyrocko.squirrel.client.base.Constraint` 

1495 

1496 :param \\*\\*kwargs: 

1497 Shortcut for setting ``constraint=Constraint(**kwargs)``. 

1498 

1499 This function triggers all attached remote sources, to check for 

1500 updates in the meta-data. The sources will only submit queries when 

1501 their expiration date has passed, or if the selection spans into 

1502 previously unseen times or areas. 

1503 ''' 

1504 

1505 if constraint is None: 

1506 constraint = client.Constraint(**kwargs) 

1507 

1508 for source in self._sources: 

1509 source.update_channel_inventory(self, constraint) 

1510 source.update_event_inventory(self, constraint) 

1511 

1512 def update_waveform_promises(self, constraint=None, **kwargs): 

1513 ''' 

1514 Permit downloading of remote waveforms. 

1515 

1516 :param constraint: 

1517 Remote waveforms compatible with the given constraint are enabled 

1518 for download. 

1519 :type constraint: 

1520 :py:class:`~pyrocko.squirrel.client.base.Constraint` 

1521 

1522 :param \\*\\*kwargs: 

1523 Shortcut for setting ``constraint=Constraint(**kwargs)``. 

1524 

1525 Calling this method permits Squirrel to download waveforms from remote 

1526 sources when processing subsequent waveform requests. This works by 

1527 inserting so called waveform promises into the database. It will look 

1528 into the available channels for each remote source and create a promise 

1529 for each channel compatible with the given constraint. If the promise 

1530 then matches in a waveform request, Squirrel tries to download the 

1531 waveform. If the download is successful, the downloaded waveform is 

1532 added to the Squirrel and the promise is deleted. If the download 

1533 fails, the promise is kept if the reason of failure looks like being 

1534 temporary, e.g. because of a network failure. If the cause of failure 

1535 however seems to be permanent, the promise is deleted so that no 

1536 further attempts are made to download a waveform which might not be 

1537 available from that server at all. To force re-scheduling after a 

1538 permanent failure, call :py:meth:`update_waveform_promises` 

1539 yet another time. 

1540 ''' 

1541 

1542 if constraint is None: 

1543 constraint = client.Constraint(**kwargs) 

1544 

1545 for source in self._sources: 

1546 source.update_waveform_promises(self, constraint) 

1547 

1548 def remove_waveform_promises(self, from_database='selection'): 

1549 ''' 

1550 Remove waveform promises from live selection or global database. 

1551 

1552 Calling this function removes all waveform promises provided by the 

1553 attached sources. 

1554 

1555 :param from_database: 

1556 Remove from live selection ``'selection'`` or global database 

1557 ``'global'``. 

1558 ''' 

1559 for source in self._sources: 

1560 source.remove_waveform_promises(self, from_database=from_database) 

1561 

1562 def update_responses(self, constraint=None, **kwargs): 

1563 if constraint is None: 

1564 constraint = client.Constraint(**kwargs) 

1565 

1566 for source in self._sources: 

1567 source.update_response_inventory(self, constraint) 

1568 

1569 def get_nfiles(self): 

1570 ''' 

1571 Get number of files in selection. 

1572 ''' 

1573 

1574 sql = self._sql('''SELECT COUNT(*) FROM %(db)s.%(file_states)s''') 

1575 for row in self._conn.execute(sql): 

1576 return row[0] 

1577 

1578 def get_nnuts(self): 

1579 ''' 

1580 Get number of nuts in selection. 

1581 ''' 

1582 

1583 sql = self._sql('''SELECT COUNT(*) FROM %(db)s.%(nuts)s''') 

1584 for row in self._conn.execute(sql): 

1585 return row[0] 

1586 

1587 def get_total_size(self): 

1588 ''' 

1589 Get aggregated file size available in selection. 

1590 ''' 

1591 

1592 sql = self._sql(''' 

1593 SELECT SUM(files.size) FROM %(db)s.%(file_states)s 

1594 INNER JOIN files 

1595 ON %(db)s.%(file_states)s.file_id = files.file_id 

1596 ''') 

1597 

1598 for row in self._conn.execute(sql): 

1599 return row[0] or 0 

1600 

1601 def get_stats(self): 

1602 ''' 

1603 Get statistics on contents available through this selection. 

1604 ''' 

1605 

1606 kinds = self.get_kinds() 

1607 time_spans = {} 

1608 for kind in kinds: 

1609 time_spans[kind] = self.get_time_span([kind]) 

1610 

1611 return SquirrelStats( 

1612 nfiles=self.get_nfiles(), 

1613 nnuts=self.get_nnuts(), 

1614 kinds=kinds, 

1615 codes=self.get_codes(), 

1616 total_size=self.get_total_size(), 

1617 counts=self.get_counts(), 

1618 time_spans=time_spans, 

1619 sources=[s.describe() for s in self._sources], 

1620 operators=[op.describe() for op in self._operators]) 

1621 

1622 @filldocs 

1623 def check( 

1624 self, obj=None, tmin=None, tmax=None, time=None, codes=None, 

1625 ignore=[]): 

1626 ''' 

1627 Check for common data/metadata problems. 

1628 

1629 %(query_args)s 

1630 

1631 :param ignore: 

1632 Problem types to be ignored. 

1633 :type ignore: 

1634 :class:`list` of :class:`str` 

1635 (:py:class:`~pyrocko.squirrel.check.SquirrelCheckProblemType`) 

1636 

1637 :returns: 

1638 :py:class:`~pyrocko.squirrel.check.SquirrelCheck` object 

1639 containing the results of the check. 

1640 

1641 See :py:func:`~pyrocko.squirrel.check.do_check`. 

1642 ''' 

1643 

1644 from .check import do_check 

1645 tmin, tmax, codes = self._get_selection_args( 

1646 CHANNEL, obj, tmin, tmax, time, codes) 

1647 

1648 return do_check(self, tmin=tmin, tmax=tmax, codes=codes, ignore=ignore) 

1649 

1650 def get_content( 

1651 self, 

1652 nut, 

1653 cache_id='default', 

1654 accessor_id='default', 

1655 show_progress=False, 

1656 model='squirrel'): 

1657 

1658 ''' 

1659 Get and possibly load full content for a given index entry from file. 

1660 

1661 Loads the actual content objects (channel, station, waveform, ...) from 

1662 file. For efficiency, sibling content (all stuff in the same file 

1663 segment) will also be loaded as a side effect. The loaded contents are 

1664 cached in the Squirrel object. 

1665 ''' 

1666 

1667 content_cache = self._content_caches[cache_id] 

1668 if not content_cache.has(nut): 

1669 

1670 for nut_loaded in io.iload( 

1671 nut.file_path, 

1672 segment=nut.file_segment, 

1673 format=nut.file_format, 

1674 database=self._database, 

1675 update_selection=self, 

1676 show_progress=show_progress): 

1677 

1678 content_cache.put(nut_loaded) 

1679 

1680 try: 

1681 return content_cache.get(nut, accessor_id, model) 

1682 

1683 except KeyError: 

1684 raise error.NotAvailable( 

1685 'Unable to retrieve content: %s, %s, %s, %s' % nut.key) 

1686 

1687 def advance_accessor(self, accessor_id='default', cache_id=None): 

1688 ''' 

1689 Notify memory caches about consumer moving to a new data batch. 

1690 

1691 :param accessor_id: 

1692 Name of accessing consumer to be advanced. 

1693 :type accessor_id: 

1694 str 

1695 

1696 :param cache_id: 

1697 Name of cache to for which the accessor should be advanced. By 

1698 default the named accessor is advanced in all registered caches. 

1699 By default, two caches named ``'default'`` and ``'waveform'`` are 

1700 available. 

1701 :type cache_id: 

1702 str 

1703 

1704 See :py:class:`~pyrocko.squirrel.cache.ContentCache` for details on how 

1705 Squirrel's memory caching works and can be tuned. Default behaviour is 

1706 to release data when it has not been used in the latest data 

1707 window/batch. If the accessor is never advanced, data is cached 

1708 indefinitely - which is often desired e.g. for station meta-data. 

1709 Methods for consecutive data traversal, like 

1710 :py:meth:`chopper_waveforms` automatically advance and clear 

1711 their accessor. 

1712 ''' 

1713 for cache_ in ( 

1714 self._content_caches.keys() 

1715 if cache_id is None 

1716 else [cache_id]): 

1717 

1718 self._content_caches[cache_].advance_accessor(accessor_id) 

1719 

1720 def clear_accessor(self, accessor_id, cache_id=None): 

1721 ''' 

1722 Notify memory caches about a consumer having finished. 

1723 

1724 :param accessor_id: 

1725 Name of accessor to be cleared. 

1726 :type accessor_id: 

1727 str 

1728 

1729 :param cache_id: 

1730 Name of cache for which the accessor should be cleared. By default 

1731 the named accessor is cleared from all registered caches. By 

1732 default, two caches named ``'default'`` and ``'waveform'`` are 

1733 available. 

1734 :type cache_id: 

1735 str 

1736 

1737 Calling this method clears all references to cache entries held by the 

1738 named accessor. Cache entries are then freed if not referenced by any 

1739 other accessor. 

1740 ''' 

1741 

1742 for cache_ in ( 

1743 self._content_caches.keys() 

1744 if cache_id is None 

1745 else [cache_id]): 

1746 

1747 self._content_caches[cache_].clear_accessor(accessor_id) 

1748 

1749 def get_cache_stats(self, cache_id): 

1750 return self._content_caches[cache_id].get_stats() 

1751 

1752 @filldocs 

1753 def get_stations( 

1754 self, obj=None, tmin=None, tmax=None, time=None, codes=None, 

1755 model='squirrel'): 

1756 

1757 ''' 

1758 Get stations matching given constraints. 

1759 

1760 %(query_args)s 

1761 

1762 :param model: 

1763 Select object model for returned values: ``'squirrel'`` to get 

1764 Squirrel station objects or ``'pyrocko'`` to get Pyrocko station 

1765 objects with channel information attached. 

1766 :type model: 

1767 str 

1768 

1769 :returns: 

1770 List of :py:class:`pyrocko.squirrel.Station 

1771 <pyrocko.squirrel.model.Station>` objects by default or list of 

1772 :py:class:`pyrocko.model.Station <pyrocko.model.station.Station>` 

1773 objects if ``model='pyrocko'`` is requested. 

1774 

1775 See :py:meth:`iter_nuts` for details on time span matching. 

1776 ''' 

1777 

1778 if model == 'pyrocko': 

1779 return self._get_pyrocko_stations(obj, tmin, tmax, time, codes) 

1780 elif model in ('squirrel', 'stationxml', 'stationxml+'): 

1781 args = self._get_selection_args( 

1782 STATION, obj, tmin, tmax, time, codes) 

1783 

1784 nuts = sorted( 

1785 self.iter_nuts('station', *args), key=lambda nut: nut.dkey) 

1786 

1787 return [self.get_content(nut, model=model) for nut in nuts] 

1788 else: 

1789 raise ValueError('Invalid station model: %s' % model) 

1790 

1791 @filldocs 

1792 def get_channels( 

1793 self, obj=None, tmin=None, tmax=None, time=None, codes=None, 

1794 model='squirrel'): 

1795 

1796 ''' 

1797 Get channels matching given constraints. 

1798 

1799 %(query_args)s 

1800 

1801 :returns: 

1802 List of :py:class:`~pyrocko.squirrel.model.Channel` objects. 

1803 

1804 See :py:meth:`iter_nuts` for details on time span matching. 

1805 ''' 

1806 

1807 args = self._get_selection_args( 

1808 CHANNEL, obj, tmin, tmax, time, codes) 

1809 

1810 nuts = sorted( 

1811 self.iter_nuts('channel', *args), key=lambda nut: nut.dkey) 

1812 

1813 return [self.get_content(nut, model=model) for nut in nuts] 

1814 

1815 @filldocs 

1816 def get_sensors( 

1817 self, obj=None, tmin=None, tmax=None, time=None, codes=None): 

1818 

1819 ''' 

1820 Get sensors matching given constraints. 

1821 

1822 %(query_args)s 

1823 

1824 :returns: 

1825 List of :py:class:`~pyrocko.squirrel.model.Sensor` objects. 

1826 

1827 See :py:meth:`iter_nuts` for details on time span matching. 

1828 ''' 

1829 

1830 tmin, tmax, codes = self._get_selection_args( 

1831 CHANNEL, obj, tmin, tmax, time, codes) 

1832 

1833 if codes is not None: 

1834 codes = codes_patterns_list( 

1835 (entry.replace(channel=entry.channel[:-1] + '?') 

1836 if entry.channel != '*' else entry) 

1837 for entry in codes) 

1838 

1839 nuts = sorted( 

1840 self.iter_nuts( 

1841 'channel', tmin, tmax, codes), key=lambda nut: nut.dkey) 

1842 

1843 return [ 

1844 sensor for sensor in model.Sensor.from_channels( 

1845 self.get_content(nut) for nut in nuts) 

1846 if match_time_span(tmin, tmax, sensor)] 

1847 

1848 @filldocs 

1849 def get_responses( 

1850 self, obj=None, tmin=None, tmax=None, time=None, codes=None, 

1851 model='squirrel'): 

1852 

1853 ''' 

1854 Get instrument responses matching given constraints. 

1855 

1856 %(query_args)s 

1857 

1858 :param model: 

1859 Select data model for returned objects. Choices: ``'squirrel'``, 

1860 ``'stationxml'``, ``'stationxml+'``. See return value description. 

1861 :type model: 

1862 str 

1863 

1864 :returns: 

1865 List of :py:class:`~pyrocko.squirrel.model.Response` if ``model == 

1866 'squirrel'`` or list of :py:class:`~pyrocko.io.fdsn.FDSNStationXML` 

1867 if ``model == 'stationxml'`` or list of 

1868 (:py:class:`~pyrocko.squirrel.model.Response`, 

1869 :py:class:`~pyrocko.io.fdsn.FDSNStationXML`) if ``model == 

1870 'stationxml+'``. 

1871 

1872 See :py:meth:`iter_nuts` for details on time span matching. 

1873 ''' 

1874 

1875 args = self._get_selection_args( 

1876 RESPONSE, obj, tmin, tmax, time, codes) 

1877 

1878 nuts = sorted( 

1879 self.iter_nuts('response', *args), key=lambda nut: nut.dkey) 

1880 

1881 return [self.get_content(nut, model=model) for nut in nuts] 

1882 

1883 @filldocs 

1884 def get_response( 

1885 self, obj=None, tmin=None, tmax=None, time=None, codes=None, 

1886 model='squirrel', on_duplicate='raise'): 

1887 

1888 ''' 

1889 Get instrument response matching given constraints. 

1890 

1891 %(query_args)s 

1892 

1893 :param model: 

1894 Select data model for returned object. Choices: ``'squirrel'``, 

1895 ``'stationxml'``, ``'stationxml+'``. See return value description. 

1896 :type model: 

1897 str 

1898 

1899 :param on_duplicate: 

1900 Determines how duplicates/multiple matching responses are handled. 

1901 Choices: ``'raise'`` - raise 

1902 :py:exc:`~pyrocko.squirrel.error.Duplicate`, ``'warn'`` - emit a 

1903 warning and return first match, ``'ignore'`` - silently return 

1904 first match. 

1905 :type on_duplicate: 

1906 str 

1907 

1908 :returns: 

1909 :py:class:`~pyrocko.squirrel.model.Response` if 

1910 ``model == 'squirrel'`` or 

1911 :py:class:`~pyrocko.io.fdsn.FDSNStationXML` if ``model == 

1912 'stationxml'`` or 

1913 (:py:class:`~pyrocko.squirrel.model.Response`, 

1914 :py:class:`~pyrocko.io.fdsn.FDSNStationXML`) if ``model == 

1915 'stationxml+'``. 

1916 

1917 Same as :py:meth:`get_responses` but returning exactly one response. 

1918 Raises :py:exc:`~pyrocko.squirrel.error.NotAvailable` if none is 

1919 available. Duplicates are handled according to the ``on_duplicate`` 

1920 argument. 

1921 

1922 See :py:meth:`iter_nuts` for details on time span matching. 

1923 ''' 

1924 

1925 if model == 'stationxml': 

1926 model_ = 'stationxml+' 

1927 else: 

1928 model_ = model 

1929 

1930 responses = self.get_responses( 

1931 obj, tmin, tmax, time, codes, model=model_) 

1932 if len(responses) == 0: 

1933 raise error.NotAvailable( 

1934 'No instrument response available (%s).' 

1935 % self._get_selection_args_str( 

1936 RESPONSE, obj, tmin, tmax, time, codes)) 

1937 

1938 elif len(responses) > 1: 

1939 

1940 if on_duplicate in ('raise', 'warn'): 

1941 if model_ == 'squirrel': 

1942 resps_sq = responses 

1943 elif model_ == 'stationxml+': 

1944 resps_sq = [resp[0] for resp in responses] 

1945 else: 

1946 raise ValueError('Invalid response model: %s' % model) 

1947 

1948 rinfo = ':\n' + '\n'.join( 

1949 ' ' + resp.summary for resp in resps_sq) 

1950 

1951 message = \ 

1952 'Multiple instrument responses matching given ' \ 

1953 'constraints (%s)%s%s' % ( 

1954 self._get_selection_args_str( 

1955 RESPONSE, obj, tmin, tmax, time, codes), 

1956 ' -> using first' if on_duplicate == 'warn' else '', 

1957 rinfo) 

1958 

1959 if on_duplicate == 'raise': 

1960 raise error.Duplicate(message) 

1961 

1962 elif on_duplicate == 'warn': 

1963 logger.warning(message) 

1964 

1965 elif on_duplicate == 'ignore': 

1966 pass 

1967 

1968 else: 

1969 ValueError( 

1970 'Invalid argument for on_duplicate: %s' % on_duplicate) 

1971 

1972 if model == 'stationxml': 

1973 return responses[0][1] 

1974 else: 

1975 return responses[0] 

1976 

1977 @filldocs 

1978 def get_events( 

1979 self, obj=None, tmin=None, tmax=None, time=None, codes=None): 

1980 

1981 ''' 

1982 Get events matching given constraints. 

1983 

1984 %(query_args)s 

1985 

1986 :returns: 

1987 List of :py:class:`~pyrocko.model.event.Event` objects. 

1988 

1989 See :py:meth:`iter_nuts` for details on time span matching. 

1990 ''' 

1991 

1992 args = self._get_selection_args(EVENT, obj, tmin, tmax, time, codes) 

1993 nuts = sorted( 

1994 self.iter_nuts('event', *args), key=lambda nut: nut.dkey) 

1995 

1996 return [self.get_content(nut) for nut in nuts] 

1997 

1998 def _redeem_promises(self, *args, codes_exclude=None, order_only=False): 

1999 

2000 def split_promise(order): 

2001 self._split_nuts( 

2002 'waveform_promise', 

2003 order.tmin, order.tmax, 

2004 codes=order.codes, 

2005 path=order.source_id) 

2006 

2007 tmin, tmax, _ = args 

2008 

2009 waveforms = list(self.iter_nuts('waveform', *args)) 

2010 promises = list(self.iter_nuts('waveform_promise', *args)) 

2011 if codes_exclude is not None: 

2012 promises = [ 

2013 promise for promise in promises 

2014 if promise.codes not in codes_exclude] 

2015 

2016 codes_to_avail = defaultdict(list) 

2017 for nut in waveforms: 

2018 codes_to_avail[nut.codes].append((nut.tmin, nut.tmax)) 

2019 

2020 def tts(x): 

2021 if isinstance(x, tuple): 

2022 return tuple(tts(e) for e in x) 

2023 elif isinstance(x, list): 

2024 return list(tts(e) for e in x) 

2025 else: 

2026 return util.time_to_str(x) 

2027 

2028 orders = [] 

2029 for promise in promises: 

2030 waveforms_avail = codes_to_avail[promise.codes] 

2031 for block_tmin, block_tmax in blocks( 

2032 max(tmin, promise.tmin), 

2033 min(tmax, promise.tmax), 

2034 promise.deltat): 

2035 

2036 orders.append( 

2037 WaveformOrder( 

2038 source_id=promise.file_path, 

2039 codes=promise.codes, 

2040 tmin=block_tmin, 

2041 tmax=block_tmax, 

2042 deltat=promise.deltat, 

2043 gaps=gaps(waveforms_avail, block_tmin, block_tmax))) 

2044 

2045 orders_noop, orders = lpick(lambda order: order.gaps, orders) 

2046 

2047 order_keys_noop = set(order_key(order) for order in orders_noop) 

2048 if len(order_keys_noop) != 0 or len(orders_noop) != 0: 

2049 logger.info( 

2050 'Waveform orders already satisified with cached/local data: ' 

2051 '%i (%i)' % (len(order_keys_noop), len(orders_noop))) 

2052 

2053 for order in orders_noop: 

2054 split_promise(order) 

2055 

2056 if order_only: 

2057 if orders: 

2058 self._pending_orders.extend(orders) 

2059 logger.info( 

2060 'Enqueuing %i waveform order%s.' 

2061 % len_plural(orders)) 

2062 return 

2063 else: 

2064 if self._pending_orders: 

2065 orders.extend(self._pending_orders) 

2066 logger.info( 

2067 'Adding %i previously enqueued order%s.' 

2068 % len_plural(self._pending_orders)) 

2069 

2070 self._pending_orders = [] 

2071 

2072 source_ids = [] 

2073 sources = {} 

2074 for source in self._sources: 

2075 if isinstance(source, fdsn.FDSNSource): 

2076 source_ids.append(source._source_id) 

2077 sources[source._source_id] = source 

2078 

2079 source_priority = dict( 

2080 (source_id, i) for (i, source_id) in enumerate(source_ids)) 

2081 

2082 order_groups = defaultdict(list) 

2083 for order in orders: 

2084 order_groups[order_key(order)].append(order) 

2085 

2086 for k, order_group in order_groups.items(): 

2087 order_group.sort( 

2088 key=lambda order: source_priority[order.source_id]) 

2089 

2090 n_order_groups = len(order_groups) 

2091 

2092 if len(order_groups) != 0 or len(orders) != 0: 

2093 logger.info( 

2094 'Waveform orders standing for download: %i (%i)' 

2095 % (len(order_groups), len(orders))) 

2096 

2097 task = make_task('Waveform orders processed', n_order_groups) 

2098 else: 

2099 task = None 

2100 

2101 def release_order_group(order): 

2102 okey = order_key(order) 

2103 for followup in order_groups[okey]: 

2104 split_promise(followup) 

2105 

2106 del order_groups[okey] 

2107 

2108 if task: 

2109 task.update(n_order_groups - len(order_groups)) 

2110 

2111 def noop(order): 

2112 pass 

2113 

2114 def success(order): 

2115 release_order_group(order) 

2116 split_promise(order) 

2117 

2118 def batch_add(paths): 

2119 self.add(paths) 

2120 

2121 calls = queue.Queue() 

2122 

2123 def enqueue(f): 

2124 def wrapper(*args): 

2125 calls.put((f, args)) 

2126 

2127 return wrapper 

2128 

2129 while order_groups: 

2130 

2131 orders_now = [] 

2132 empty = [] 

2133 for k, order_group in order_groups.items(): 

2134 try: 

2135 orders_now.append(order_group.pop(0)) 

2136 except IndexError: 

2137 empty.append(k) 

2138 

2139 for k in empty: 

2140 del order_groups[k] 

2141 

2142 by_source_id = defaultdict(list) 

2143 for order in orders_now: 

2144 by_source_id[order.source_id].append(order) 

2145 

2146 threads = [] 

2147 for source_id in by_source_id: 

2148 def download(): 

2149 try: 

2150 sources[source_id].download_waveforms( 

2151 by_source_id[source_id], 

2152 success=enqueue(success), 

2153 error_permanent=enqueue(split_promise), 

2154 error_temporary=noop, 

2155 batch_add=enqueue(batch_add)) 

2156 

2157 finally: 

2158 calls.put(None) 

2159 

2160 thread = threading.Thread(target=download) 

2161 thread.start() 

2162 threads.append(thread) 

2163 

2164 ndone = 0 

2165 while ndone < len(threads): 

2166 ret = calls.get() 

2167 if ret is None: 

2168 ndone += 1 

2169 else: 

2170 ret[0](*ret[1]) 

2171 

2172 for thread in threads: 

2173 thread.join() 

2174 

2175 if task: 

2176 task.update(n_order_groups - len(order_groups)) 

2177 

2178 if task: 

2179 task.done() 

2180 

2181 @filldocs 

2182 def get_waveform_nuts( 

2183 self, obj=None, tmin=None, tmax=None, time=None, codes=None, 

2184 codes_exclude=None, order_only=False): 

2185 

2186 ''' 

2187 Get waveform content entities matching given constraints. 

2188 

2189 %(query_args)s 

2190 

2191 Like :py:meth:`get_nuts` with ``kind='waveform'`` but additionally 

2192 resolves matching waveform promises (downloads waveforms from remote 

2193 sources). 

2194 

2195 See :py:meth:`iter_nuts` for details on time span matching. 

2196 ''' 

2197 

2198 args = self._get_selection_args(WAVEFORM, obj, tmin, tmax, time, codes) 

2199 self._redeem_promises( 

2200 *args, codes_exclude=codes_exclude, order_only=order_only) 

2201 nuts = sorted( 

2202 self.iter_nuts('waveform', *args), key=lambda nut: nut.dkey) 

2203 

2204 if codes_exclude is not None: 

2205 nuts = [nut for nut in nuts if nut.codes not in codes_exclude] 

2206 

2207 return nuts 

2208 

2209 @filldocs 

2210 def have_waveforms( 

2211 self, obj=None, tmin=None, tmax=None, time=None, codes=None): 

2212 

2213 ''' 

2214 Check if any waveforms or waveform promises are available for given 

2215 constraints. 

2216 

2217 %(query_args)s 

2218 ''' 

2219 

2220 args = self._get_selection_args(WAVEFORM, obj, tmin, tmax, time, codes) 

2221 return bool(list( 

2222 self.iter_nuts('waveform', *args, limit=1))) \ 

2223 or bool(list( 

2224 self.iter_nuts('waveform_promise', *args, limit=1))) 

2225 

    @filldocs
    def get_waveforms(
            self, obj=None, tmin=None, tmax=None, time=None, codes=None,
            codes_exclude=None, uncut=False, want_incomplete=True, degap=True,
            maxgap=5, maxlap=None, snap=None, include_last=False,
            load_data=True, accessor_id='default', operator_params=None,
            order_only=False, channel_priorities=None, target_deltat=None):

        '''
        Get waveforms matching given constraints.

        %(query_args)s

        :param codes_exclude:
            If given, traces with these codes are excluded from the result
            (forwarded to :py:meth:`get_waveform_nuts`).

        :param uncut:
            Set to ``True``, to disable cutting traces to [``tmin``, ``tmax``]
            and to disable degapping/deoverlapping. Returns untouched traces as
            they are read from file segment. File segments are always read in
            their entirety.
        :type uncut:
            bool

        :param want_incomplete:
            If ``True``, gappy/incomplete traces are included in the result.
        :type want_incomplete:
            bool

        :param degap:
            If ``True``, connect traces and remove gaps and overlaps.
        :type degap:
            bool

        :param maxgap:
            Maximum gap size in samples which is filled with interpolated
            samples when ``degap`` is ``True``.
        :type maxgap:
            int

        :param maxlap:
            Maximum overlap size in samples which is removed when ``degap`` is
            ``True``.
        :type maxlap:
            int

        :param snap:
            Rounding functions used when computing sample index from time
            instance, for trace start and trace end, respectively. By default,
            ``(round, round)`` is used.
        :type snap:
            tuple of 2 callables

        :param include_last:
            If ``True``, add one more sample to the returned traces (the sample
            which would be the first sample of a query with ``tmin`` set to the
            current value of ``tmax``).
        :type include_last:
            bool

        :param load_data:
            If ``True``, waveform data samples are read from files (or cache).
            If ``False``, meta-information-only traces are returned (dummy
            traces with no data samples).
        :type load_data:
            bool

        :param accessor_id:
            Name of consumer on who's behalf data is accessed. Used in cache
            management (see :py:mod:`~pyrocko.squirrel.cache`). Used as a key
            to distinguish different points of extraction for the decision of
            when to release cached waveform data. Should be used when data is
            alternately extracted from more than one region / selection.
        :type accessor_id:
            str

        :param order_only:
            If ``True``, only enqueue download orders for missing data; an
            empty list is returned.

        :param channel_priorities:
            If given, delegate to prioritized, per-channel-group fetching (see
            :py:meth:`_get_waveforms_prioritized`); ``target_deltat`` is only
            used on that path.

        See :py:meth:`iter_nuts` for details on time span matching.

        Loaded data is kept in memory (at least) until
        :py:meth:`clear_accessor` has been called or
        :py:meth:`advance_accessor` has been called two consecutive times
        without data being accessed between the two calls (by this accessor).
        Data may still be further kept in the memory cache if held alive by
        consumers with a different ``accessor_id``.
        '''

        tmin, tmax, codes = self._get_selection_args(
            WAVEFORM, obj, tmin, tmax, time, codes)

        # Prioritized channel fetching is handled by a dedicated helper which
        # calls back into this method per priority group.
        if channel_priorities is not None:
            return self._get_waveforms_prioritized(
                tmin=tmin, tmax=tmax, codes=codes,
                uncut=uncut, want_incomplete=want_incomplete, degap=degap,
                maxgap=maxgap, maxlap=maxlap, snap=snap,
                include_last=include_last, load_data=load_data,
                accessor_id=accessor_id, operator_params=operator_params,
                order_only=order_only, channel_priorities=channel_priorities,
                target_deltat=target_deltat)

        self_tmin, self_tmax = self.get_time_span(
            ['waveform', 'waveform_promise'])

        if None in (self_tmin, self_tmax):
            logger.warning(
                'No waveforms available.')
            return []

        # Open-ended queries fall back to the full available time span.
        tmin = tmin if tmin is not None else self_tmin
        tmax = tmax if tmax is not None else self_tmax

        if codes is not None and len(codes) == 1:
            # TODO: fix for multiple / mixed codes
            operator = self.get_operator(codes[0])
            if operator is not None:
                # Derived (operator-generated) channels are produced by the
                # operator rather than read from storage.
                return operator.get_waveforms(
                    self, codes[0],
                    tmin=tmin, tmax=tmax,
                    uncut=uncut, want_incomplete=want_incomplete, degap=degap,
                    maxgap=maxgap, maxlap=maxlap, snap=snap,
                    include_last=include_last, load_data=load_data,
                    accessor_id=accessor_id, params=operator_params)

        nuts = self.get_waveform_nuts(
            obj, tmin, tmax, time, codes, codes_exclude=codes_exclude,
            order_only=order_only)

        if order_only:
            return []

        if load_data:
            traces = [
                self.get_content(nut, 'waveform', accessor_id) for nut in nuts]

        else:
            traces = [
                trace.Trace(**nut.trace_kwargs) for nut in nuts]

        if uncut:
            return traces

        if snap is None:
            snap = (round, round)

        chopped = []
        for tr in traces:
            # Strip data samples from cached traces when only metadata was
            # requested, without mutating the cached object itself.
            if not load_data and tr.ydata is not None:
                tr = tr.copy(data=False)
                tr.ydata = None

            try:
                chopped.append(tr.chop(
                    tmin, tmax,
                    inplace=False,
                    snap=snap,
                    include_last=include_last))

            except trace.NoData:
                pass

        processed = self._process_chopped(
            chopped, degap, maxgap, maxlap, want_incomplete, tmin, tmax)

        return processed

2386 

2387 def _get_waveforms_prioritized( 

2388 self, tmin=None, tmax=None, codes=None, 

2389 channel_priorities=None, target_deltat=None, **kwargs): 

2390 

2391 trs_all = [] 

2392 codes_have = set() 

2393 for channel in channel_priorities: 

2394 assert len(channel) == 2 

2395 if codes is not None: 

2396 codes_now = [ 

2397 codes_.replace(channel=channel+'?') for codes_ in codes] 

2398 else: 

2399 codes_now = model.CodesNSLCE('*', '*', '*', channel+'?') 

2400 

2401 codes_exclude_now = set( 

2402 codes_.replace(channel=channel+codes_.channel[-1]) 

2403 for codes_ in codes_have) 

2404 

2405 trs = self.get_waveforms( 

2406 tmin=tmin, 

2407 tmax=tmax, 

2408 codes=codes_now, 

2409 codes_exclude=codes_exclude_now, 

2410 **kwargs) 

2411 

2412 codes_have.update(set(tr.codes for tr in trs)) 

2413 trs_all.extend(trs) 

2414 

2415 return trs_all 

2416 

    @filldocs
    def chopper_waveforms(
            self, obj=None, tmin=None, tmax=None, time=None, codes=None,
            tinc=None, tpad=0.,
            want_incomplete=True, snap_window=False,
            degap=True, maxgap=5, maxlap=None,
            snap=None, include_last=False, load_data=True,
            accessor_id=None, clear_accessor=True, operator_params=None,
            grouping=None, channel_priorities=None, target_deltat=None):

        '''
        Iterate window-wise over waveform archive.

        %(query_args)s

        :param tinc:
            Time increment (window shift time) (default uses ``tmax-tmin``).
        :type tinc:
            timestamp

        :param tpad:
            Padding time appended on either side of the data window (window
            overlap is ``2*tpad``).
        :type tpad:
            timestamp

        :param want_incomplete:
            If ``True``, gappy/incomplete traces are included in the result.
        :type want_incomplete:
            bool

        :param snap_window:
            If ``True``, start time windows at multiples of tinc with respect
            to system time zero.
        :type snap_window:
            bool

        :param degap:
            If ``True``, connect traces and remove gaps and overlaps.
        :type degap:
            bool

        :param maxgap:
            Maximum gap size in samples which is filled with interpolated
            samples when ``degap`` is ``True``.
        :type maxgap:
            int

        :param maxlap:
            Maximum overlap size in samples which is removed when ``degap`` is
            ``True``.
        :type maxlap:
            int

        :param snap:
            Rounding functions used when computing sample index from time
            instance, for trace start and trace end, respectively. By default,
            ``(round, round)`` is used.
        :type snap:
            tuple of 2 callables

        :param include_last:
            If ``True``, add one more sample to the returned traces (the sample
            which would be the first sample of a query with ``tmin`` set to the
            current value of ``tmax``).
        :type include_last:
            bool

        :param load_data:
            If ``True``, waveform data samples are read from files (or cache).
            If ``False``, meta-information-only traces are returned (dummy
            traces with no data samples).
        :type load_data:
            bool

        :param accessor_id:
            Name of consumer on who's behalf data is accessed. Used in cache
            management (see :py:mod:`~pyrocko.squirrel.cache`). Used as a key
            to distinguish different points of extraction for the decision of
            when to release cached waveform data. Should be used when data is
            alternately extracted from more than one region / selection.
        :type accessor_id:
            str

        :param clear_accessor:
            If ``True`` (default), :py:meth:`clear_accessor` is called when the
            chopper finishes. Set to ``False`` to keep loaded waveforms in
            memory when the generator returns.
        :type clear_accessor:
            bool

        :param grouping:
            By default, traversal over the data is over time and all matching
            traces of a time window are yielded. Using this option, it is
            possible to traverse the data first by group (e.g. station or
            network) and second by time. This can reduce the number of traces
            in each batch and thus reduce the memory footprint of the process.
        :type grouping:
            :py:class:`~pyrocko.squirrel.operator.Grouping`

        :yields:
            A list of :py:class:`~pyrocko.trace.Trace` objects for every
            extracted time window.

        See :py:meth:`iter_nuts` for details on time span matching.
        '''

        tmin, tmax, codes = self._get_selection_args(
            WAVEFORM, obj, tmin, tmax, time, codes)

        self_tmin, self_tmax = self.get_time_span(
            ['waveform', 'waveform_promise'])

        if None in (self_tmin, self_tmax):
            logger.warning(
                'Content has undefined time span. No waveforms and no '
                'waveform promises?')
            return

        if snap_window and tinc is not None:
            # Align window boundaries to multiples of tinc (system time zero).
            tmin = tmin if tmin is not None else self_tmin
            tmax = tmax if tmax is not None else self_tmax
            tmin = math.floor(tmin / tinc) * tinc
            tmax = math.ceil(tmax / tinc) * tinc
        else:
            # Shrink open-ended span by tpad so that padded windows stay
            # within the available data.
            tmin = tmin if tmin is not None else self_tmin + tpad
            tmax = tmax if tmax is not None else self_tmax - tpad

        tinc = tinc if tinc is not None else tmax - tmin

        try:
            if accessor_id is None:
                accessor_id = 'chopper%i' % self._n_choppers_active

            # Counter tracks concurrently active choppers so that
            # auto-generated accessor ids do not collide.
            self._n_choppers_active += 1

            # Tiny epsilon avoids an extra empty window when (tmax - tmin) is
            # an exact multiple of tinc.
            eps = tinc * 1e-6
            if tinc != 0.0:
                nwin = int(((tmax - eps) - tmin) / tinc) + 1
            else:
                nwin = 1

            if grouping is None:
                codes_list = [codes]
            else:
                # Build one codes-pattern list per group, derived from the
                # codes currently available (waveforms and promises).
                operator = Operator(
                    filtering=CodesPatternFiltering(codes=codes),
                    grouping=grouping)

                available = set(self.get_codes(kind='waveform'))
                available.update(self.get_codes(kind='waveform_promise'))
                operator.update_mappings(sorted(available))

                codes_list = [
                    codes_patterns_list(scl)
                    for scl in operator.iter_in_codes()]

            ngroups = len(codes_list)
            for igroup, scl in enumerate(codes_list):
                for iwin in range(nwin):
                    wmin, wmax = tmin+iwin*tinc, min(tmin+(iwin+1)*tinc, tmax)

                    chopped = self.get_waveforms(
                        tmin=wmin-tpad,
                        tmax=wmax+tpad,
                        codes=scl,
                        snap=snap,
                        include_last=include_last,
                        load_data=load_data,
                        want_incomplete=want_incomplete,
                        degap=degap,
                        maxgap=maxgap,
                        maxlap=maxlap,
                        accessor_id=accessor_id,
                        operator_params=operator_params,
                        channel_priorities=channel_priorities,
                        target_deltat=target_deltat)

                    # Let the cache know this accessor is done with the
                    # previous window's data.
                    self.advance_accessor(accessor_id)

                    yield Batch(
                        tmin=wmin,
                        tmax=wmax,
                        i=iwin,
                        n=nwin,
                        igroup=igroup,
                        ngroups=ngroups,
                        traces=chopped)

        finally:
            # Runs also when the consumer abandons the generator early.
            self._n_choppers_active -= 1
            if clear_accessor:
                self.clear_accessor(accessor_id, 'waveform')

2610 

2611 def _process_chopped( 

2612 self, chopped, degap, maxgap, maxlap, want_incomplete, tmin, tmax): 

2613 

2614 chopped.sort(key=lambda a: a.full_id) 

2615 if degap: 

2616 chopped = trace.degapper(chopped, maxgap=maxgap, maxlap=maxlap) 

2617 

2618 if not want_incomplete: 

2619 chopped_weeded = [] 

2620 for tr in chopped: 

2621 emin = tr.tmin - tmin 

2622 emax = tr.tmax + tr.deltat - tmax 

2623 if (abs(emin) <= 0.5*tr.deltat and abs(emax) <= 0.5*tr.deltat): 

2624 chopped_weeded.append(tr) 

2625 

2626 elif degap: 

2627 if (0. < emin <= 5. * tr.deltat 

2628 and -5. * tr.deltat <= emax < 0.): 

2629 

2630 tr.extend(tmin, tmax-tr.deltat, fillmethod='repeat') 

2631 chopped_weeded.append(tr) 

2632 

2633 chopped = chopped_weeded 

2634 

2635 return chopped 

2636 

    def _get_pyrocko_stations(
            self, obj=None, tmin=None, tmax=None, time=None, codes=None):

        '''
        Build legacy :py:class:`pyrocko.model.Station` objects, merging
        station and channel information by NSL codes.
        '''

        from pyrocko import model as pmodel

        if codes is not None:
            codes = codes_patterns_for_kind(STATION, codes)

        # Per NSL: (station-args collected from stations and channels,
        # channel objects).
        by_nsl = defaultdict(lambda: (list(), list()))
        for station in self.get_stations(obj, tmin, tmax, time, codes):
            sargs = station._get_pyrocko_station_args()
            by_nsl[station.codes.nsl][0].append(sargs)

        # Widen station-level patterns to NSLCE for the channel query.
        if codes is not None:
            codes = [model.CodesNSLCE(c) for c in codes]

        for channel in self.get_channels(obj, tmin, tmax, time, codes):
            sargs = channel._get_pyrocko_station_args()
            sargs_list, channels_list = by_nsl[channel.codes.nsl]
            sargs_list.append(sargs)
            channels_list.append(channel)

        pstations = []
        nsls = list(by_nsl.keys())
        nsls.sort()
        for nsl in nsls:
            sargs_list, channels_list = by_nsl[nsl]
            # Merge possibly redundant station attribute sets into one;
            # consistency_merge raises on conflicting values.
            sargs = util.consistency_merge(
                [('',) + x for x in sargs_list])

            by_c = defaultdict(list)
            for ch in channels_list:
                by_c[ch.codes.channel].append(ch._get_pyrocko_channel_args())

            chas = list(by_c.keys())
            chas.sort()
            pchannels = []
            for cha in chas:
                list_of_cargs = by_c[cha]
                cargs = util.consistency_merge(
                    [('',) + x for x in list_of_cargs])
                pchannels.append(pmodel.Channel(*cargs))

            pstations.append(
                pmodel.Station(*sargs, channels=pchannels))

        return pstations

2684 

2685 @property 

2686 def pile(self): 

2687 

2688 ''' 

2689 Emulates the older :py:class:`pyrocko.pile.Pile` interface. 

2690 

2691 This property exposes a :py:class:`pyrocko.squirrel.pile.Pile` object, 

2692 which emulates most of the older :py:class:`pyrocko.pile.Pile` methods 

2693 but uses the fluffy power of the Squirrel under the hood. 

2694 

2695 This interface can be used as a drop-in replacement for piles which are 

2696 used in existing scripts and programs for efficient waveform data 

2697 access. The Squirrel-based pile scales better for large datasets. Newer 

2698 scripts should use Squirrel's native methods to avoid the emulation 

2699 overhead. 

2700 ''' 

2701 from . import pile 

2702 

2703 if self._pile is None: 

2704 self._pile = pile.Pile(self) 

2705 

2706 return self._pile 

2707 

2708 def snuffle(self): 

2709 ''' 

2710 Look at dataset in Snuffler. 

2711 ''' 

2712 self.pile.snuffle() 

2713 

2714 def _gather_codes_keys(self, kind, gather, selector): 

2715 return set( 

2716 gather(codes) 

2717 for codes in self.iter_codes(kind) 

2718 if selector is None or selector(codes)) 

2719 

2720 def __str__(self): 

2721 return str(self.get_stats()) 

2722 

    def get_coverage(
            self, kind, tmin=None, tmax=None, codes=None, limit=None):

        '''
        Get coverage information.

        Get information about strips of gapless data coverage.

        :param kind:
            Content kind to be queried.
        :type kind:
            str

        :param tmin:
            Start time of query interval.
        :type tmin:
            timestamp

        :param tmax:
            End time of query interval.
        :type tmax:
            timestamp

        :param codes:
            If given, restrict query to given content codes patterns.
        :type codes:
            :py:class:`list` of :py:class:`~pyrocko.squirrel.model.Codes`
            objects appropriate for the queried content type, or anything which
            can be converted to such objects.

        :param limit:
            Limit query to return only up to a given maximum number of entries
            per matching time series (without setting this option, very gappy
            data could cause the query to execute for a very long time).
        :type limit:
            int

        :returns:
            Information about time spans covered by the requested time series
            data.
        :rtype:
            :py:class:`list` of :py:class:`Coverage` objects
        '''

        # Times are stored split into integer seconds and sub-second offset.
        tmin_seconds, tmin_offset = model.tsplit(tmin)
        tmax_seconds, tmax_offset = model.tsplit(tmax)
        kind_id = to_kind_id(kind)

        codes_info = list(self._iter_codes_info(kind=kind))

        # Entries: (pattern, kind_codes_id, codes, deltat) per matching
        # time series.
        kdata_all = []
        if codes is None:
            for _, codes_entry, deltat, kind_codes_id, _ in codes_info:
                kdata_all.append(
                    (codes_entry, kind_codes_id, codes_entry, deltat))

        else:
            for codes_entry in codes:
                pattern = to_codes(kind_id, codes_entry)
                for _, codes_entry, deltat, kind_codes_id, _ in codes_info:
                    if model.match_codes(pattern, codes_entry):
                        kdata_all.append(
                            (pattern, kind_codes_id, codes_entry, deltat))

        kind_codes_ids = [x[1] for x in kdata_all]

        # Number of segments crossing exactly tmin, per (codes, deltat);
        # serves as the initial coverage count at the interval start.
        counts_at_tmin = {}
        if tmin is not None:
            for nut in self.iter_nuts(
                    kind, tmin, tmin, kind_codes_ids=kind_codes_ids):

                k = nut.codes, nut.deltat
                if k not in counts_at_tmin:
                    counts_at_tmin[k] = 0

                counts_at_tmin[k] += 1

        coverages = []
        for pattern, kind_codes_id, codes_entry, deltat in kdata_all:
            # entry: [pattern, codes, deltat, tmin_total, tmax_total, changes]
            entry = [pattern, codes_entry, deltat, None, None, []]
            for i, order in [(0, 'ASC'), (1, 'DESC')]:
                # First/last coverage change time of the series.
                sql = self._sql('''
                    SELECT
                        time_seconds,
                        time_offset
                    FROM %(db)s.%(coverage)s
                    WHERE
                        kind_codes_id == ?
                    ORDER BY
                        kind_codes_id ''' + order + ''',
                        time_seconds ''' + order + ''',
                        time_offset ''' + order + '''
                    LIMIT 1
                ''')

                for row in self._conn.execute(sql, [kind_codes_id]):
                    entry[3+i] = model.tjoin(row[0], row[1])

            if None in entry[3:5]:
                continue

            args = [kind_codes_id]

            sql_time = ''
            if tmin is not None:
                # intentionally < because (== tmin) is queried from nuts
                sql_time += ' AND ( ? < time_seconds ' \
                    'OR ( ? == time_seconds AND ? < time_offset ) ) '
                args.extend([tmin_seconds, tmin_seconds, tmin_offset])

            if tmax is not None:
                sql_time += ' AND ( time_seconds < ? ' \
                    'OR ( ? == time_seconds AND time_offset <= ? ) ) '
                args.extend([tmax_seconds, tmax_seconds, tmax_offset])

            sql_limit = ''
            if limit is not None:
                sql_limit = ' LIMIT ?'
                args.append(limit)

            sql = self._sql('''
                SELECT
                    time_seconds,
                    time_offset,
                    step
                FROM %(db)s.%(coverage)s
                WHERE
                    kind_codes_id == ?
                ''' + sql_time + '''
                ORDER BY
                    kind_codes_id,
                    time_seconds,
                    time_offset
            ''' + sql_limit)

            rows = list(self._conn.execute(sql, args))

            if limit is not None and len(rows) == limit:
                # Truncated by limit: mark change list as unavailable rather
                # than returning an incomplete one.
                entry[-1] = None
            else:
                counts = counts_at_tmin.get((codes_entry, deltat), 0)
                tlast = None
                if tmin is not None:
                    entry[-1].append((tmin, counts))
                    tlast = tmin

                # Accumulate step changes into running coverage counts.
                for row in rows:
                    t = model.tjoin(row[0], row[1])
                    counts += row[2]
                    entry[-1].append((t, counts))
                    tlast = t

                if tmax is not None and (tlast is None or tlast != tmax):
                    entry[-1].append((tmax, counts))

            coverages.append(model.Coverage.from_values(entry + [kind_id]))

        return coverages

2881 

    def get_stationxml(
            self, obj=None, tmin=None, tmax=None, time=None, codes=None,
            level='response', on_error='raise'):

        '''
        Get station/channel/response metadata in StationXML representation.

        %(query_args)s

        :param level:
            Amount of detail to include: ``'network'``, ``'station'``,
            ``'channel'`` or ``'response'``.

        :param on_error:
            How to handle inconsistencies: ``'raise'``, ``'warn'`` or
            ``'ignore'``.

        :returns:
            :py:class:`~pyrocko.io.stationxml.FDSNStationXML` object.
        '''

        if level not in ('network', 'station', 'channel', 'response'):
            raise ValueError('Invalid level: %s' % level)

        tmin, tmax, codes = self._get_selection_args(
            CHANNEL, obj, tmin, tmax, time, codes)

        # Short time formatter for log messages.
        def tts(t):
            if t is None:
                return '<none>'
            else:
                return util.tts(t, format='%Y-%m-%d %H:%M:%S')

        # Select error handling strategy once, up front. NOTE(review):
        # other on_error values would leave handle_error undefined.
        if on_error == 'ignore':
            def handle_error(exc):
                pass

        elif on_error == 'warn':
            def handle_error(exc):
                logger.warning(str(exc))

        elif on_error == 'raise':
            def handle_error(exc):
                raise exc

        def use_first(node_type_name, codes, k, group):
            # Resolve a duplicate epoch group by keeping the first entry.
            if on_error == 'warn':
                logger.warning(
                    'Duplicates for %s %s, %s - %s -> using first' % (
                        node_type_name,
                        '.'.join(codes),
                        tts(k[0]), tts(k[1])))

            return group[0]

        def deduplicate(node_type_name, codes, nodes):
            # Group nodes by (start_date, end_date) epoch, keep one per epoch.
            groups = defaultdict(list)
            for node in nodes:
                k = (node.start_date, node.end_date)
                groups[k].append(node)

            return [
                use_first(node_type_name, codes, k, group)
                for (k, group) in groups.items()]

        filtering = CodesPatternFiltering(codes=codes)

        nslcs = list(set(
            codes.nslc for codes in
            filtering.filter(self.get_codes(kind='channel'))))

        from pyrocko.io import stationxml as sx

        networks = []
        for net, stas in prefix_tree(nslcs):
            network = sx.Network(code=net)
            networks.append(network)

            if level not in ('station', 'channel', 'response'):
                continue

            for sta, locs in stas:
                stations = self.get_stations(
                    tmin=tmin,
                    tmax=tmax,
                    codes=(net, sta, '*'),
                    model='stationxml')

                # Deduplication only when duplicates are not fatal anyway.
                if on_error != 'raise':
                    stations = deduplicate(
                        'Station', (net, sta), stations)

                errors = sx.check_overlaps(
                    'Station', (net, sta), stations)

                if errors:
                    handle_error(error.Duplicate(
                        'Overlapping/duplicate station info:\n  %s'
                        % '\n  '.join(errors)))

                network.station_list.extend(stations)

                if level not in ('channel', 'response'):
                    continue

                for loc, chas in locs:
                    for cha, _ in chas:
                        channels = self.get_channels(
                            tmin=tmin,
                            tmax=tmax,
                            codes=(net, sta, loc, cha),
                            model='stationxml')

                        if on_error != 'raise':
                            channels = deduplicate(
                                'Channel', (net, sta, loc, cha), channels)

                        errors = sx.check_overlaps(
                            'Channel', (net, sta, loc, cha), channels)

                        if errors:
                            handle_error(error.Duplicate(
                                'Overlapping/duplicate channel info:\n  %s'
                                % '\n  '.join(errors)))

                        for channel in channels:
                            # Attach channel to its containing station epoch.
                            station = sx.find_containing(stations, channel)
                            if station is not None:
                                station.channel_list.append(channel)
                            else:
                                handle_error(error.NotAvailable(
                                    'No station or station epoch found '
                                    'for channel: %s' % '.'.join(
                                        (net, sta, loc, cha))))

                                continue

                            if level != 'response':
                                continue

                            try:
                                response_sq, response_sx = self.get_response(
                                    codes=(net, sta, loc, cha),
                                    tmin=channel.start_date,
                                    tmax=channel.end_date,
                                    model='stationxml+',
                                    on_duplicate=on_error)

                            except error.NotAvailable as e:
                                handle_error(e)
                                continue

                            # Response epoch must cover the channel epoch
                            # (open-ended bounds compare equal).
                            if not (
                                    sx.eq_open(
                                        channel.start_date, response_sq.tmin)
                                    and sx.eq_open(
                                        channel.end_date, response_sq.tmax)):

                                handle_error(error.Inconsistencies(
                                    'Response time span does not match '
                                    'channel time span: %s' % '.'.join(
                                        (net, sta, loc, cha))))

                            channel.response = response_sx

        return sx.FDSNStationXML(
            source='Generated by Pyrocko Squirrel.',
            network_list=networks)

3042 

3043 def add_operator(self, op): 

3044 self._operators.append(op) 

3045 

3046 def update_operator_mappings(self): 

3047 available = self.get_codes(kind=('channel')) 

3048 

3049 for operator in self._operators: 

3050 operator.update_mappings(available, self._operator_registry) 

3051 

3052 def iter_operator_mappings(self): 

3053 for operator in self._operators: 

3054 for in_codes, out_codes in operator.iter_mappings(): 

3055 yield operator, in_codes, out_codes 

3056 

3057 def get_operator_mappings(self): 

3058 return list(self.iter_operator_mappings()) 

3059 

3060 def get_operator(self, codes): 

3061 try: 

3062 return self._operator_registry[codes][0] 

3063 except KeyError: 

3064 return None 

3065 

3066 def get_operator_group(self, codes): 

3067 try: 

3068 return self._operator_registry[codes] 

3069 except KeyError: 

3070 return None, (None, None, None) 

3071 

3072 def iter_operator_codes(self): 

3073 for _, _, out_codes in self.iter_operator_mappings(): 

3074 for codes in out_codes: 

3075 yield codes 

3076 

3077 def get_operator_codes(self): 

3078 return list(self.iter_operator_codes()) 

3079 

3080 def print_tables(self, table_names=None, stream=None): 

3081 ''' 

3082 Dump raw database tables in textual form (for debugging purposes). 

3083 

3084 :param table_names: 

3085 Names of tables to be dumped or ``None`` to dump all. 

3086 :type table_names: 

3087 :py:class:`list` of :py:class:`str` 

3088 

3089 :param stream: 

3090 Open file or ``None`` to dump to standard output. 

3091 ''' 

3092 

3093 if stream is None: 

3094 stream = sys.stdout 

3095 

3096 if isinstance(table_names, str): 

3097 table_names = [table_names] 

3098 

3099 if table_names is None: 

3100 table_names = [ 

3101 'selection_file_states', 

3102 'selection_nuts', 

3103 'selection_kind_codes_count', 

3104 'files', 'nuts', 'kind_codes', 'kind_codes_count'] 

3105 

3106 m = { 

3107 'selection_file_states': '%(db)s.%(file_states)s', 

3108 'selection_nuts': '%(db)s.%(nuts)s', 

3109 'selection_kind_codes_count': '%(db)s.%(kind_codes_count)s', 

3110 'files': 'files', 

3111 'nuts': 'nuts', 

3112 'kind_codes': 'kind_codes', 

3113 'kind_codes_count': 'kind_codes_count'} 

3114 

3115 for table_name in table_names: 

3116 self._database.print_table( 

3117 m[table_name] % self._names, stream=stream) 

3118 

3119 

class SquirrelStats(Object):
    '''
    Container to hold statistics about contents available from a Squirrel.

    See also :py:meth:`Squirrel.get_stats`.
    '''

    nfiles = Int.T(
        help='Number of files in selection.')
    nnuts = Int.T(
        help='Number of index nuts in selection.')
    codes = List.T(
        Tuple.T(content_t=String.T()),
        help='Available code sequences in selection, e.g. '
             '(agency, network, station, location) for stations nuts.')
    kinds = List.T(
        String.T(),
        help='Available content types in selection.')
    total_size = Int.T(
        # Fixed typo in help text: was "of files is selection".
        help='Aggregated file size of files in selection.')
    counts = Dict.T(
        String.T(), Dict.T(Tuple.T(content_t=String.T()), Int.T()),
        help='Breakdown of how many nuts of any content type and code '
             'sequence are available in selection, ``counts[kind][codes]``.')
    time_spans = Dict.T(
        String.T(), Tuple.T(content_t=Timestamp.T()),
        help='Time spans by content type.')
    sources = List.T(
        String.T(),
        help='Descriptions of attached sources.')
    operators = List.T(
        String.T(),
        help='Descriptions of attached operators.')

    def __str__(self):
        # Total number of nuts per content kind, summed over all codes.
        kind_counts = dict(
            (kind, sum(self.counts[kind].values())) for kind in self.kinds)

        scodes = model.codes_to_str_abbreviated(self.codes)

        ssources = '<none>' if not self.sources else '\n' + '\n'.join(
            ' ' + s for s in self.sources)

        soperators = '<none>' if not self.operators else '\n' + '\n'.join(
            ' ' + s for s in self.operators)

        def stime(t):
            # Suppress the global open-ended time bounds - they carry no
            # information for the user.
            return util.tts(t) if t is not None and t not in (
                model.g_tmin, model.g_tmax) else '<none>'

        def stable(rows):
            # Render rows as a left-justified, column-aligned table.
            ns = [max(len(w) for w in col) for col in zip(*rows)]
            return '\n'.join(
                ' '.join(w.ljust(n) for n, w in zip(ns, row))
                for row in rows)

        def indent(s):
            return '\n'.join(' '+line for line in s.splitlines())

        stspans = '<none>' if not self.kinds else '\n' + indent(stable([(
            kind + ':',
            str(kind_counts[kind]),
            stime(self.time_spans[kind][0]),
            '-',
            stime(self.time_spans[kind][1])) for kind in sorted(self.kinds)]))

        s = '''
Number of files: %i
Total size of known files: %s
Number of index nuts: %i
Available content kinds: %s
Available codes: %s
Sources: %s
Operators: %s''' % (
            self.nfiles,
            util.human_bytesize(self.total_size),
            self.nnuts,
            stspans, scodes, ssources, soperators)

        return s.lstrip()

3200 

3201 

# Public API of this module.
__all__ = [
    'Squirrel',
    'SquirrelStats',
]