Coverage for /usr/local/lib/python3.11/dist-packages/pyrocko/squirrel/base.py: 85%

877 statements  

« prev     ^ index     » next       coverage.py v6.5.0, created at 2024-02-05 09:37 +0000

1# http://pyrocko.org - GPLv3 

2# 

3# The Pyrocko Developers, 21st Century 

4# ---|P------/S----------~Lg---------- 

5 

6''' 

7Squirrel main classes. 

8''' 

9 

10import sys 

11import os 

12import time 

13import math 

14import logging 

15import threading 

16import queue 

17from collections import defaultdict 

18 

19from pyrocko.guts import Object, Int, List, Tuple, String, Timestamp, Dict 

20from pyrocko import util, trace 

21from pyrocko import progress 

22from pyrocko.plot import nice_time_tick_inc_approx_secs 

23 

24from . import model, io, cache, dataset 

25 

26from .model import to_kind_id, WaveformOrder, to_kind, to_codes, \ 

27 STATION, CHANNEL, RESPONSE, EVENT, WAVEFORM, codes_patterns_list, \ 

28 codes_patterns_for_kind 

29from .client import fdsn, catalog 

30from .selection import Selection, filldocs 

31from .database import abspath 

32from .operators.base import Operator, CodesPatternFiltering 

33from . import client, environment, error 

34 

35logger = logging.getLogger('psq.base') 

36 

37guts_prefix = 'squirrel' 

38 

39 

def nonef(f, xs):
    '''
    Apply ``f`` to the non-``None`` items of ``xs``.

    Returns ``None`` when ``xs`` contains no non-``None`` values.
    '''
    filtered = [x for x in xs if x is not None]
    return f(filtered) if filtered else None

46 

47 

def make_task(*args):
    # Create a progress-reporting task bound to this module's logger.
    return progress.task(*args, logger=logger)

50 

51 

def lpick(condition, seq):
    '''
    Partition ``seq`` into ``(non_matching, matching)`` lists.

    :param condition:
        Predicate applied to each element; its truth value decides the
        destination list.
    '''
    rejected, accepted = [], []
    for item in seq:
        if condition(item):
            accepted.append(item)
        else:
            rejected.append(item)

    return rejected, accepted

58 

59 

def len_plural(obj):
    '''
    Return ``(len(obj), suffix)`` with ``suffix == 's'`` unless length is 1.

    Convenience for building messages like ``'%i file%s' % len_plural(xs)``.
    '''
    n = len(obj)
    return n, 's' if n != 1 else ''

62 

63 

def blocks(tmin, tmax, deltat, nsamples_block=100000):
    '''
    Yield consecutive time-block intervals covering ``[tmin, tmax]``.

    The block length is a "nice" time increment close to
    ``deltat * nsamples_block``. Yields ``(tblock_min, tblock_max)`` pairs.
    '''
    tblock = nice_time_tick_inc_approx_secs(
        util.to_time_float(deltat * nsamples_block))

    kblock = int(math.floor(tmin / tblock))
    kblock_end = int(math.ceil(tmax / tblock))
    while kblock < kblock_end:
        yield kblock * tblock, (kblock + 1) * tblock
        kblock += 1

71 

72 

def gaps(avail, tmin, tmax):
    '''
    Compute uncovered intervals in ``[tmin, tmax)`` given covered spans.

    :param avail:
        Covered intervals as ``(tmin_a, tmax_a)`` pairs.
    :returns:
        List of ``(tmin_gap, tmax_gap)`` intervals within ``[tmin, tmax)``
        not covered by any span of ``avail``.
    '''
    assert tmin < tmax

    # Sweep line: +1 opens coverage, -1 closes it. The query span itself is
    # entered as an inverted interval so the sweep starts/ends at level 1.
    events = [(tmin, -1), (tmax, 1)]
    for tmin_a, tmax_a in avail:
        assert tmin_a < tmax_a
        events.append((tmin_a, 1))
        events.append((tmax_a, -1))

    events.sort()

    found = []
    level = 1
    t_open = None
    for t, step in events:
        if level == 1 and step == -1:
            t_open = t
        elif level == 0 and step == 1 and t_open is not None:
            if t_open != t:
                found.append((t_open, t))

        level += step

    return found

97 

98 

def order_key(order):
    '''
    Sorting/grouping key for a waveform order: ``(codes, tmin, tmax)``.
    '''
    return order.codes, order.tmin, order.tmax

101 

102 

103def _is_exact(pat): 

104 return not ('*' in pat or '?' in pat or ']' in pat or '[' in pat) 

105 

106 

def prefix_tree(tups):
    '''
    Build a sorted prefix tree from equal-length tuples.

    Each node is a ``(value, children)`` pair, children again being such
    pairs; leaf nodes carry an empty children list.
    '''
    if not tups:
        return []

    if len(tups[0]) == 1:
        # Innermost level: plain sorted leaves.
        return sorted((tup[0], []) for tup in tups)

    groups = defaultdict(list)
    for tup in tups:
        groups[tup[0]].append(tup[1:])

    return [
        (head, prefix_tree(tails))
        for head, tails in sorted(groups.items())]

123 

124 

def match_time_span(tmin, tmax, obj):
    '''
    Check whether ``obj``'s time span intersects the query span.

    ``None`` bounds (on the query or on ``obj``) are treated as open, i.e.
    always matching on that side.
    '''
    if obj.tmin is not None and tmax is not None and obj.tmin > tmax:
        return False

    if tmin is not None and obj.tmax is not None and tmin >= obj.tmax:
        return False

    return True

128 

129 

class Batch(object):
    '''
    Batch of waveforms from window-wise data extraction.

    Encapsulates state and results yielded for each window in window-wise
    waveform extraction with the :py:meth:`Squirrel.chopper_waveforms` method.

    *Attributes:*

    .. py:attribute:: tmin

        Start of this time window.

    .. py:attribute:: tmax

        End of this time window.

    .. py:attribute:: i

        Index of this time window in sequence.

    .. py:attribute:: n

        Total number of time windows in sequence.

    .. py:attribute:: igroup

        Index of this time window's sequence group.

    .. py:attribute:: ngroups

        Total number of sequence groups.

    .. py:attribute:: traces

        Extracted waveforms for this time window.
    '''

    def __init__(self, tmin, tmax, i, n, igroup, ngroups, traces):
        # Plain data container, see class docstring for attribute meanings.
        self.tmin = tmin
        self.tmax = tmax
        self.i = i
        self.n = n
        self.igroup = igroup
        self.ngroups = ngroups
        self.traces = traces

    def as_multitrace(self):
        '''
        Merge the batch's traces into a single
        :py:class:`~pyrocko.multitrace.MultiTrace` spanning this window.
        '''
        # Imported here rather than at module level, keeping the dependency
        # local to this method.
        from pyrocko import multitrace

        data, codes, tmin, deltat = trace.merge_traces_data_as_array(
            self.traces, tmin=self.tmin, tmax=self.tmax)

        return multitrace.MultiTrace(
            data=data,
            codes=codes,
            tmin=tmin,
            deltat=deltat)

188 

189 

190class Squirrel(Selection): 

191 ''' 

192 Prompt, lazy, indexing, caching, dynamic seismological dataset access. 

193 

194 :param env: 

195 Squirrel environment instance or directory path to use as starting 

196 point for its detection. By default, the current directory is used as 

197 starting point. When searching for a usable environment the directory 

198 ``'.squirrel'`` or ``'squirrel'`` in the current (or starting point) 

199 directory is used if it exists, otherwise the parent directories are 

200 search upwards for the existence of such a directory. If no such 

201 directory is found, the user's global Squirrel environment 

202 ``'$HOME/.pyrocko/squirrel'`` is used. 

203 :type env: 

204 :py:class:`~pyrocko.squirrel.environment.Environment` or 

205 :py:class:`str` 

206 

207 :param database: 

208 Database instance or path to database. By default the 

209 database found in the detected Squirrel environment is used. 

210 :type database: 

211 :py:class:`~pyrocko.squirrel.database.Database` or :py:class:`str` 

212 

213 :param cache_path: 

214 Directory path to use for data caching. By default, the ``'cache'`` 

215 directory in the detected Squirrel environment is used. 

216 :type cache_path: 

217 :py:class:`str` 

218 

219 :param persistent: 

220 If given a name, create a persistent selection. 

221 :type persistent: 

222 :py:class:`str` 

223 

224 This is the central class of the Squirrel framework. It provides a unified 

225 interface to query and access seismic waveforms, station meta-data and 

226 event information from local file collections and remote data sources. For 

227 prompt responses, a profound database setup is used under the hood. To 

228 speed up assemblage of ad-hoc data selections, files are indexed on first 

229 use and the extracted meta-data is remembered in the database for 

230 subsequent accesses. Bulk data is lazily loaded from disk and remote 

231 sources, just when requested. Once loaded, data is cached in memory to 

232 expedite typical access patterns. Files and data sources can be dynamically 

233 added to and removed from the Squirrel selection at runtime. 

234 

235 Queries are restricted to the contents of the files currently added to the 

236 Squirrel selection (usually a subset of the file meta-information 

237 collection in the database). This list of files is referred to here as the 

238 "selection". By default, temporary tables are created in the attached 

239 database to hold the names of the files in the selection as well as various 

240 indices and counters. These tables are only visible inside the application 

241 which created them and are deleted when the database connection is closed 

242 or the application exits. To create a selection which is not deleted at 

243 exit, supply a name to the ``persistent`` argument of the Squirrel 

244 constructor. Persistent selections are shared among applications using the 

245 same database. 

246 

247 **Method summary** 

248 

249 Some of the methods are implemented in :py:class:`Squirrel`'s base class 

250 :py:class:`~pyrocko.squirrel.selection.Selection`. 

251 

252 .. autosummary:: 

253 

254 ~Squirrel.add 

255 ~Squirrel.add_source 

256 ~Squirrel.add_fdsn 

257 ~Squirrel.add_catalog 

258 ~Squirrel.add_dataset 

259 ~Squirrel.add_virtual 

260 ~Squirrel.update 

261 ~Squirrel.update_waveform_promises 

262 ~Squirrel.advance_accessor 

263 ~Squirrel.clear_accessor 

264 ~Squirrel.reload 

265 ~pyrocko.squirrel.selection.Selection.iter_paths 

266 ~Squirrel.iter_nuts 

267 ~Squirrel.iter_kinds 

268 ~Squirrel.iter_deltats 

269 ~Squirrel.iter_codes 

270 ~pyrocko.squirrel.selection.Selection.get_paths 

271 ~Squirrel.get_nuts 

272 ~Squirrel.get_kinds 

273 ~Squirrel.get_deltats 

274 ~Squirrel.get_codes 

275 ~Squirrel.get_counts 

276 ~Squirrel.get_time_span 

277 ~Squirrel.get_deltat_span 

278 ~Squirrel.get_nfiles 

279 ~Squirrel.get_nnuts 

280 ~Squirrel.get_total_size 

281 ~Squirrel.get_stats 

282 ~Squirrel.get_content 

283 ~Squirrel.get_stations 

284 ~Squirrel.get_channels 

285 ~Squirrel.get_responses 

286 ~Squirrel.get_events 

287 ~Squirrel.get_waveform_nuts 

288 ~Squirrel.get_waveforms 

289 ~Squirrel.chopper_waveforms 

290 ~Squirrel.get_coverage 

291 ~Squirrel.pile 

292 ~Squirrel.snuffle 

293 ~Squirrel.glob_codes 

294 ~pyrocko.squirrel.selection.Selection.get_database 

295 ~Squirrel.print_tables 

296 ''' 

297 

    def __init__(
            self, env=None, database=None, cache_path=None, persistent=None):

        # Resolve the environment: accept an Environment instance directly,
        # otherwise detect one from the given (or current) directory.
        if not isinstance(env, environment.Environment):
            env = environment.get_environment(env)

        # Fall back to the environment's configured locations where no
        # explicit database/cache path is given.
        if database is None:
            database = env.expand_path(env.database_path)

        if cache_path is None:
            cache_path = env.expand_path(env.cache_path)

        if persistent is None:
            persistent = env.persistent

        Selection.__init__(
            self, database=database, persistent=persistent)

        self.get_database().set_basepath(os.path.dirname(env.get_basepath()))

        # Separate in-memory content caches: waveforms get their own cache,
        # all other content kinds share the 'default' one.
        self._content_caches = {
            'waveform': cache.ContentCache(),
            'default': cache.ContentCache()}

        self._cache_path = cache_path

        # Attached remote sources and operator bookkeeping.
        self._sources = []
        self._operators = []
        self._operator_registry = {}

        self._pending_orders = []

        # Pile emulation is created lazily (see Squirrel.pile).
        self._pile = None
        self._n_choppers_active = 0

        self.downloads_enabled = True

        # Selection-specific table names, derived from the selection name so
        # multiple (persistent) selections can coexist in one database.
        self._names.update({
            'nuts': self.name + '_nuts',
            'kind_codes_count': self.name + '_kind_codes_count',
            'coverage': self.name + '_coverage'})

        with self.transaction('create tables') as cursor:
            self._create_tables_squirrel(cursor)

342 

    def _create_tables_squirrel(self, cursor):
        '''
        Create selection-specific tables, indices and triggers.

        The ``nuts`` table indexes the content pieces of the files in the
        selection, ``kind_codes_count`` holds per kind/codes entity counts
        and ``coverage`` holds time-coverage step points. The latter two are
        maintained incrementally by triggers on ``nuts`` (and on the global
        ``files``/file-states tables), so they never need full rebuilds.
        '''

        cursor.execute(self._register_table(self._sql(
            '''
                CREATE TABLE IF NOT EXISTS %(db)s.%(nuts)s (
                    nut_id integer PRIMARY KEY,
                    file_id integer,
                    file_segment integer,
                    file_element integer,
                    kind_id integer,
                    kind_codes_id integer,
                    tmin_seconds integer,
                    tmin_offset integer,
                    tmax_seconds integer,
                    tmax_offset integer,
                    kscale integer)
            ''')))

        cursor.execute(self._register_table(self._sql(
            '''
                CREATE TABLE IF NOT EXISTS %(db)s.%(kind_codes_count)s (
                    kind_codes_id integer PRIMARY KEY,
                    count integer)
            ''')))

        # A nut is uniquely identified by its position within its file.
        cursor.execute(self._sql(
            '''
                CREATE UNIQUE INDEX IF NOT EXISTS %(db)s.%(nuts)s_file_element
                ON %(nuts)s (file_id, file_segment, file_element)
            '''))

        cursor.execute(self._sql(
            '''
                CREATE INDEX IF NOT EXISTS %(db)s.%(nuts)s_index_file_id
                ON %(nuts)s (file_id)
            '''))

        # Time-range lookup indices, see _timerange_sql for their use.
        cursor.execute(self._sql(
            '''
                CREATE INDEX IF NOT EXISTS %(db)s.%(nuts)s_index_tmin_seconds
                ON %(nuts)s (kind_id, tmin_seconds)
            '''))

        cursor.execute(self._sql(
            '''
                CREATE INDEX IF NOT EXISTS %(db)s.%(nuts)s_index_tmax_seconds
                ON %(nuts)s (kind_id, tmax_seconds)
            '''))

        cursor.execute(self._sql(
            '''
                CREATE INDEX IF NOT EXISTS %(db)s.%(nuts)s_index_kscale
                ON %(nuts)s (kind_id, kscale, tmin_seconds)
            '''))

        # Remove a file's nuts when the file is deleted from the database.
        cursor.execute(self._sql(
            '''
                CREATE TRIGGER IF NOT EXISTS %(db)s.%(nuts)s_delete_nuts
                BEFORE DELETE ON main.files FOR EACH ROW
                BEGIN
                  DELETE FROM %(nuts)s WHERE file_id == old.file_id;
                END
            '''))

        # trigger only on size to make silent update of mtime possible
        cursor.execute(self._sql(
            '''
                CREATE TRIGGER IF NOT EXISTS %(db)s.%(nuts)s_delete_nuts2
                BEFORE UPDATE OF size ON main.files FOR EACH ROW
                BEGIN
                  DELETE FROM %(nuts)s WHERE file_id == old.file_id;
                END
            '''))

        # Remove a file's nuts when the file leaves the selection.
        cursor.execute(self._sql(
            '''
                CREATE TRIGGER IF NOT EXISTS
                    %(db)s.%(file_states)s_delete_files
                BEFORE DELETE ON %(db)s.%(file_states)s FOR EACH ROW
                BEGIN
                  DELETE FROM %(nuts)s WHERE file_id == old.file_id;
                END
            '''))

        # Maintain kind_codes_count incrementally on nut insert/delete.
        cursor.execute(self._sql(
            '''
                CREATE TRIGGER IF NOT EXISTS %(db)s.%(nuts)s_inc_kind_codes
                BEFORE INSERT ON %(nuts)s FOR EACH ROW
                BEGIN
                    INSERT OR IGNORE INTO %(kind_codes_count)s VALUES
                    (new.kind_codes_id, 0);
                    UPDATE %(kind_codes_count)s
                    SET count = count + 1
                    WHERE new.kind_codes_id
                        == %(kind_codes_count)s.kind_codes_id;
                END
            '''))

        cursor.execute(self._sql(
            '''
                CREATE TRIGGER IF NOT EXISTS %(db)s.%(nuts)s_dec_kind_codes
                BEFORE DELETE ON %(nuts)s FOR EACH ROW
                BEGIN
                    UPDATE %(kind_codes_count)s
                    SET count = count - 1
                    WHERE old.kind_codes_id
                        == %(kind_codes_count)s.kind_codes_id;
                END
            '''))

        # Coverage is stored as step points: at each (time_seconds,
        # time_offset) the number of overlapping entities changes by 'step'.
        cursor.execute(self._register_table(self._sql(
            '''
                CREATE TABLE IF NOT EXISTS %(db)s.%(coverage)s (
                    kind_codes_id integer,
                    time_seconds integer,
                    time_offset integer,
                    step integer)
            ''')))

        cursor.execute(self._sql(
            '''
                CREATE UNIQUE INDEX IF NOT EXISTS %(db)s.%(coverage)s_time
                ON %(coverage)s (kind_codes_id, time_seconds, time_offset)
            '''))

        # On insert: +1 step at nut's tmin, -1 at tmax; zero-valued step
        # points are pruned afterwards.
        cursor.execute(self._sql(
            '''
                CREATE TRIGGER IF NOT EXISTS %(db)s.%(nuts)s_add_coverage
                AFTER INSERT ON %(nuts)s FOR EACH ROW
                BEGIN
                    INSERT OR IGNORE INTO %(coverage)s VALUES
                    (new.kind_codes_id, new.tmin_seconds, new.tmin_offset, 0)
                    ;
                    UPDATE %(coverage)s
                    SET step = step + 1
                    WHERE new.kind_codes_id == %(coverage)s.kind_codes_id
                        AND new.tmin_seconds == %(coverage)s.time_seconds
                        AND new.tmin_offset == %(coverage)s.time_offset
                    ;
                    INSERT OR IGNORE INTO %(coverage)s VALUES
                    (new.kind_codes_id, new.tmax_seconds, new.tmax_offset, 0)
                    ;
                    UPDATE %(coverage)s
                    SET step = step - 1
                    WHERE new.kind_codes_id == %(coverage)s.kind_codes_id
                        AND new.tmax_seconds == %(coverage)s.time_seconds
                        AND new.tmax_offset == %(coverage)s.time_offset
                    ;
                    DELETE FROM %(coverage)s
                    WHERE new.kind_codes_id == %(coverage)s.kind_codes_id
                        AND new.tmin_seconds == %(coverage)s.time_seconds
                        AND new.tmin_offset == %(coverage)s.time_offset
                        AND step == 0
                    ;
                    DELETE FROM %(coverage)s
                    WHERE new.kind_codes_id == %(coverage)s.kind_codes_id
                        AND new.tmax_seconds == %(coverage)s.time_seconds
                        AND new.tmax_offset == %(coverage)s.time_offset
                        AND step == 0
                    ;
                END
            '''))

        # On delete: inverse of the insert trigger above.
        cursor.execute(self._sql(
            '''
                CREATE TRIGGER IF NOT EXISTS %(db)s.%(nuts)s_remove_coverage
                BEFORE DELETE ON %(nuts)s FOR EACH ROW
                BEGIN
                    INSERT OR IGNORE INTO %(coverage)s VALUES
                    (old.kind_codes_id, old.tmin_seconds, old.tmin_offset, 0)
                    ;
                    UPDATE %(coverage)s
                    SET step = step - 1
                    WHERE old.kind_codes_id == %(coverage)s.kind_codes_id
                        AND old.tmin_seconds == %(coverage)s.time_seconds
                        AND old.tmin_offset == %(coverage)s.time_offset
                    ;
                    INSERT OR IGNORE INTO %(coverage)s VALUES
                    (old.kind_codes_id, old.tmax_seconds, old.tmax_offset, 0)
                    ;
                    UPDATE %(coverage)s
                    SET step = step + 1
                    WHERE old.kind_codes_id == %(coverage)s.kind_codes_id
                        AND old.tmax_seconds == %(coverage)s.time_seconds
                        AND old.tmax_offset == %(coverage)s.time_offset
                    ;
                    DELETE FROM %(coverage)s
                    WHERE old.kind_codes_id == %(coverage)s.kind_codes_id
                        AND old.tmin_seconds == %(coverage)s.time_seconds
                        AND old.tmin_offset == %(coverage)s.time_offset
                        AND step == 0
                    ;
                    DELETE FROM %(coverage)s
                    WHERE old.kind_codes_id == %(coverage)s.kind_codes_id
                        AND old.tmax_seconds == %(coverage)s.time_seconds
                        AND old.tmax_offset == %(coverage)s.time_offset
                        AND step == 0
                    ;
                END
            '''))

543 

    def _delete(self):
        '''Delete database tables associated with this Squirrel.'''

        # Triggers are dropped before the tables they reference. The
        # coverage-related objects use IF EXISTS as they may be absent in
        # selections created by older versions.
        with self.transaction('delete tables') as cursor:
            for s in '''
                    DROP TRIGGER %(db)s.%(nuts)s_delete_nuts;
                    DROP TRIGGER %(db)s.%(nuts)s_delete_nuts2;
                    DROP TRIGGER %(db)s.%(file_states)s_delete_files;
                    DROP TRIGGER %(db)s.%(nuts)s_inc_kind_codes;
                    DROP TRIGGER %(db)s.%(nuts)s_dec_kind_codes;
                    DROP TABLE %(db)s.%(nuts)s;
                    DROP TABLE %(db)s.%(kind_codes_count)s;
                    DROP TRIGGER IF EXISTS %(db)s.%(nuts)s_add_coverage;
                    DROP TRIGGER IF EXISTS %(db)s.%(nuts)s_remove_coverage;
                    DROP TABLE IF EXISTS %(db)s.%(coverage)s;
                    '''.strip().splitlines():

                cursor.execute(self._sql(s))

        Selection._delete(self)

564 

    @filldocs
    def add(self,
            paths,
            kinds=None,
            format='detect',
            include=None,
            exclude=None,
            check=True):

        '''
        Add files to the selection.

        :param paths:
            Iterator yielding paths to files or directories to be added to the
            selection. Recurses into directories. If given a ``str``, it
            is treated as a single path to be added.
        :type paths:
            :py:class:`list` of :py:class:`str`

        :param kinds:
            Content types to be made available through the Squirrel selection.
            By default, all known content types are accepted.
        :type kinds:
            :py:class:`list` of :py:class:`str`

        :param format:
            File format identifier or ``'detect'`` to enable auto-detection
            (available: %(file_formats)s).
        :type format:
            str

        :param include:
            If not ``None``, files are only included if their paths match the
            given regular expression pattern.
        :type include:
            str

        :param exclude:
            If not ``None``, files are only included if their paths do not
            match the given regular expression pattern.
        :type exclude:
            str

        :param check:
            If ``True``, all file modification times are checked to see if
            cached information has to be updated (slow). If ``False``, only
            previously unknown files are indexed and cached information is used
            for known files, regardless of file state (fast, corresponds to
            Squirrel's ``--optimistic`` mode). File deletions will go
            undetected in the latter case.
        :type check:
            bool

        :Complexity:
            O(log N)
        '''

        if isinstance(kinds, str):
            kinds = (kinds,)

        if isinstance(paths, str):
            paths = [paths]

        kind_mask = model.to_kind_mask(kinds)

        # Virtual paths bypass the filesystem scan (pass_through).
        Selection.add(
            self, util.iter_select_files(
                paths,
                show_progress=False,
                include=include,
                exclude=exclude,
                pass_through=lambda path: path.startswith('virtual:')
            ), kind_mask, format)

        # Index new/changed files and refresh the selection's nuts table.
        self._load(check)
        self._update_nuts()

641 

    def reload(self):
        '''
        Check for modifications and reindex modified files.

        Based on file modification times.
        '''

        # Force re-checking of every file in the selection, then rescan and
        # rebuild the nut index for anything that changed.
        self._set_file_states_force_check()
        self._load(check=True)
        self._update_nuts()

652 

653 def add_virtual(self, nuts, virtual_paths=None): 

654 ''' 

655 Add content which is not backed by files. 

656 

657 :param nuts: 

658 Content pieces to be added. 

659 :type nuts: 

660 iterator yielding :py:class:`~pyrocko.squirrel.model.Nut` objects 

661 

662 :param virtual_paths: 

663 List of virtual paths to prevent creating a temporary list of the 

664 nuts while aggregating the file paths for the selection. 

665 :type virtual_paths: 

666 :py:class:`list` of :py:class:`str` 

667 

668 Stores to the main database and the selection. 

669 ''' 

670 

671 if isinstance(virtual_paths, str): 

672 virtual_paths = [virtual_paths] 

673 

674 if virtual_paths is None: 

675 if not isinstance(nuts, list): 

676 nuts = list(nuts) 

677 virtual_paths = set(nut.file_path for nut in nuts) 

678 

679 Selection.add(self, virtual_paths) 

680 self.get_database().dig(nuts) 

681 self._update_nuts() 

682 

683 def add_volatile(self, nuts): 

684 if not isinstance(nuts, list): 

685 nuts = list(nuts) 

686 

687 paths = list(set(nut.file_path for nut in nuts)) 

688 io.backends.virtual.add_nuts(nuts) 

689 self.add_virtual(nuts, paths) 

690 self._volatile_paths.extend(paths) 

691 

    def add_volatile_waveforms(self, traces):
        '''
        Add in-memory waveforms which will be removed when the app closes.

        :param traces:
            Waveforms to be added.
        :type traces:
            iterable of :py:class:`pyrocko.trace.Trace` objects

        :returns:
            The virtual path under which the waveforms were registered.
        '''

        name = model.random_name()

        # All traces of this call share one random virtual path; each trace
        # becomes its own file segment under that path.
        path = 'virtual:volatile:%s' % name

        nuts = []
        for itr, tr in enumerate(traces):
            assert tr.tmin <= tr.tmax
            tmin_seconds, tmin_offset = model.tsplit(tr.tmin)
            # End time derived from the number of samples, not tr.tmax.
            tmax_seconds, tmax_offset = model.tsplit(
                tr.tmin + tr.data_len()*tr.deltat)

            nuts.append(model.Nut(
                file_path=path,
                file_format='virtual',
                file_segment=itr,
                file_element=0,
                file_mtime=0,
                codes=tr.codes,
                tmin_seconds=tmin_seconds,
                tmin_offset=tmin_offset,
                tmax_seconds=tmax_seconds,
                tmax_offset=tmax_offset,
                deltat=tr.deltat,
                kind_id=to_kind_id('waveform'),
                content=tr))

        self.add_volatile(nuts)
        return path

725 

    def _load(self, check):
        # Index all unindexed (or, with check=True, possibly modified) files
        # of the selection. content=[] means no payload is kept — this pass
        # only updates the meta-data index.
        for _ in io.iload(
                self,
                content=[],
                skip_unchanged=True,
                check=check):
            pass

733 

    def _update_nuts(self, transaction=None):
        # Copy the nuts of all pending files of the selection from the global
        # nuts table into the selection's own nuts table, honoring each
        # file's kind mask. file_state != 2 selects files not yet aggregated;
        # afterwards they are marked as known.
        transaction = transaction or self.transaction('update nuts')
        with make_task('Aggregating selection') as task, \
                transaction as cursor:

            # Report progress via SQLite's progress handler (every 100k VM
            # instructions).
            self._conn.set_progress_handler(task.update, 100000)
            nrows = cursor.execute(self._sql(
                '''
                    INSERT INTO %(db)s.%(nuts)s
                    SELECT NULL,
                        nuts.file_id, nuts.file_segment, nuts.file_element,
                        nuts.kind_id, nuts.kind_codes_id,
                        nuts.tmin_seconds, nuts.tmin_offset,
                        nuts.tmax_seconds, nuts.tmax_offset,
                        nuts.kscale
                    FROM %(db)s.%(file_states)s
                    INNER JOIN nuts
                        ON %(db)s.%(file_states)s.file_id == nuts.file_id
                    INNER JOIN kind_codes
                        ON nuts.kind_codes_id ==
                           kind_codes.kind_codes_id
                    WHERE %(db)s.%(file_states)s.file_state != 2
                        AND (((1 << kind_codes.kind_id)
                            & %(db)s.%(file_states)s.kind_mask) != 0)
                ''')).rowcount

            task.update(nrows)
            self._set_file_states_known(transaction)
            self._conn.set_progress_handler(None, 0)

763 

    def add_source(self, source, check=True):
        '''
        Add remote resource.

        :param source:
            Remote data access client instance.
        :type source:
            subclass of :py:class:`~pyrocko.squirrel.client.base.Source`

        :param check:
            Passed on to the source's ``setup()`` method.
        :type check:
            bool
        '''

        self._sources.append(source)
        source.setup(self, check=check)

776 

    def add_fdsn(self, *args, **kwargs):
        '''
        Add FDSN site for transparent remote data access.

        Arguments are passed to
        :py:class:`~pyrocko.squirrel.client.fdsn.FDSNSource`. The constructed
        source is registered via :py:meth:`add_source`.
        '''

        self.add_source(fdsn.FDSNSource(*args, **kwargs))

786 

    def add_catalog(self, *args, **kwargs):
        '''
        Add online catalog for transparent event data access.

        Arguments are passed to
        :py:class:`~pyrocko.squirrel.client.catalog.CatalogSource`. The
        constructed source is registered via :py:meth:`add_source`.
        '''

        self.add_source(catalog.CatalogSource(*args, **kwargs))

796 

    def add_dataset(self, ds, check=True):
        '''
        Read dataset description from file and add its contents.

        :param ds:
            Path to dataset description file, dataset description object
            or name of a built-in dataset. See
            :py:mod:`~pyrocko.squirrel.dataset`.
        :type ds:
            :py:class:`str` or :py:class:`~pyrocko.squirrel.dataset.Dataset`

        :param check:
            If ``True``, all file modification times are checked to see if
            cached information has to be updated (slow). If ``False``, only
            previously unknown files are indexed and cached information is used
            for known files, regardless of file state (fast, corresponds to
            Squirrel's ``--optimistic`` mode). File deletions will go
            undetected in the latter case.
        :type check:
            bool
        '''
        # Accept either a path/built-in name or a ready Dataset object.
        if isinstance(ds, str):
            ds = dataset.read_dataset(ds)

        ds.setup(self, check=check)

822 

823 def _get_selection_args( 

824 self, kind_id, 

825 obj=None, tmin=None, tmax=None, time=None, codes=None): 

826 

827 if codes is not None: 

828 codes = codes_patterns_for_kind(kind_id, codes) 

829 

830 if time is not None: 

831 tmin = time 

832 tmax = time 

833 

834 if obj is not None: 

835 tmin = tmin if tmin is not None else obj.tmin 

836 tmax = tmax if tmax is not None else obj.tmax 

837 codes = codes if codes is not None else codes_patterns_for_kind( 

838 kind_id, obj.codes) 

839 

840 return tmin, tmax, codes 

841 

842 def _get_selection_args_str(self, *args, **kwargs): 

843 

844 tmin, tmax, codes = self._get_selection_args(*args, **kwargs) 

845 return 'tmin: %s, tmax: %s, codes: %s' % ( 

846 util.time_to_str(tmin) if tmin is not None else 'none', 

847 util.time_to_str(tmax) if tmax is not None else 'none', 

848 ','.join(str(entry) for entry in codes)) 

849 

850 def _selection_args_to_kwargs( 

851 self, obj=None, tmin=None, tmax=None, time=None, codes=None): 

852 

853 return dict(obj=obj, tmin=tmin, tmax=tmax, time=time, codes=codes) 

854 

    def _timerange_sql(self, tmin, tmax, kind, cond, args, naiv):
        # Append SQL conditions (to ``cond``, in-place) and their placeholder
        # values (to ``args``) restricting nuts to those intersecting
        # [tmin, tmax]. Sub-second offsets are not used here; fine-grained
        # edge handling happens in Python (see iter_nuts).

        tmin_seconds, tmin_offset = model.tsplit(tmin)
        tmax_seconds, tmax_offset = model.tsplit(tmax)
        if naiv:
            # Simple variant without the kscale trick (slow, for testing).
            cond.append('%(db)s.%(nuts)s.tmin_seconds <= ?')
            args.append(tmax_seconds)
        else:
            # kscale presumably bins entities by time-span magnitude (bins
            # bounded by model.tscale_edges) — the lower bound
            # tmin_seconds - tscale - 1 relies on spans in bin kscale not
            # exceeding tscale, enabling index-backed BETWEEN queries.
            tscale_edges = model.tscale_edges
            tmin_cond = []
            for kscale in range(tscale_edges.size + 1):
                if kscale != tscale_edges.size:
                    tscale = int(tscale_edges[kscale])
                    tmin_cond.append('''
                        (%(db)s.%(nuts)s.kind_id = ?
                         AND %(db)s.%(nuts)s.kscale == ?
                         AND %(db)s.%(nuts)s.tmin_seconds BETWEEN ? AND ?)
                    ''')
                    args.extend(
                        (to_kind_id(kind), kscale,
                         tmin_seconds - tscale - 1, tmax_seconds + 1))

                else:
                    # Last bin: span length unbounded, so no usable lower
                    # bound on tmin_seconds.
                    tmin_cond.append('''
                        (%(db)s.%(nuts)s.kind_id == ?
                         AND %(db)s.%(nuts)s.kscale == ?
                         AND %(db)s.%(nuts)s.tmin_seconds <= ?)
                    ''')

                    args.extend(
                        (to_kind_id(kind), kscale, tmax_seconds + 1))
            if tmin_cond:
                cond.append(' ( ' + ' OR '.join(tmin_cond) + ' ) ')

        cond.append('%(db)s.%(nuts)s.tmax_seconds >= ?')
        args.append(tmin_seconds)

891 

892 def _codes_match_sql(self, positive, kind_id, codes, cond, args): 

893 pats = codes_patterns_for_kind(kind_id, codes) 

894 if pats is None: 

895 return 

896 

897 pats_exact = [] 

898 pats_nonexact = [] 

899 for pat in pats: 

900 spat = pat.safe_str 

901 (pats_exact if _is_exact(spat) else pats_nonexact).append(spat) 

902 

903 codes_cond = [] 

904 if pats_exact: 

905 codes_cond.append(' ( kind_codes.codes IN ( %s ) ) ' % ', '.join( 

906 '?'*len(pats_exact))) 

907 

908 args.extend(pats_exact) 

909 

910 if pats_nonexact: 

911 codes_cond.append(' ( %s ) ' % ' OR '.join( 

912 ('kind_codes.codes GLOB ?',) * len(pats_nonexact))) 

913 

914 args.extend(pats_nonexact) 

915 

916 if codes_cond: 

917 cond.append('%s ( %s )' % ( 

918 'NOT' if not positive else '', 

919 ' OR '.join(codes_cond))) 

920 

921 def iter_nuts( 

922 self, kind=None, tmin=None, tmax=None, codes=None, 

923 codes_exclude=None, sample_rate_min=None, sample_rate_max=None, 

924 naiv=False, kind_codes_ids=None, path=None, limit=None): 

925 

926 ''' 

927 Iterate over content entities matching given constraints. 

928 

929 :param kind: 

930 Content kind (or kinds) to extract. 

931 :type kind: 

932 :py:class:`str`, :py:class:`list` of :py:class:`str` 

933 

934 :param tmin: 

935 Start time of query interval. 

936 :type tmin: 

937 :py:func:`~pyrocko.util.get_time_float` 

938 

939 :param tmax: 

940 End time of query interval. 

941 :type tmax: 

942 :py:func:`~pyrocko.util.get_time_float` 

943 

944 :param codes: 

945 List of code patterns to query. 

946 :type codes: 

947 :py:class:`list` of :py:class:`~pyrocko.squirrel.model.Codes` 

948 objects appropriate for the queried content type, or anything which 

949 can be converted to such objects. 

950 

951 :param naiv: 

952 Bypass time span lookup through indices (slow, for testing). 

953 :type naiv: 

954 :py:class:`bool` 

955 

956 :param kind_codes_ids: 

957 Kind-codes IDs of contents to be retrieved (internal use). 

958 :type kind_codes_ids: 

959 :py:class:`list` of :py:class:`int` 

960 

961 :yields: 

962 :py:class:`~pyrocko.squirrel.model.Nut` objects representing the 

963 intersecting content. 

964 

965 :complexity: 

966 O(log N) for the time selection part due to heavy use of database 

967 indices. 

968 

969 Query time span is treated as a half-open interval ``[tmin, tmax)``. 

970 However, if ``tmin`` equals ``tmax``, the edge logics are modified to 

971 closed-interval so that content intersecting with the time instant ``t 

972 = tmin = tmax`` is returned (otherwise nothing would be returned as 

973 ``[t, t)`` never matches anything). 

974 

975 Time spans of content entities to be matched are also treated as half 

976 open intervals, e.g. content span ``[0, 1)`` is matched by query span 

977 ``[0, 1)`` but not by ``[-1, 0)`` or ``[1, 2)``. Also here, logics are 

978 modified to closed-interval when the content time span is an empty 

979 interval, i.e. to indicate a time instant. E.g. time instant 0 is 

980 matched by ``[0, 1)`` but not by ``[-1, 0)`` or ``[1, 2)``. 

981 ''' 

982 

983 if not isinstance(kind, str): 

984 if kind is None: 

985 kind = model.g_content_kinds 

986 for kind_ in kind: 

987 for nut in self.iter_nuts(kind_, tmin, tmax, codes): 

988 yield nut 

989 

990 return 

991 

992 kind_id = to_kind_id(kind) 

993 

994 cond = [] 

995 args = [] 

996 if tmin is not None or tmax is not None: 

997 assert kind is not None 

998 if tmin is None: 

999 tmin = self.get_time_span()[0] 

1000 if tmax is None: 

1001 tmax = self.get_time_span()[1] + 1.0 

1002 

1003 self._timerange_sql(tmin, tmax, kind, cond, args, naiv) 

1004 

1005 cond.append('kind_codes.kind_id == ?') 

1006 args.append(kind_id) 

1007 

1008 if codes is not None: 

1009 self._codes_match_sql(True, kind_id, codes, cond, args) 

1010 

1011 if codes_exclude is not None: 

1012 self._codes_match_sql(False, kind_id, codes_exclude, cond, args) 

1013 

1014 if sample_rate_min is not None: 

1015 cond.append('kind_codes.deltat <= ?') 

1016 args.append(1.0/sample_rate_min) 

1017 

1018 if sample_rate_max is not None: 

1019 cond.append('? <= kind_codes.deltat') 

1020 args.append(1.0/sample_rate_max) 

1021 

1022 if kind_codes_ids is not None: 

1023 cond.append( 

1024 ' ( kind_codes.kind_codes_id IN ( %s ) ) ' % ', '.join( 

1025 '?'*len(kind_codes_ids))) 

1026 

1027 args.extend(kind_codes_ids) 

1028 

1029 db = self.get_database() 

1030 if path is not None: 

1031 cond.append('files.path == ?') 

1032 args.append(db.relpath(abspath(path))) 

1033 

1034 sql = (''' 

1035 SELECT 

1036 files.path, 

1037 files.format, 

1038 files.mtime, 

1039 files.size, 

1040 %(db)s.%(nuts)s.file_segment, 

1041 %(db)s.%(nuts)s.file_element, 

1042 kind_codes.kind_id, 

1043 kind_codes.codes, 

1044 %(db)s.%(nuts)s.tmin_seconds, 

1045 %(db)s.%(nuts)s.tmin_offset, 

1046 %(db)s.%(nuts)s.tmax_seconds, 

1047 %(db)s.%(nuts)s.tmax_offset, 

1048 kind_codes.deltat 

1049 FROM files 

1050 INNER JOIN %(db)s.%(nuts)s 

1051 ON files.file_id == %(db)s.%(nuts)s.file_id 

1052 INNER JOIN kind_codes 

1053 ON %(db)s.%(nuts)s.kind_codes_id == kind_codes.kind_codes_id 

1054 ''') 

1055 

1056 if cond: 

1057 sql += ''' WHERE ''' + ' AND '.join(cond) 

1058 

1059 if limit is not None: 

1060 sql += ''' LIMIT %i''' % limit 

1061 

1062 sql = self._sql(sql) 

1063 if tmin is None and tmax is None: 

1064 for row in self._conn.execute(sql, args): 

1065 row = (db.abspath(row[0]),) + row[1:] 

1066 nut = model.Nut(values_nocheck=row) 

1067 yield nut 

1068 else: 

1069 assert tmin is not None and tmax is not None 

1070 if tmin == tmax: 

1071 for row in self._conn.execute(sql, args): 

1072 row = (db.abspath(row[0]),) + row[1:] 

1073 nut = model.Nut(values_nocheck=row) 

1074 if (nut.tmin <= tmin < nut.tmax) \ 

1075 or (nut.tmin == nut.tmax and tmin == nut.tmin): 

1076 

1077 yield nut 

1078 else: 

1079 for row in self._conn.execute(sql, args): 

1080 row = (db.abspath(row[0]),) + row[1:] 

1081 nut = model.Nut(values_nocheck=row) 

1082 if (tmin < nut.tmax and nut.tmin < tmax) \ 

1083 or (nut.tmin == nut.tmax 

1084 and tmin <= nut.tmin < tmax): 

1085 

1086 yield nut 

1087 

1088 def get_nuts(self, *args, **kwargs): 

1089 ''' 

1090 Get content entities matching given constraints. 

1091 

1092 Like :py:meth:`iter_nuts` but returns results as a list. 

1093 ''' 

1094 

1095 return list(self.iter_nuts(*args, **kwargs)) 

1096 

    def _split_nuts(
            self, kind, tmin=None, tmax=None, codes=None, path=None):
        '''
        Cut nut index entries at the boundaries of a given time window.

        Every nut of the given ``kind`` (optionally restricted by
        ``codes`` and/or originating ``path``) which overlaps the window
        ``[tmin, tmax)`` is removed; the pieces of the nut sticking out
        before ``tmin`` and after ``tmax`` are re-inserted as new nuts.
        The effect is that no remaining nut crosses the window
        boundaries. Operates on both the live-selection nut table and
        the main database nut table.
        '''

        kind_id = to_kind_id(kind)
        # Split query times into integer-seconds + float-offset
        # representation used by the nut tables.
        tmin_seconds, tmin_offset = model.tsplit(tmin)
        tmax_seconds, tmax_offset = model.tsplit(tmax)

        # Name substitutions pointing at the main database's nut table
        # (as opposed to self._names, which points at the selection's).
        names_main_nuts = dict(self._names)
        names_main_nuts.update(db='main', nuts='nuts')

        db = self.get_database()

        def main_nuts(s):
            return s % names_main_nuts

        with self.transaction('split nuts') as cursor:
            # modify selection and main: run the identical split once
            # against each nut table, using the appropriate name
            # substitution for the SQL templates.
            for sql_subst in [
                    self._sql, main_nuts]:

                cond = []
                args = []

                self._timerange_sql(tmin, tmax, kind, cond, args, False)

                if codes is not None:
                    self._codes_match_sql(True, kind_id, codes, cond, args)

                if path is not None:
                    cond.append('files.path == ?')
                    args.append(db.relpath(abspath(path)))

                sql = sql_subst('''
                    SELECT
                        %(db)s.%(nuts)s.nut_id,
                        %(db)s.%(nuts)s.tmin_seconds,
                        %(db)s.%(nuts)s.tmin_offset,
                        %(db)s.%(nuts)s.tmax_seconds,
                        %(db)s.%(nuts)s.tmax_offset,
                        kind_codes.deltat
                    FROM files
                    INNER JOIN %(db)s.%(nuts)s
                        ON files.file_id == %(db)s.%(nuts)s.file_id
                    INNER JOIN kind_codes
                        ON %(db)s.%(nuts)s.kind_codes_id == kind_codes.kind_codes_id
                    WHERE ''' + ' AND '.join(cond))  # noqa

                insert = []
                delete = []
                for row in cursor.execute(sql, args):
                    nut_id, nut_tmin_seconds, nut_tmin_offset, \
                        nut_tmax_seconds, nut_tmax_offset, nut_deltat = row

                    nut_tmin = model.tjoin(
                        nut_tmin_seconds, nut_tmin_offset)
                    nut_tmax = model.tjoin(
                        nut_tmax_seconds, nut_tmax_offset)

                    # Only nuts truly overlapping [tmin, tmax) are split.
                    if nut_tmin < tmax and tmin < nut_tmax:
                        if nut_tmin < tmin:
                            # Left remainder: [nut_tmin, tmin)
                            insert.append((
                                nut_tmin_seconds, nut_tmin_offset,
                                tmin_seconds, tmin_offset,
                                model.tscale_to_kscale(
                                    tmin_seconds - nut_tmin_seconds),
                                nut_id))

                        if tmax < nut_tmax:
                            # Right remainder: [tmax, nut_tmax)
                            insert.append((
                                tmax_seconds, tmax_offset,
                                nut_tmax_seconds, nut_tmax_offset,
                                model.tscale_to_kscale(
                                    nut_tmax_seconds - tmax_seconds),
                                nut_id))

                        delete.append((nut_id,))

                # Re-insert remainders by copying all non-time columns
                # from the original nut; only the time span and the
                # kscale index column are replaced.
                sql_add = '''
                    INSERT INTO %(db)s.%(nuts)s (
                            file_id, file_segment, file_element, kind_id,
                            kind_codes_id, tmin_seconds, tmin_offset,
                            tmax_seconds, tmax_offset, kscale )
                        SELECT
                            file_id, file_segment, file_element,
                            kind_id, kind_codes_id, ?, ?, ?, ?, ?
                        FROM %(db)s.%(nuts)s
                        WHERE nut_id == ?
                '''
                cursor.executemany(sql_subst(sql_add), insert)

                sql_delete = '''
                    DELETE FROM %(db)s.%(nuts)s WHERE nut_id == ?
                '''
                cursor.executemany(sql_subst(sql_delete), delete)

1191 

    def get_time_span(self, kinds=None, tight=True, dummy_limits=True):
        '''
        Get time interval over all content in selection.

        :param kinds:
            If not ``None``, restrict query to given content kinds.
        :type kinds:
            list of str

        :param tight:
            If ``True``, return the combined interval of the kinds for
            which actual time limits are known. If ``False``, return
            an open limit (``None``) as soon as any queried kind has an
            unknown limit on that side.
        :type tight:
            bool

        :param dummy_limits:
            If ``True``, replace unknown limits by the global dummy
            values ``model.g_tmin`` / ``model.g_tmax`` instead of
            returning ``None``.
        :type dummy_limits:
            bool

        :complexity:
            O(1), independent of the number of nuts.

        :returns:
            ``(tmin, tmax)``, combined time interval of queried content kinds.
        '''

        # Earliest (tmin_seconds, tmin_offset) pair per kind. The nested
        # SELECT restricts to rows sharing the minimal tmin_seconds so
        # that the offset minimum is taken within that second only.
        sql_min = self._sql('''
            SELECT MIN(tmin_seconds), MIN(tmin_offset)
            FROM %(db)s.%(nuts)s
            WHERE kind_id == ?
                AND tmin_seconds == (
                    SELECT MIN(tmin_seconds)
                    FROM %(db)s.%(nuts)s
                    WHERE kind_id == ?)
        ''')

        # Analogous query for the latest (tmax_seconds, tmax_offset).
        sql_max = self._sql('''
            SELECT MAX(tmax_seconds), MAX(tmax_offset)
            FROM %(db)s.%(nuts)s
            WHERE kind_id == ?
                AND tmax_seconds == (
                    SELECT MAX(tmax_seconds)
                    FROM %(db)s.%(nuts)s
                    WHERE kind_id == ?)
        ''')

        gtmin = None
        gtmax = None

        # Accept a single kind given as plain string.
        if isinstance(kinds, str):
            kinds = [kinds]

        if kinds is None:
            kind_ids = model.g_content_kind_ids
        else:
            kind_ids = model.to_kind_ids(kinds)

        tmins = []
        tmaxs = []
        for kind_id in kind_ids:
            for tmin_seconds, tmin_offset in self._conn.execute(
                    sql_min, (kind_id, kind_id)):
                tmins.append(model.tjoin(tmin_seconds, tmin_offset))

            for (tmax_seconds, tmax_offset) in self._conn.execute(
                    sql_max, (kind_id, kind_id)):
                tmaxs.append(model.tjoin(tmax_seconds, tmax_offset))

        # Sentinel limits stored in the database mean "unknown"; map
        # them to None before aggregating.
        tmins = [tmin if tmin != model.g_tmin else None for tmin in tmins]
        tmaxs = [tmax if tmax != model.g_tmax else None for tmax in tmaxs]

        if tight:
            gtmin = nonef(min, tmins)
            gtmax = nonef(max, tmaxs)
        else:
            # Any unknown limit makes the combined limit unknown.
            gtmin = None if None in tmins else nonef(min, tmins)
            gtmax = None if None in tmaxs else nonef(max, tmaxs)

        if dummy_limits:
            if gtmin is None:
                gtmin = model.g_tmin
            if gtmax is None:
                gtmax = model.g_tmax

        return gtmin, gtmax

1267 

1268 def has(self, kinds): 

1269 ''' 

1270 Check availability of given content kinds. 

1271 

1272 :param kinds: 

1273 Content kinds to query. 

1274 :type kind: 

1275 list of str 

1276 

1277 :returns: 

1278 ``True`` if any of the queried content kinds is available 

1279 in the selection. 

1280 ''' 

1281 self_tmin, self_tmax = self.get_time_span( 

1282 kinds, dummy_limits=False) 

1283 

1284 return None not in (self_tmin, self_tmax) 

1285 

1286 def get_deltat_span(self, kind): 

1287 ''' 

1288 Get min and max sampling interval of all content of given kind. 

1289 

1290 :param kind: 

1291 Content kind 

1292 :type kind: 

1293 str 

1294 

1295 :returns: ``(deltat_min, deltat_max)`` 

1296 ''' 

1297 

1298 deltats = [ 

1299 deltat for deltat in self.get_deltats(kind) 

1300 if deltat is not None] 

1301 

1302 if deltats: 

1303 return min(deltats), max(deltats) 

1304 else: 

1305 return None, None 

1306 

1307 def iter_kinds(self, codes=None): 

1308 ''' 

1309 Iterate over content types available in selection. 

1310 

1311 :param codes: 

1312 If given, get kinds only for selected codes identifier. 

1313 Only a single identifier may be given here and no pattern matching 

1314 is done, currently. 

1315 :type codes: 

1316 :py:class:`~pyrocko.squirrel.model.Codes` 

1317 

1318 :yields: 

1319 Available content kinds as :py:class:`str`. 

1320 

1321 :complexity: 

1322 O(1), independent of number of nuts. 

1323 ''' 

1324 

1325 return self._database._iter_kinds( 

1326 codes=codes, 

1327 kind_codes_count='%(db)s.%(kind_codes_count)s' % self._names) 

1328 

1329 def iter_deltats(self, kind=None): 

1330 ''' 

1331 Iterate over sampling intervals available in selection. 

1332 

1333 :param kind: 

1334 If given, get sampling intervals only for a given content type. 

1335 :type kind: 

1336 str 

1337 

1338 :yields: 

1339 :py:class:`float` values. 

1340 

1341 :complexity: 

1342 O(1), independent of number of nuts. 

1343 ''' 

1344 return self._database._iter_deltats( 

1345 kind=kind, 

1346 kind_codes_count='%(db)s.%(kind_codes_count)s' % self._names) 

1347 

1348 def iter_codes(self, kind=None): 

1349 ''' 

1350 Iterate over content identifier code sequences available in selection. 

1351 

1352 :param kind: 

1353 If given, get codes only for a given content type. 

1354 :type kind: 

1355 str 

1356 

1357 :yields: 

1358 :py:class:`tuple` of :py:class:`str` 

1359 

1360 :complexity: 

1361 O(1), independent of number of nuts. 

1362 ''' 

1363 return self._database._iter_codes( 

1364 kind=kind, 

1365 kind_codes_count='%(db)s.%(kind_codes_count)s' % self._names) 

1366 

1367 def _iter_codes_info(self, kind=None, codes=None): 

1368 ''' 

1369 Iterate over number of occurrences of any (kind, codes) combination. 

1370 

1371 :param kind: 

1372 If given, get counts only for selected content type. 

1373 :type kind: 

1374 str 

1375 

1376 :yields: 

1377 Tuples of the form ``(kind, codes, deltat, kind_codes_id, count)``. 

1378 

1379 :complexity: 

1380 O(1), independent of number of nuts. 

1381 ''' 

1382 return self._database._iter_codes_info( 

1383 kind=kind, 

1384 codes=codes, 

1385 kind_codes_count='%(db)s.%(kind_codes_count)s' % self._names) 

1386 

1387 def get_kinds(self, codes=None): 

1388 ''' 

1389 Get content types available in selection. 

1390 

1391 :param codes: 

1392 If given, get kinds only for selected codes identifier. 

1393 Only a single identifier may be given here and no pattern matching 

1394 is done, currently. 

1395 :type codes: 

1396 :py:class:`~pyrocko.squirrel.model.Codes` 

1397 

1398 :returns: 

1399 Sorted list of available content types. 

1400 :rtype: 

1401 py:class:`list` of :py:class:`str` 

1402 

1403 :complexity: 

1404 O(1), independent of number of nuts. 

1405 

1406 ''' 

1407 return sorted(list(self.iter_kinds(codes=codes))) 

1408 

1409 def get_deltats(self, kind=None): 

1410 ''' 

1411 Get sampling intervals available in selection. 

1412 

1413 :param kind: 

1414 If given, get sampling intervals only for selected content type. 

1415 :type kind: 

1416 str 

1417 

1418 :complexity: 

1419 O(1), independent of number of nuts. 

1420 

1421 :returns: Sorted list of available sampling intervals. 

1422 ''' 

1423 return sorted(list(self.iter_deltats(kind=kind))) 

1424 

1425 def get_codes(self, kind=None): 

1426 ''' 

1427 Get identifier code sequences available in selection. 

1428 

1429 :param kind: 

1430 If given, get codes only for selected content type. 

1431 :type kind: 

1432 str 

1433 

1434 :complexity: 

1435 O(1), independent of number of nuts. 

1436 

1437 :returns: Sorted list of available codes as tuples of strings. 

1438 ''' 

1439 return sorted(list(self.iter_codes(kind=kind))) 

1440 

1441 def get_counts(self, kind=None): 

1442 ''' 

1443 Get number of occurrences of any (kind, codes) combination. 

1444 

1445 :param kind: 

1446 If given, get codes only for selected content type. 

1447 :type kind: 

1448 str 

1449 

1450 :complexity: 

1451 O(1), independent of number of nuts. 

1452 

1453 :returns: ``dict`` with ``counts[kind][codes]`` or ``counts[codes]`` 

1454 if kind is not ``None`` 

1455 ''' 

1456 d = {} 

1457 for kind_id, codes, _, _, count in self._iter_codes_info(kind=kind): 

1458 if kind_id not in d: 

1459 v = d[kind_id] = {} 

1460 else: 

1461 v = d[kind_id] 

1462 

1463 if codes not in v: 

1464 v[codes] = 0 

1465 

1466 v[codes] += count 

1467 

1468 if kind is not None: 

1469 return d[to_kind_id(kind)] 

1470 else: 

1471 return dict((to_kind(kind_id), v) for (kind_id, v) in d.items()) 

1472 

1473 def glob_codes(self, kind, codes): 

1474 ''' 

1475 Find codes matching given patterns. 

1476 

1477 :param kind: 

1478 Content kind to be queried. 

1479 :type kind: 

1480 str 

1481 

1482 :param codes: 

1483 List of code patterns to query. 

1484 :type codes: 

1485 :py:class:`list` of :py:class:`~pyrocko.squirrel.model.Codes` 

1486 objects appropriate for the queried content type, or anything which 

1487 can be converted to such objects. 

1488 

1489 :returns: 

1490 List of matches of the form ``[kind_codes_id, codes, deltat]``. 

1491 ''' 

1492 

1493 kind_id = to_kind_id(kind) 

1494 args = [kind_id] 

1495 pats = codes_patterns_for_kind(kind_id, codes) 

1496 

1497 if pats: 

1498 codes_cond = 'AND ( %s ) ' % ' OR '.join( 

1499 ('kind_codes.codes GLOB ?',) * len(pats)) 

1500 

1501 args.extend(pat.safe_str for pat in pats) 

1502 else: 

1503 codes_cond = '' 

1504 

1505 sql = self._sql(''' 

1506 SELECT kind_codes_id, codes, deltat FROM kind_codes 

1507 WHERE 

1508 kind_id == ? ''' + codes_cond) 

1509 

1510 return list(map(list, self._conn.execute(sql, args))) 

1511 

1512 def update(self, constraint=None, **kwargs): 

1513 ''' 

1514 Update or partially update channel and event inventories. 

1515 

1516 :param constraint: 

1517 Selection of times or areas to be brought up to date. 

1518 :type constraint: 

1519 :py:class:`~pyrocko.squirrel.client.base.Constraint` 

1520 

1521 :param \\*\\*kwargs: 

1522 Shortcut for setting ``constraint=Constraint(**kwargs)``. 

1523 

1524 This function triggers all attached remote sources, to check for 

1525 updates in the meta-data. The sources will only submit queries when 

1526 their expiration date has passed, or if the selection spans into 

1527 previously unseen times or areas. 

1528 ''' 

1529 

1530 if constraint is None: 

1531 constraint = client.Constraint(**kwargs) 

1532 

1533 task = make_task('Updating sources') 

1534 for source in task(self._sources): 

1535 source.update_channel_inventory(self, constraint) 

1536 source.update_event_inventory(self, constraint) 

1537 

1538 def update_waveform_promises(self, constraint=None, **kwargs): 

1539 ''' 

1540 Permit downloading of remote waveforms. 

1541 

1542 :param constraint: 

1543 Remote waveforms compatible with the given constraint are enabled 

1544 for download. 

1545 :type constraint: 

1546 :py:class:`~pyrocko.squirrel.client.base.Constraint` 

1547 

1548 :param \\*\\*kwargs: 

1549 Shortcut for setting ``constraint=Constraint(**kwargs)``. 

1550 

1551 Calling this method permits Squirrel to download waveforms from remote 

1552 sources when processing subsequent waveform requests. This works by 

1553 inserting so called waveform promises into the database. It will look 

1554 into the available channels for each remote source and create a promise 

1555 for each channel compatible with the given constraint. If the promise 

1556 then matches in a waveform request, Squirrel tries to download the 

1557 waveform. If the download is successful, the downloaded waveform is 

1558 added to the Squirrel and the promise is deleted. If the download 

1559 fails, the promise is kept if the reason of failure looks like being 

1560 temporary, e.g. because of a network failure. If the cause of failure 

1561 however seems to be permanent, the promise is deleted so that no 

1562 further attempts are made to download a waveform which might not be 

1563 available from that server at all. To force re-scheduling after a 

1564 permanent failure, call :py:meth:`update_waveform_promises` 

1565 yet another time. 

1566 ''' 

1567 

1568 if constraint is None: 

1569 constraint = client.Constraint(**kwargs) 

1570 

1571 for source in self._sources: 

1572 source.update_waveform_promises(self, constraint) 

1573 

1574 def remove_waveform_promises(self, from_database='selection'): 

1575 ''' 

1576 Remove waveform promises from live selection or global database. 

1577 

1578 Calling this function removes all waveform promises provided by the 

1579 attached sources. 

1580 

1581 :param from_database: 

1582 Remove from live selection ``'selection'`` or global database 

1583 ``'global'``. 

1584 ''' 

1585 for source in self._sources: 

1586 source.remove_waveform_promises(self, from_database=from_database) 

1587 

1588 def update_responses(self, constraint=None, **kwargs): 

1589 if constraint is None: 

1590 constraint = client.Constraint(**kwargs) 

1591 

1592 for source in self._sources: 

1593 source.update_response_inventory(self, constraint) 

1594 

1595 def get_nfiles(self): 

1596 ''' 

1597 Get number of files in selection. 

1598 ''' 

1599 

1600 sql = self._sql('''SELECT COUNT(*) FROM %(db)s.%(file_states)s''') 

1601 for row in self._conn.execute(sql): 

1602 return row[0] 

1603 

1604 def get_nnuts(self): 

1605 ''' 

1606 Get number of nuts in selection. 

1607 ''' 

1608 

1609 sql = self._sql('''SELECT COUNT(*) FROM %(db)s.%(nuts)s''') 

1610 for row in self._conn.execute(sql): 

1611 return row[0] 

1612 

1613 def get_total_size(self): 

1614 ''' 

1615 Get aggregated file size available in selection. 

1616 ''' 

1617 

1618 sql = self._sql(''' 

1619 SELECT SUM(files.size) FROM %(db)s.%(file_states)s 

1620 INNER JOIN files 

1621 ON %(db)s.%(file_states)s.file_id = files.file_id 

1622 ''') 

1623 

1624 for row in self._conn.execute(sql): 

1625 return row[0] or 0 

1626 

1627 def get_stats(self): 

1628 ''' 

1629 Get statistics on contents available through this selection. 

1630 ''' 

1631 

1632 kinds = self.get_kinds() 

1633 time_spans = {} 

1634 for kind in kinds: 

1635 time_spans[kind] = self.get_time_span([kind]) 

1636 

1637 return SquirrelStats( 

1638 nfiles=self.get_nfiles(), 

1639 nnuts=self.get_nnuts(), 

1640 kinds=kinds, 

1641 codes=self.get_codes(), 

1642 total_size=self.get_total_size(), 

1643 counts=self.get_counts(), 

1644 time_spans=time_spans, 

1645 sources=[s.describe() for s in self._sources], 

1646 operators=[op.describe() for op in self._operators]) 

1647 

1648 @filldocs 

1649 def check( 

1650 self, obj=None, tmin=None, tmax=None, time=None, codes=None, 

1651 ignore=[]): 

1652 ''' 

1653 Check for common data/metadata problems. 

1654 

1655 %(query_args)s 

1656 

1657 :param ignore: 

1658 Problem types to be ignored. 

1659 :type ignore: 

1660 :class:`list` of :class:`str` 

1661 (:py:class:`~pyrocko.squirrel.check.SquirrelCheckProblemType`) 

1662 

1663 :returns: 

1664 :py:class:`~pyrocko.squirrel.check.SquirrelCheck` object 

1665 containing the results of the check. 

1666 

1667 See :py:func:`~pyrocko.squirrel.check.do_check`. 

1668 ''' 

1669 

1670 from .check import do_check 

1671 tmin, tmax, codes = self._get_selection_args( 

1672 CHANNEL, obj, tmin, tmax, time, codes) 

1673 

1674 return do_check(self, tmin=tmin, tmax=tmax, codes=codes, ignore=ignore) 

1675 

1676 def get_content( 

1677 self, 

1678 nut, 

1679 cache_id='default', 

1680 accessor_id='default', 

1681 show_progress=False, 

1682 model='squirrel'): 

1683 

1684 ''' 

1685 Get and possibly load full content for a given index entry from file. 

1686 

1687 Loads the actual content objects (channel, station, waveform, ...) from 

1688 file. For efficiency, sibling content (all stuff in the same file 

1689 segment) will also be loaded as a side effect. The loaded contents are 

1690 cached in the Squirrel object. 

1691 ''' 

1692 

1693 content_cache = self._content_caches[cache_id] 

1694 if not content_cache.has(nut): 

1695 

1696 for nut_loaded in io.iload( 

1697 nut.file_path, 

1698 segment=nut.file_segment, 

1699 format=nut.file_format, 

1700 database=self._database, 

1701 update_selection=self, 

1702 show_progress=show_progress): 

1703 

1704 content_cache.put(nut_loaded) 

1705 

1706 try: 

1707 return content_cache.get(nut, accessor_id, model) 

1708 

1709 except KeyError: 

1710 raise error.NotAvailable( 

1711 'Unable to retrieve content: %s, %s, %s, %s' % nut.key) 

1712 

1713 def advance_accessor(self, accessor_id='default', cache_id=None): 

1714 ''' 

1715 Notify memory caches about consumer moving to a new data batch. 

1716 

1717 :param accessor_id: 

1718 Name of accessing consumer to be advanced. 

1719 :type accessor_id: 

1720 str 

1721 

1722 :param cache_id: 

1723 Name of cache to for which the accessor should be advanced. By 

1724 default the named accessor is advanced in all registered caches. 

1725 By default, two caches named ``'default'`` and ``'waveform'`` are 

1726 available. 

1727 :type cache_id: 

1728 str 

1729 

1730 See :py:class:`~pyrocko.squirrel.cache.ContentCache` for details on how 

1731 Squirrel's memory caching works and can be tuned. Default behaviour is 

1732 to release data when it has not been used in the latest data 

1733 window/batch. If the accessor is never advanced, data is cached 

1734 indefinitely - which is often desired e.g. for station meta-data. 

1735 Methods for consecutive data traversal, like 

1736 :py:meth:`chopper_waveforms` automatically advance and clear 

1737 their accessor. 

1738 ''' 

1739 for cache_ in ( 

1740 self._content_caches.keys() 

1741 if cache_id is None 

1742 else [cache_id]): 

1743 

1744 self._content_caches[cache_].advance_accessor(accessor_id) 

1745 

1746 def clear_accessor(self, accessor_id, cache_id=None): 

1747 ''' 

1748 Notify memory caches about a consumer having finished. 

1749 

1750 :param accessor_id: 

1751 Name of accessor to be cleared. 

1752 :type accessor_id: 

1753 str 

1754 

1755 :param cache_id: 

1756 Name of cache for which the accessor should be cleared. By default 

1757 the named accessor is cleared from all registered caches. By 

1758 default, two caches named ``'default'`` and ``'waveform'`` are 

1759 available. 

1760 :type cache_id: 

1761 str 

1762 

1763 Calling this method clears all references to cache entries held by the 

1764 named accessor. Cache entries are then freed if not referenced by any 

1765 other accessor. 

1766 ''' 

1767 

1768 for cache_ in ( 

1769 self._content_caches.keys() 

1770 if cache_id is None 

1771 else [cache_id]): 

1772 

1773 self._content_caches[cache_].clear_accessor(accessor_id) 

1774 

1775 def get_cache_stats(self, cache_id): 

1776 return self._content_caches[cache_id].get_stats() 

1777 

1778 @filldocs 

1779 def get_stations( 

1780 self, obj=None, tmin=None, tmax=None, time=None, codes=None, 

1781 model='squirrel', on_error='raise'): 

1782 

1783 ''' 

1784 Get stations matching given constraints. 

1785 

1786 %(query_args)s 

1787 

1788 :param model: 

1789 Select object model for returned values: ``'squirrel'`` to get 

1790 Squirrel station objects or ``'pyrocko'`` to get Pyrocko station 

1791 objects with channel information attached. 

1792 :type model: 

1793 str 

1794 

1795 :returns: 

1796 List of :py:class:`pyrocko.squirrel.Station 

1797 <pyrocko.squirrel.model.Station>` objects by default or list of 

1798 :py:class:`pyrocko.model.Station <pyrocko.model.station.Station>` 

1799 objects if ``model='pyrocko'`` is requested. 

1800 

1801 See :py:meth:`iter_nuts` for details on time span matching. 

1802 ''' 

1803 

1804 if model == 'pyrocko': 

1805 return self._get_pyrocko_stations( 

1806 obj, tmin, tmax, time, codes, on_error=on_error) 

1807 elif model in ('squirrel', 'stationxml', 'stationxml+'): 

1808 args = self._get_selection_args( 

1809 STATION, obj, tmin, tmax, time, codes) 

1810 

1811 nuts = sorted( 

1812 self.iter_nuts('station', *args), key=lambda nut: nut.dkey) 

1813 

1814 return [self.get_content(nut, model=model) for nut in nuts] 

1815 else: 

1816 raise ValueError('Invalid station model: %s' % model) 

1817 

1818 @filldocs 

1819 def get_channels( 

1820 self, obj=None, tmin=None, tmax=None, time=None, codes=None, 

1821 model='squirrel'): 

1822 

1823 ''' 

1824 Get channels matching given constraints. 

1825 

1826 %(query_args)s 

1827 

1828 :returns: 

1829 List of :py:class:`~pyrocko.squirrel.model.Channel` objects. 

1830 

1831 See :py:meth:`iter_nuts` for details on time span matching. 

1832 ''' 

1833 

1834 args = self._get_selection_args( 

1835 CHANNEL, obj, tmin, tmax, time, codes) 

1836 

1837 nuts = sorted( 

1838 self.iter_nuts('channel', *args), key=lambda nut: nut.dkey) 

1839 

1840 return [self.get_content(nut, model=model) for nut in nuts] 

1841 

    @filldocs
    def get_sensors(
            self, obj=None, tmin=None, tmax=None, time=None, codes=None):

        '''
        Get sensors matching given constraints.

        %(query_args)s

        :returns:
            List of :py:class:`~pyrocko.squirrel.model.Sensor` objects.

        Channels are queried and grouped into sensors via
        :py:meth:`model.Sensor.from_channels
        <pyrocko.squirrel.model.Sensor.from_channels>`.

        See :py:meth:`iter_nuts` for details on time span matching.
        '''

        tmin, tmax, codes = self._get_selection_args(
            CHANNEL, obj, tmin, tmax, time, codes)

        if codes is not None:
            # Widen the last character of the channel code to '?' so
            # that all components of a sensor match the pattern
            # (e.g. 'BHZ' -> 'BH?'); '*' already matches everything.
            codes = codes_patterns_list(
                (entry.replace(channel=entry.channel[:-1] + '?')
                 if entry.channel != '*' else entry)
                for entry in codes)

        nuts = sorted(
            self.iter_nuts(
                'channel', tmin, tmax, codes), key=lambda nut: nut.dkey)

        # Keep only sensors whose combined time span still matches the
        # requested interval (grouping may widen the span).
        return [
            sensor for sensor in model.Sensor.from_channels(
                self.get_content(nut) for nut in nuts)
            if match_time_span(tmin, tmax, sensor)]

1874 

1875 @filldocs 

1876 def get_responses( 

1877 self, obj=None, tmin=None, tmax=None, time=None, codes=None, 

1878 model='squirrel'): 

1879 

1880 ''' 

1881 Get instrument responses matching given constraints. 

1882 

1883 %(query_args)s 

1884 

1885 :param model: 

1886 Select data model for returned objects. Choices: ``'squirrel'``, 

1887 ``'stationxml'``, ``'stationxml+'``. See return value description. 

1888 :type model: 

1889 str 

1890 

1891 :returns: 

1892 List of :py:class:`~pyrocko.squirrel.model.Response` if ``model == 

1893 'squirrel'`` or list of 

1894 :py:class:`~pyrocko.io.stationxml.FDSNStationXML` 

1895 if ``model == 'stationxml'`` or list of 

1896 (:py:class:`~pyrocko.squirrel.model.Response`, 

1897 :py:class:`~pyrocko.io.stationxml.FDSNStationXML`) if ``model == 

1898 'stationxml+'``. 

1899 

1900 See :py:meth:`iter_nuts` for details on time span matching. 

1901 ''' 

1902 

1903 args = self._get_selection_args( 

1904 RESPONSE, obj, tmin, tmax, time, codes) 

1905 

1906 nuts = sorted( 

1907 self.iter_nuts('response', *args), key=lambda nut: nut.dkey) 

1908 

1909 return [self.get_content(nut, model=model) for nut in nuts] 

1910 

1911 @filldocs 

1912 def get_response( 

1913 self, obj=None, tmin=None, tmax=None, time=None, codes=None, 

1914 model='squirrel', on_duplicate='raise'): 

1915 

1916 ''' 

1917 Get instrument response matching given constraints. 

1918 

1919 %(query_args)s 

1920 

1921 :param model: 

1922 Select data model for returned object. Choices: ``'squirrel'``, 

1923 ``'stationxml'``, ``'stationxml+'``. See return value description. 

1924 :type model: 

1925 str 

1926 

1927 :param on_duplicate: 

1928 Determines how duplicates/multiple matching responses are handled. 

1929 Choices: ``'raise'`` - raise 

1930 :py:exc:`~pyrocko.squirrel.error.Duplicate`, ``'warn'`` - emit a 

1931 warning and return first match, ``'ignore'`` - silently return 

1932 first match. 

1933 :type on_duplicate: 

1934 str 

1935 

1936 :returns: 

1937 :py:class:`~pyrocko.squirrel.model.Response` if 

1938 ``model == 'squirrel'`` or 

1939 :py:class:`~pyrocko.io.stationxml.FDSNStationXML` if ``model == 

1940 'stationxml'`` or 

1941 (:py:class:`~pyrocko.squirrel.model.Response`, 

1942 :py:class:`~pyrocko.io.stationxml.FDSNStationXML`) if ``model == 

1943 'stationxml+'``. 

1944 

1945 Same as :py:meth:`get_responses` but returning exactly one response. 

1946 Raises :py:exc:`~pyrocko.squirrel.error.NotAvailable` if none is 

1947 available. Duplicates are handled according to the ``on_duplicate`` 

1948 argument. 

1949 

1950 See :py:meth:`iter_nuts` for details on time span matching. 

1951 ''' 

1952 

1953 if model == 'stationxml': 

1954 model_ = 'stationxml+' 

1955 else: 

1956 model_ = model 

1957 

1958 responses = self.get_responses( 

1959 obj, tmin, tmax, time, codes, model=model_) 

1960 if len(responses) == 0: 

1961 raise error.NotAvailable( 

1962 'No instrument response available (%s).' 

1963 % self._get_selection_args_str( 

1964 RESPONSE, obj, tmin, tmax, time, codes)) 

1965 

1966 elif len(responses) > 1: 

1967 

1968 if on_duplicate in ('raise', 'warn'): 

1969 if model_ == 'squirrel': 

1970 resps_sq = responses 

1971 elif model_ == 'stationxml+': 

1972 resps_sq = [resp[0] for resp in responses] 

1973 else: 

1974 raise ValueError('Invalid response model: %s' % model) 

1975 

1976 rinfo = ':\n' + '\n'.join( 

1977 ' ' + resp.summary for resp in resps_sq) 

1978 

1979 message = \ 

1980 'Multiple instrument responses matching given ' \ 

1981 'constraints (%s)%s%s' % ( 

1982 self._get_selection_args_str( 

1983 RESPONSE, obj, tmin, tmax, time, codes), 

1984 ' -> using first' if on_duplicate == 'warn' else '', 

1985 rinfo) 

1986 

1987 if on_duplicate == 'raise': 

1988 raise error.Duplicate(message) 

1989 

1990 elif on_duplicate == 'warn': 

1991 logger.warning(message) 

1992 

1993 elif on_duplicate == 'ignore': 

1994 pass 

1995 

1996 else: 

1997 ValueError( 

1998 'Invalid argument for on_duplicate: %s' % on_duplicate) 

1999 

2000 if model == 'stationxml': 

2001 return responses[0][1] 

2002 else: 

2003 return responses[0] 

2004 

2005 @filldocs 

2006 def get_events( 

2007 self, obj=None, tmin=None, tmax=None, time=None, codes=None): 

2008 

2009 ''' 

2010 Get events matching given constraints. 

2011 

2012 %(query_args)s 

2013 

2014 :returns: 

2015 List of :py:class:`~pyrocko.model.event.Event` objects. 

2016 

2017 See :py:meth:`iter_nuts` for details on time span matching. 

2018 ''' 

2019 

2020 args = self._get_selection_args(EVENT, obj, tmin, tmax, time, codes) 

2021 nuts = sorted( 

2022 self.iter_nuts('event', *args), key=lambda nut: nut.dkey) 

2023 

2024 return [self.get_content(nut) for nut in nuts] 

2025 

2026 def _redeem_promises(self, *args, order_only=False): 

2027 

2028 def split_promise(order, tmax=None): 

2029 self._split_nuts( 

2030 'waveform_promise', 

2031 order.tmin, tmax if tmax is not None else order.tmax, 

2032 codes=order.codes, 

2033 path=order.source_id) 

2034 

2035 tmin, tmax = args[:2] 

2036 

2037 waveforms = list(self.iter_nuts('waveform', *args)) 

2038 promises = list(self.iter_nuts('waveform_promise', *args)) 

2039 

2040 codes_to_avail = defaultdict(list) 

2041 for nut in waveforms: 

2042 codes_to_avail[nut.codes].append((nut.tmin, nut.tmax)) 

2043 

2044 def tts(x): 

2045 if isinstance(x, tuple): 

2046 return tuple(tts(e) for e in x) 

2047 elif isinstance(x, list): 

2048 return list(tts(e) for e in x) 

2049 else: 

2050 return util.time_to_str(x) 

2051 

2052 now = time.time() 

2053 orders = [] 

2054 for promise in promises: 

2055 waveforms_avail = codes_to_avail[promise.codes] 

2056 for block_tmin, block_tmax in blocks( 

2057 max(tmin, promise.tmin), 

2058 min(tmax, promise.tmax), 

2059 promise.deltat): 

2060 

2061 if block_tmin > now: 

2062 continue 

2063 

2064 orders.append( 

2065 WaveformOrder( 

2066 source_id=promise.file_path, 

2067 codes=promise.codes, 

2068 tmin=block_tmin, 

2069 tmax=block_tmax, 

2070 deltat=promise.deltat, 

2071 gaps=gaps(waveforms_avail, block_tmin, block_tmax), 

2072 time_created=now)) 

2073 

2074 orders_noop, orders = lpick(lambda order: order.gaps, orders) 

2075 

2076 order_keys_noop = set(order_key(order) for order in orders_noop) 

2077 if len(order_keys_noop) != 0 or len(orders_noop) != 0: 

2078 logger.info( 

2079 'Waveform orders already satisified with cached/local data: ' 

2080 '%i (%i)' % (len(order_keys_noop), len(orders_noop))) 

2081 

2082 for order in orders_noop: 

2083 split_promise(order) 

2084 

2085 if order_only: 

2086 if orders: 

2087 self._pending_orders.extend(orders) 

2088 logger.info( 

2089 'Enqueuing %i waveform order%s.' 

2090 % len_plural(orders)) 

2091 return 

2092 else: 

2093 if self._pending_orders: 

2094 orders.extend(self._pending_orders) 

2095 logger.info( 

2096 'Adding %i previously enqueued order%s.' 

2097 % len_plural(self._pending_orders)) 

2098 

2099 self._pending_orders = [] 

2100 

2101 source_ids = [] 

2102 sources = {} 

2103 for source in self._sources: 

2104 if isinstance(source, fdsn.FDSNSource): 

2105 source_ids.append(source._source_id) 

2106 sources[source._source_id] = source 

2107 

2108 source_priority = dict( 

2109 (source_id, i) for (i, source_id) in enumerate(source_ids)) 

2110 

2111 order_groups = defaultdict(list) 

2112 for order in orders: 

2113 order_groups[order_key(order)].append(order) 

2114 

2115 for k, order_group in order_groups.items(): 

2116 order_group.sort( 

2117 key=lambda order: source_priority[order.source_id]) 

2118 

2119 n_order_groups = len(order_groups) 

2120 

2121 if len(order_groups) != 0 or len(orders) != 0: 

2122 logger.info( 

2123 'Waveform orders standing for download: %i (%i)' 

2124 % (len(order_groups), len(orders))) 

2125 

2126 task = make_task('Waveform orders processed', n_order_groups) 

2127 else: 

2128 task = None 

2129 

2130 def release_order_group(order): 

2131 okey = order_key(order) 

2132 for followup in order_groups[okey]: 

2133 if followup is not order: 

2134 split_promise(followup) 

2135 

2136 del order_groups[okey] 

2137 

2138 if task: 

2139 task.update(n_order_groups - len(order_groups)) 

2140 

2141 def noop(order): 

2142 pass 

2143 

2144 def success(order, trs): 

2145 release_order_group(order) 

2146 if order.is_near_real_time(): 

2147 if not trs: 

2148 return # keep promise when no data received at real time 

2149 else: 

2150 tmax = max(tr.tmax+tr.deltat for tr in trs) 

2151 tmax = order.tmin \ 

2152 + round((tmax - order.tmin) / order.deltat) \ 

2153 * order.deltat 

2154 split_promise(order, tmax) 

2155 else: 

2156 split_promise(order) 

2157 

2158 def batch_add(paths): 

2159 self.add(paths) 

2160 

2161 calls = queue.Queue() 

2162 

2163 def enqueue(f): 

2164 def wrapper(*args): 

2165 calls.put((f, args)) 

2166 

2167 return wrapper 

2168 

2169 while order_groups: 

2170 

2171 orders_now = [] 

2172 empty = [] 

2173 for k, order_group in order_groups.items(): 

2174 try: 

2175 orders_now.append(order_group.pop(0)) 

2176 except IndexError: 

2177 empty.append(k) 

2178 

2179 for k in empty: 

2180 del order_groups[k] 

2181 

2182 by_source_id = defaultdict(list) 

2183 for order in orders_now: 

2184 by_source_id[order.source_id].append(order) 

2185 

2186 threads = [] 

2187 for source_id in by_source_id: 

2188 def download(): 

2189 try: 

2190 sources[source_id].download_waveforms( 

2191 by_source_id[source_id], 

2192 success=enqueue(success), 

2193 error_permanent=enqueue(split_promise), 

2194 error_temporary=noop, 

2195 batch_add=enqueue(batch_add)) 

2196 

2197 finally: 

2198 calls.put(None) 

2199 

2200 if len(by_source_id) > 1: 

2201 thread = threading.Thread(target=download) 

2202 thread.start() 

2203 threads.append(thread) 

2204 else: 

2205 download() 

2206 calls.put(None) 

2207 

2208 ndone = 0 

2209 while ndone < len(by_source_id): 

2210 ret = calls.get() 

2211 if ret is None: 

2212 ndone += 1 

2213 else: 

2214 ret[0](*ret[1]) 

2215 

2216 for thread in threads: 

2217 thread.join() 

2218 

2219 if task: 

2220 task.update(n_order_groups - len(order_groups)) 

2221 

2222 if task: 

2223 task.done() 

2224 

2225 @filldocs 

2226 def get_waveform_nuts( 

2227 self, obj=None, tmin=None, tmax=None, time=None, codes=None, 

2228 codes_exclude=None, sample_rate_min=None, sample_rate_max=None, 

2229 order_only=False): 

2230 

2231 ''' 

2232 Get waveform content entities matching given constraints. 

2233 

2234 %(query_args)s 

2235 

2236 Like :py:meth:`get_nuts` with ``kind='waveform'`` but additionally 

2237 resolves matching waveform promises (downloads waveforms from remote 

2238 sources). 

2239 

2240 See :py:meth:`iter_nuts` for details on time span matching. 

2241 ''' 

2242 

2243 args = self._get_selection_args(WAVEFORM, obj, tmin, tmax, time, codes) 

2244 

2245 if self.downloads_enabled: 

2246 self._redeem_promises( 

2247 *args, 

2248 codes_exclude, 

2249 sample_rate_min, 

2250 sample_rate_max, 

2251 order_only=order_only) 

2252 

2253 nuts = sorted( 

2254 self.iter_nuts('waveform', *args), key=lambda nut: nut.dkey) 

2255 

2256 return nuts 

2257 

2258 @filldocs 

2259 def have_waveforms( 

2260 self, obj=None, tmin=None, tmax=None, time=None, codes=None): 

2261 

2262 ''' 

2263 Check if any waveforms or waveform promises are available for given 

2264 constraints. 

2265 

2266 %(query_args)s 

2267 ''' 

2268 

2269 args = self._get_selection_args(WAVEFORM, obj, tmin, tmax, time, codes) 

2270 return bool(list( 

2271 self.iter_nuts('waveform', *args, limit=1))) \ 

2272 or (self.downloads_enabled and bool(list( 

2273 self.iter_nuts('waveform_promise', *args, limit=1)))) 

2274 

    @filldocs
    def get_waveforms(
            self, obj=None, tmin=None, tmax=None, time=None, codes=None,
            codes_exclude=None, sample_rate_min=None, sample_rate_max=None,
            uncut=False, want_incomplete=True, degap=True,
            maxgap=5, maxlap=None, snap=None, include_last=False,
            load_data=True, accessor_id='default', operator_params=None,
            order_only=False, channel_priorities=None):

        '''
        Get waveforms matching given constraints.

        %(query_args)s

        :param sample_rate_min:
            Consider only waveforms with a sampling rate equal to or greater
            than the given value [Hz].
        :type sample_rate_min:
            float

        :param sample_rate_max:
            Consider only waveforms with a sampling rate equal to or less than
            the given value [Hz].
        :type sample_rate_max:
            float

        :param uncut:
            Set to ``True``, to disable cutting traces to [``tmin``, ``tmax``]
            and to disable degapping/deoverlapping. Returns untouched traces as
            they are read from file segment. File segments are always read in
            their entirety.
        :type uncut:
            bool

        :param want_incomplete:
            If ``True``, gappy/incomplete traces are included in the result.
        :type want_incomplete:
            bool

        :param degap:
            If ``True``, connect traces and remove gaps and overlaps.
        :type degap:
            bool

        :param maxgap:
            Maximum gap size in samples which is filled with interpolated
            samples when ``degap`` is ``True``.
        :type maxgap:
            int

        :param maxlap:
            Maximum overlap size in samples which is removed when ``degap`` is
            ``True``.
        :type maxlap:
            int

        :param snap:
            Rounding functions used when computing sample index from time
            instance, for trace start and trace end, respectively. By default,
            ``(round, round)`` is used.
        :type snap:
            :py:class:`tuple` of 2 callables

        :param include_last:
            If ``True``, add one more sample to the returned traces (the sample
            which would be the first sample of a query with ``tmin`` set to the
            current value of ``tmax``).
        :type include_last:
            bool

        :param load_data:
            If ``True``, waveform data samples are read from files (or cache).
            If ``False``, meta-information-only traces are returned (dummy
            traces with no data samples).
        :type load_data:
            bool

        :param accessor_id:
            Name of consumer on who's behalf data is accessed. Used in cache
            management (see :py:mod:`~pyrocko.squirrel.cache`). Used as a key
            to distinguish different points of extraction for the decision of
            when to release cached waveform data. Should be used when data is
            alternately extracted from more than one region / selection.
        :type accessor_id:
            str

        :param channel_priorities:
            List of band/instrument code combinations to try. For example,
            giving ``['HH', 'BH']`` would first try to get ``HH?`` channels and
            then fallback to ``BH?`` if these are not available. The first
            matching waveforms are returned. Use in combination with
            ``sample_rate_min`` and ``sample_rate_max`` to constrain the sample
            rate.
        :type channel_priorities:
            :py:class:`list` of :py:class:`str`

        See :py:meth:`iter_nuts` for details on time span matching.

        Loaded data is kept in memory (at least) until
        :py:meth:`clear_accessor` has been called or
        :py:meth:`advance_accessor` has been called two consecutive times
        without data being accessed between the two calls (by this accessor).
        Data may still be further kept in the memory cache if held alive by
        consumers with a different ``accessor_id``.
        '''

        tmin, tmax, codes = self._get_selection_args(
            WAVEFORM, obj, tmin, tmax, time, codes)

        # Channel priorities are handled by a separate driver which calls
        # back into this method once per band/instrument combination.
        if channel_priorities is not None:
            return self._get_waveforms_prioritized(
                tmin=tmin, tmax=tmax, codes=codes, codes_exclude=codes_exclude,
                sample_rate_min=sample_rate_min,
                sample_rate_max=sample_rate_max,
                uncut=uncut, want_incomplete=want_incomplete, degap=degap,
                maxgap=maxgap, maxlap=maxlap, snap=snap,
                include_last=include_last, load_data=load_data,
                accessor_id=accessor_id, operator_params=operator_params,
                order_only=order_only, channel_priorities=channel_priorities)

        kinds = ['waveform']
        if self.downloads_enabled:
            kinds.append('waveform_promise')

        self_tmin, self_tmax = self.get_time_span(kinds)

        if None in (self_tmin, self_tmax):
            logger.warning(
                'No waveforms available.')
            return []

        # Default open time bounds to the full extent of available content.
        tmin = tmin if tmin is not None else self_tmin
        tmax = tmax if tmax is not None else self_tmax

        if codes is not None and len(codes) == 1:
            # TODO: fix for multiple / mixed codes
            # If an operator is registered for these codes, delegate to it.
            operator = self.get_operator(codes[0])
            if operator is not None:
                return operator.get_waveforms(
                    self, codes[0],
                    tmin=tmin, tmax=tmax,
                    uncut=uncut, want_incomplete=want_incomplete, degap=degap,
                    maxgap=maxgap, maxlap=maxlap, snap=snap,
                    include_last=include_last, load_data=load_data,
                    accessor_id=accessor_id, params=operator_params)

        # This resolves waveform promises (may trigger downloads).
        nuts = self.get_waveform_nuts(
            obj, tmin, tmax, time, codes, codes_exclude, sample_rate_min,
            sample_rate_max, order_only=order_only)

        if order_only:
            return []

        if load_data:
            traces = [
                self.get_content(nut, 'waveform', accessor_id) for nut in nuts]

        else:
            # Meta-information-only traces (no sample data).
            traces = [
                trace.Trace(**nut.trace_kwargs) for nut in nuts]

        if uncut:
            return traces

        if snap is None:
            snap = (round, round)

        chopped = []
        for tr in traces:
            # The cached trace may carry data even when load_data is False;
            # strip it from a copy so the cached object stays untouched.
            if not load_data and tr.ydata is not None:
                tr = tr.copy(data=False)
                tr.ydata = None

            try:
                chopped.append(tr.chop(
                    tmin, tmax,
                    inplace=False,
                    snap=snap,
                    include_last=include_last))

            except trace.NoData:
                pass

        processed = self._process_chopped(
            chopped, degap, maxgap, maxlap, want_incomplete, tmin, tmax)

        return processed

2462 

2463 def _get_waveforms_prioritized( 

2464 self, tmin=None, tmax=None, codes=None, codes_exclude=None, 

2465 channel_priorities=None, **kwargs): 

2466 

2467 trs_all = [] 

2468 codes_have = set() 

2469 for channel in channel_priorities: 

2470 assert len(channel) == 2 

2471 if codes is not None: 

2472 codes_now = [ 

2473 codes_.replace(channel=channel+'?') for codes_ in codes] 

2474 else: 

2475 codes_now = model.CodesNSLCE('*', '*', '*', channel+'?') 

2476 

2477 codes_exclude_now = list(set( 

2478 codes_.replace(channel=channel+codes_.channel[-1]) 

2479 for codes_ in codes_have)) 

2480 

2481 if codes_exclude: 

2482 codes_exclude_now.extend(codes_exclude) 

2483 

2484 trs = self.get_waveforms( 

2485 tmin=tmin, 

2486 tmax=tmax, 

2487 codes=codes_now, 

2488 codes_exclude=codes_exclude_now, 

2489 **kwargs) 

2490 

2491 codes_have.update(set(tr.codes for tr in trs)) 

2492 trs_all.extend(trs) 

2493 

2494 return trs_all 

2495 

    @filldocs
    def chopper_waveforms(
            self, obj=None, tmin=None, tmax=None, time=None, codes=None,
            codes_exclude=None, sample_rate_min=None, sample_rate_max=None,
            tinc=None, tpad=0.,
            want_incomplete=True, snap_window=False,
            degap=True, maxgap=5, maxlap=None,
            snap=None, include_last=False, load_data=True,
            accessor_id=None, clear_accessor=True, operator_params=None,
            grouping=None, channel_priorities=None):

        '''
        Iterate window-wise over waveform archive.

        %(query_args)s

        :param tinc:
            Time increment (window shift time) (default uses ``tmax-tmin``).
        :type tinc:
            :py:func:`~pyrocko.util.get_time_float`

        :param tpad:
            Padding time appended on either side of the data window (window
            overlap is ``2*tpad``).
        :type tpad:
            :py:func:`~pyrocko.util.get_time_float`

        :param want_incomplete:
            If ``True``, gappy/incomplete traces are included in the result.
        :type want_incomplete:
            bool

        :param snap_window:
            If ``True``, start time windows at multiples of tinc with respect
            to system time zero.
        :type snap_window:
            bool

        :param degap:
            If ``True``, connect traces and remove gaps and overlaps.
        :type degap:
            bool

        :param maxgap:
            Maximum gap size in samples which is filled with interpolated
            samples when ``degap`` is ``True``.
        :type maxgap:
            int

        :param maxlap:
            Maximum overlap size in samples which is removed when ``degap`` is
            ``True``.
        :type maxlap:
            int

        :param snap:
            Rounding functions used when computing sample index from time
            instance, for trace start and trace end, respectively. By default,
            ``(round, round)`` is used.
        :type snap:
            :py:class:`tuple` of 2 callables

        :param include_last:
            If ``True``, add one more sample to the returned traces (the sample
            which would be the first sample of a query with ``tmin`` set to the
            current value of ``tmax``).
        :type include_last:
            bool

        :param load_data:
            If ``True``, waveform data samples are read from files (or cache).
            If ``False``, meta-information-only traces are returned (dummy
            traces with no data samples).
        :type load_data:
            bool

        :param accessor_id:
            Name of consumer on who's behalf data is accessed. Used in cache
            management (see :py:mod:`~pyrocko.squirrel.cache`). Used as a key
            to distinguish different points of extraction for the decision of
            when to release cached waveform data. Should be used when data is
            alternately extracted from more than one region / selection.
        :type accessor_id:
            str

        :param clear_accessor:
            If ``True`` (default), :py:meth:`clear_accessor` is called when the
            chopper finishes. Set to ``False`` to keep loaded waveforms in
            memory when the generator returns.
        :type clear_accessor:
            bool

        :param grouping:
            By default, traversal over the data is over time and all matching
            traces of a time window are yielded. Using this option, it is
            possible to traverse the data first by group (e.g. station or
            network) and second by time. This can reduce the number of traces
            in each batch and thus reduce the memory footprint of the process.
        :type grouping:
            :py:class:`~pyrocko.squirrel.operators.base.Grouping`

        :yields:
            For each extracted time window or waveform group a
            :py:class:`Batch` object is yielded.

        See :py:meth:`iter_nuts` for details on time span matching.
        '''

        tmin, tmax, codes = self._get_selection_args(
            WAVEFORM, obj, tmin, tmax, time, codes)

        kinds = ['waveform']
        if self.downloads_enabled:
            kinds.append('waveform_promise')

        self_tmin, self_tmax = self.get_time_span(kinds)

        if None in (self_tmin, self_tmax):
            logger.warning(
                'Content has undefined time span. No waveforms and no '
                'waveform promises?')
            return

        if snap_window and tinc is not None:
            # Align window boundaries to multiples of tinc (system time zero).
            tmin = tmin if tmin is not None else self_tmin
            tmax = tmax if tmax is not None else self_tmax
            tmin = math.floor(tmin / tinc) * tinc
            tmax = math.ceil(tmax / tinc) * tinc
        else:
            # Shrink default bounds by tpad so padded windows stay inside the
            # available time span.
            tmin = tmin if tmin is not None else self_tmin + tpad
            tmax = tmax if tmax is not None else self_tmax - tpad

        if tinc is None:
            tinc = tmax - tmin
            nwin = 1
        elif tinc == 0.0:
            nwin = 1
        else:
            # eps guards against an extra window due to float rounding.
            eps = 1e-6
            nwin = max(1, int((tmax - tmin) / tinc - eps) + 1)

        try:
            if accessor_id is None:
                accessor_id = 'chopper%i' % self._n_choppers_active

            self._n_choppers_active += 1

            if grouping is None:
                codes_list = [codes]
            else:
                # Use an operator only to derive the input code groups.
                operator = Operator(
                    filtering=CodesPatternFiltering(codes=codes),
                    grouping=grouping)

                available = set(self.get_codes(kind='waveform'))
                if self.downloads_enabled:
                    available.update(self.get_codes(kind='waveform_promise'))
                operator.update_mappings(sorted(available))

                codes_list = [
                    codes_patterns_list(scl)
                    for scl in operator.iter_in_codes()]

            ngroups = len(codes_list)
            for igroup, scl in enumerate(codes_list):
                for iwin in range(nwin):
                    # Last window is clipped to tmax.
                    wmin, wmax = tmin+iwin*tinc, min(tmin+(iwin+1)*tinc, tmax)

                    chopped = self.get_waveforms(
                        tmin=wmin-tpad,
                        tmax=wmax+tpad,
                        codes=scl,
                        codes_exclude=codes_exclude,
                        sample_rate_min=sample_rate_min,
                        sample_rate_max=sample_rate_max,
                        snap=snap,
                        include_last=include_last,
                        load_data=load_data,
                        want_incomplete=want_incomplete,
                        degap=degap,
                        maxgap=maxgap,
                        maxlap=maxlap,
                        accessor_id=accessor_id,
                        operator_params=operator_params,
                        channel_priorities=channel_priorities)

                    # Allow the cache to release data from previous windows.
                    self.advance_accessor(accessor_id)

                    yield Batch(
                        tmin=wmin,
                        tmax=wmax,
                        i=iwin,
                        n=nwin,
                        igroup=igroup,
                        ngroups=ngroups,
                        traces=chopped)

        finally:
            # Runs also when the consumer abandons the generator early.
            self._n_choppers_active -= 1
            if clear_accessor:
                self.clear_accessor(accessor_id, 'waveform')

2697 

2698 def _process_chopped( 

2699 self, chopped, degap, maxgap, maxlap, want_incomplete, tmin, tmax): 

2700 

2701 chopped.sort(key=lambda a: a.full_id) 

2702 if degap: 

2703 chopped = trace.degapper(chopped, maxgap=maxgap, maxlap=maxlap) 

2704 

2705 if not want_incomplete: 

2706 chopped_weeded = [] 

2707 for tr in chopped: 

2708 emin = tr.tmin - tmin 

2709 emax = tr.tmax + tr.deltat - tmax 

2710 if (abs(emin) <= 0.5*tr.deltat and abs(emax) <= 0.5*tr.deltat): 

2711 chopped_weeded.append(tr) 

2712 

2713 elif degap: 

2714 if (0. < emin <= 5. * tr.deltat 

2715 and -5. * tr.deltat <= emax < 0.): 

2716 

2717 tr.extend(tmin, tmax-tr.deltat, fillmethod='repeat') 

2718 chopped_weeded.append(tr) 

2719 

2720 chopped = chopped_weeded 

2721 

2722 return chopped 

2723 

2724 def _get_pyrocko_stations( 

2725 self, obj=None, tmin=None, tmax=None, time=None, codes=None, 

2726 on_error='raise'): 

2727 

2728 from pyrocko import model as pmodel 

2729 

2730 if codes is not None: 

2731 codes = codes_patterns_for_kind(STATION, codes) 

2732 

2733 by_nsl = defaultdict(lambda: (list(), list())) 

2734 for station in self.get_stations(obj, tmin, tmax, time, codes): 

2735 sargs = station._get_pyrocko_station_args() 

2736 by_nsl[station.codes.nsl][0].append(sargs) 

2737 

2738 if codes is not None: 

2739 codes = [model.CodesNSLCE(c) for c in codes] 

2740 

2741 for channel in self.get_channels(obj, tmin, tmax, time, codes): 

2742 sargs = channel._get_pyrocko_station_args() 

2743 sargs_list, channels_list = by_nsl[channel.codes.nsl] 

2744 sargs_list.append(sargs) 

2745 channels_list.append(channel) 

2746 

2747 pstations = [] 

2748 nsls = list(by_nsl.keys()) 

2749 nsls.sort() 

2750 for nsl in nsls: 

2751 sargs_list, channels_list = by_nsl[nsl] 

2752 sargs = util.consistency_merge( 

2753 [('',) + x for x in sargs_list], 

2754 error=on_error) 

2755 

2756 by_c = defaultdict(list) 

2757 for ch in channels_list: 

2758 by_c[ch.codes.channel].append(ch._get_pyrocko_channel_args()) 

2759 

2760 chas = list(by_c.keys()) 

2761 chas.sort() 

2762 pchannels = [] 

2763 for cha in chas: 

2764 list_of_cargs = by_c[cha] 

2765 cargs = util.consistency_merge( 

2766 [('',) + x for x in list_of_cargs], 

2767 error=on_error) 

2768 pchannels.append(pmodel.Channel(*cargs)) 

2769 

2770 pstations.append( 

2771 pmodel.Station(*sargs, channels=pchannels)) 

2772 

2773 return pstations 

2774 

2775 @property 

2776 def pile(self): 

2777 

2778 ''' 

2779 Emulates the older :py:class:`pyrocko.pile.Pile` interface. 

2780 

2781 This property exposes a :py:class:`pyrocko.squirrel.pile.Pile` object, 

2782 which emulates most of the older :py:class:`pyrocko.pile.Pile` methods 

2783 but uses the fluffy power of the Squirrel under the hood. 

2784 

2785 This interface can be used as a drop-in replacement for piles which are 

2786 used in existing scripts and programs for efficient waveform data 

2787 access. The Squirrel-based pile scales better for large datasets. Newer 

2788 scripts should use Squirrel's native methods to avoid the emulation 

2789 overhead. 

2790 ''' 

2791 from . import pile 

2792 

2793 if self._pile is None: 

2794 self._pile = pile.Pile(self) 

2795 

2796 return self._pile 

2797 

2798 def snuffle(self, **kwargs): 

2799 ''' 

2800 Look at dataset in Snuffler. 

2801 ''' 

2802 self.pile.snuffle(**kwargs) 

2803 

2804 def _gather_codes_keys(self, kind, gather, selector): 

2805 return set( 

2806 gather(codes) 

2807 for codes in self.iter_codes(kind) 

2808 if selector is None or selector(codes)) 

2809 

2810 def __str__(self): 

2811 return str(self.get_stats()) 

2812 

    def get_coverage(
            self, kind, tmin=None, tmax=None, codes=None, limit=None):

        '''
        Get coverage information.

        Get information about strips of gapless data coverage.

        :param kind:
            Content kind to be queried.
        :type kind:
            str

        :param tmin:
            Start time of query interval.
        :type tmin:
            :py:func:`~pyrocko.util.get_time_float`

        :param tmax:
            End time of query interval.
        :type tmax:
            :py:func:`~pyrocko.util.get_time_float`

        :param codes:
            If given, restrict query to given content codes patterns.
        :type codes:
            :py:class:`list` of :py:class:`~pyrocko.squirrel.model.Codes`
            objects appropriate for the queried content type, or anything which
            can be converted to such objects.

        :param limit:
            Limit query to return only up to a given maximum number of entries
            per matching time series (without setting this option, very gappy
            data could cause the query to execute for a very long time).
        :type limit:
            int

        :returns:
            Information about time spans covered by the requested time series
            data.
        :rtype:
            :py:class:`list` of :py:class:`~pyrocko.squirrel.model.Coverage`
        '''

        # Times are stored split into integer seconds plus sub-second offset.
        tmin_seconds, tmin_offset = model.tsplit(tmin)
        tmax_seconds, tmax_offset = model.tsplit(tmax)
        kind_id = to_kind_id(kind)

        codes_info = list(self._iter_codes_info(kind=kind))

        # Entries: (pattern, kind_codes_id, codes, deltat) per time series.
        kdata_all = []
        if codes is None:
            for _, codes_entry, deltat, kind_codes_id, _ in codes_info:
                kdata_all.append(
                    (codes_entry, kind_codes_id, codes_entry, deltat))

        else:
            for codes_entry in codes:
                pattern = to_codes(kind_id, codes_entry)
                for _, codes_entry, deltat, kind_codes_id, _ in codes_info:
                    if model.match_codes(pattern, codes_entry):
                        kdata_all.append(
                            (pattern, kind_codes_id, codes_entry, deltat))

        kind_codes_ids = [x[1] for x in kdata_all]

        # Number of segments covering exactly t == tmin, per (codes, deltat);
        # used as the initial counter value of each coverage profile.
        counts_at_tmin = {}
        if tmin is not None:
            for nut in self.iter_nuts(
                    kind, tmin, tmin, kind_codes_ids=kind_codes_ids):

                k = nut.codes, nut.deltat
                if k not in counts_at_tmin:
                    counts_at_tmin[k] = 0

                counts_at_tmin[k] += 1

        coverages = []
        for pattern, kind_codes_id, codes_entry, deltat in kdata_all:
            # entry: [pattern, codes, deltat, tmin, tmax, changes]
            entry = [pattern, codes_entry, deltat, None, None, []]
            for i, order in [(0, 'ASC'), (1, 'DESC')]:
                # First/last coverage change time of this time series.
                sql = self._sql('''
                    SELECT
                        time_seconds,
                        time_offset
                    FROM %(db)s.%(coverage)s
                    WHERE
                        kind_codes_id == ?
                    ORDER BY
                        kind_codes_id ''' + order + ''',
                        time_seconds ''' + order + ''',
                        time_offset ''' + order + '''
                    LIMIT 1
                ''')

                for row in self._conn.execute(sql, [kind_codes_id]):
                    entry[3+i] = model.tjoin(row[0], row[1])

            if None in entry[3:5]:
                continue

            args = [kind_codes_id]

            sql_time = ''
            if tmin is not None:
                # intentionally < because (== tmin) is queried from nuts
                sql_time += ' AND ( ? < time_seconds ' \
                    'OR ( ? == time_seconds AND ? < time_offset ) ) '
                args.extend([tmin_seconds, tmin_seconds, tmin_offset])

            if tmax is not None:
                sql_time += ' AND ( time_seconds < ? ' \
                    'OR ( ? == time_seconds AND time_offset <= ? ) ) '
                args.extend([tmax_seconds, tmax_seconds, tmax_offset])

            sql_limit = ''
            if limit is not None:
                sql_limit = ' LIMIT ?'
                args.append(limit)

            sql = self._sql('''
                SELECT
                    time_seconds,
                    time_offset,
                    step
                FROM %(db)s.%(coverage)s
                WHERE
                    kind_codes_id == ?
                    ''' + sql_time + '''
                ORDER BY
                    kind_codes_id,
                    time_seconds,
                    time_offset
            ''' + sql_limit)

            rows = list(self._conn.execute(sql, args))

            if limit is not None and len(rows) == limit:
                # Too gappy: signal truncated coverage with changes == None.
                entry[-1] = None
            else:
                counts = counts_at_tmin.get((codes_entry, deltat), 0)
                tlast = None
                if tmin is not None:
                    entry[-1].append((tmin, counts))
                    tlast = tmin

                # Accumulate step changes into a running segment count.
                for row in rows:
                    t = model.tjoin(row[0], row[1])
                    counts += row[2]
                    entry[-1].append((t, counts))
                    tlast = t

                if tmax is not None and (tlast is None or tlast != tmax):
                    entry[-1].append((tmax, counts))

            coverages.append(model.Coverage.from_values(entry + [kind_id]))

        return coverages

2971 

    def get_stationxml(
            self, obj=None, tmin=None, tmax=None, time=None, codes=None,
            level='response', on_error='raise'):

        '''
        Get station/channel/response metadata in StationXML representation.

        %(query_args)s

        :returns:
            :py:class:`~pyrocko.io.stationxml.FDSNStationXML` object.
        '''
        # NOTE(review): docstring contains %(query_args)s but the method does
        # not appear to be decorated with @filldocs — confirm against the
        # complete file.

        if level not in ('network', 'station', 'channel', 'response'):
            raise ValueError('Invalid level: %s' % level)

        tmin, tmax, codes = self._get_selection_args(
            CHANNEL, obj, tmin, tmax, time, codes)

        def tts(t):
            # Time-to-string for log messages; handles open bounds.
            if t is None:
                return '<none>'
            else:
                return util.tts(t, format='%Y-%m-%d %H:%M:%S')

        # on_error selects the policy applied to all consistency problems
        # found below: ignore silently, log a warning, or raise.
        if on_error == 'ignore':
            def handle_error(exc):
                pass

        elif on_error == 'warn':
            def handle_error(exc):
                logger.warning(str(exc))

        elif on_error == 'raise':
            def handle_error(exc):
                raise exc

        def use_first(node_type_name, codes, k, group):
            if on_error == 'warn':
                logger.warning(
                    'Duplicates for %s %s, %s - %s -> using first' % (
                        node_type_name,
                        '.'.join(codes),
                        tts(k[0]), tts(k[1])))

            return group[0]

        def deduplicate(node_type_name, codes, nodes):
            # Keep one node per (start_date, end_date) epoch.
            groups = defaultdict(list)
            for node in nodes:
                k = (node.start_date, node.end_date)
                groups[k].append(node)

            return [
                use_first(node_type_name, codes, k, group)
                for (k, group) in groups.items()]

        filtering = CodesPatternFiltering(codes=codes)

        nslcs = list(set(
            codes.nslc for codes in
            filtering.filter(self.get_codes(kind='channel'))))

        from pyrocko.io import stationxml as sx

        # Walk the network -> station -> location -> channel hierarchy.
        networks = []
        for net, stas in prefix_tree(nslcs):
            network = sx.Network(code=net)
            networks.append(network)

            if level not in ('station', 'channel', 'response'):
                continue

            for sta, locs in stas:
                stations = self.get_stations(
                    tmin=tmin,
                    tmax=tmax,
                    codes=(net, sta, '*'),
                    model='stationxml')

                if on_error != 'raise':
                    stations = deduplicate(
                        'Station', (net, sta), stations)

                errors = sx.check_overlaps(
                    'Station', (net, sta), stations)

                if errors:
                    handle_error(error.Duplicate(
                        'Overlapping/duplicate station info:\n  %s'
                        % '\n  '.join(errors)))

                network.station_list.extend(stations)

                if level not in ('channel', 'response'):
                    continue

                for loc, chas in locs:
                    for cha, _ in chas:
                        channels = self.get_channels(
                            tmin=tmin,
                            tmax=tmax,
                            codes=(net, sta, loc, cha),
                            model='stationxml')

                        if on_error != 'raise':
                            channels = deduplicate(
                                'Channel', (net, sta, loc, cha), channels)

                        errors = sx.check_overlaps(
                            'Channel', (net, sta, loc, cha), channels)

                        if errors:
                            handle_error(error.Duplicate(
                                'Overlapping/duplicate channel info:\n  %s'
                                % '\n  '.join(errors)))

                        for channel in channels:
                            # Attach each channel epoch to the station epoch
                            # containing it.
                            station = sx.find_containing(stations, channel)
                            if station is not None:
                                station.channel_list.append(channel)
                            else:
                                handle_error(error.NotAvailable(
                                    'No station or station epoch found '
                                    'for channel: %s' % '.'.join(
                                        (net, sta, loc, cha))))

                                continue

                            if level != 'response':
                                continue

                            try:
                                response_sq, response_sx = self.get_response(
                                    codes=(net, sta, loc, cha),
                                    tmin=channel.start_date,
                                    tmax=channel.end_date,
                                    model='stationxml+',
                                    on_duplicate=on_error)

                            except error.NotAvailable as e:
                                handle_error(e)
                                continue

                            if not (
                                    sx.eq_open(
                                        channel.start_date, response_sq.tmin)
                                    and sx.eq_open(
                                        channel.end_date, response_sq.tmax)):

                                handle_error(error.Inconsistencies(
                                    'Response time span does not match '
                                    'channel time span: %s' % '.'.join(
                                        (net, sta, loc, cha))))

                            channel.response = response_sx

        return sx.FDSNStationXML(
            source='Generated by Pyrocko Squirrel.',
            network_list=networks)

3132 

def add_operator(self, op):
    '''
    Register an operator on this Squirrel instance.

    :param op:
        Operator to be added to the internal operator list.
    '''
    # In-place extension of the operator list (equivalent to append).
    self._operators += [op]

3135 

def update_operator_mappings(self):
    '''
    Recompute the code mappings of all registered operators.

    Feeds the currently available channel codes and the shared operator
    registry to every operator's ``update_mappings`` method.
    '''
    channel_codes = self.get_codes(kind='channel')
    for op in self._operators:
        op.update_mappings(channel_codes, self._operator_registry)

3141 

def iter_operator_mappings(self):
    '''
    Iterate over the mappings of all registered operators.

    Yields ``(operator, in_codes, out_codes)`` triples, one per mapping
    reported by each operator's ``iter_mappings``.
    '''
    for op in self._operators:
        for mapping in op.iter_mappings():
            in_codes, out_codes = mapping
            yield op, in_codes, out_codes

3146 

def get_operator_mappings(self):
    '''
    Get all operator mappings as a list of
    ``(operator, in_codes, out_codes)`` triples.
    '''
    mappings = self.iter_operator_mappings()
    return list(mappings)

3149 

def get_operator(self, codes):
    '''
    Get the operator responsible for the given output codes.

    :returns:
        The operator registered for ``codes`` or ``None`` if no operator
        is registered for them.
    '''
    entry = self._operator_registry.get(codes)
    return None if entry is None else entry[0]

3155 

def get_operator_group(self, codes):
    '''
    Get the full registry entry for the given output codes.

    :returns:
        Registry entry for ``codes``, or the placeholder
        ``(None, (None, None, None))`` when none is registered.
    '''
    registry = self._operator_registry
    if codes in registry:
        return registry[codes]

    return None, (None, None, None)

3161 

def iter_operator_codes(self):
    '''
    Iterate over all output codes produced by the registered operators.
    '''
    # Each mapping is (operator, in_codes, out_codes); only the output
    # codes are of interest here.
    for mapping in self.iter_operator_mappings():
        yield from mapping[2]

3166 

def get_operator_codes(self):
    '''
    Get a list of all output codes produced by the registered operators.
    '''
    out = []
    out.extend(self.iter_operator_codes())
    return out

3169 

def print_tables(self, table_names=None, stream=None):
    '''
    Dump raw database tables in textual form (for debugging purposes).

    :param table_names:
        Names of tables to be dumped or ``None`` to dump all.
    :type table_names:
        :py:class:`list` of :py:class:`str`

    :param stream:
        Open file or ``None`` to dump to standard output.
    '''

    out = sys.stdout if stream is None else stream

    # Accept a single table name as shorthand for a one-element list.
    if isinstance(table_names, str):
        names = [table_names]
    elif table_names is None:
        names = [
            'selection_file_states',
            'selection_nuts',
            'selection_kind_codes_count',
            'files', 'nuts', 'kind_codes', 'kind_codes_count']
    else:
        names = table_names

    # Logical table name -> template for the physical table name; the
    # selection tables live under per-selection names from self._names.
    templates = {
        'selection_file_states': '%(db)s.%(file_states)s',
        'selection_nuts': '%(db)s.%(nuts)s',
        'selection_kind_codes_count': '%(db)s.%(kind_codes_count)s',
        'files': 'files',
        'nuts': 'nuts',
        'kind_codes': 'kind_codes',
        'kind_codes_count': 'kind_codes_count'}

    for name in names:
        self._database.print_table(
            templates[name] % self._names, stream=out)

3208 

3209 

class SquirrelStats(Object):
    '''
    Container to hold statistics about contents available from a Squirrel.

    See also :py:meth:`Squirrel.get_stats`.
    '''

    nfiles = Int.T(
        help='Number of files in selection.')
    nnuts = Int.T(
        help='Number of index nuts in selection.')
    codes = List.T(
        Tuple.T(content_t=String.T()),
        help='Available code sequences in selection, e.g. '
             '(agency, network, station, location) for stations nuts.')
    kinds = List.T(
        String.T(),
        help='Available content types in selection.')
    # NOTE(review): 'is selection' in the help text below looks like a typo
    # for 'in selection'.
    total_size = Int.T(
        help='Aggregated file size of files is selection.')
    counts = Dict.T(
        String.T(), Dict.T(Tuple.T(content_t=String.T()), Int.T()),
        help='Breakdown of how many nuts of any content type and code '
             'sequence are available in selection, ``counts[kind][codes]``.')
    time_spans = Dict.T(
        String.T(), Tuple.T(content_t=Timestamp.T()),
        help='Time spans by content type.')
    sources = List.T(
        String.T(),
        help='Descriptions of attached sources.')
    operators = List.T(
        String.T(),
        help='Descriptions of attached operators.')

    def __str__(self):
        # Total nut count per content kind, summed over all code sequences.
        kind_counts = dict(
            (kind, sum(self.counts[kind].values())) for kind in self.kinds)

        # Possibly abbreviated textual representation of the available codes.
        scodes = model.codes_to_str_abbreviated(self.codes)

        # Sources and operators are rendered one per line, or '<none>'.
        ssources = '<none>' if not self.sources else '\n' + '\n'.join(
            ' ' + s for s in self.sources)

        soperators = '<none>' if not self.operators else '\n' + '\n'.join(
            ' ' + s for s in self.operators)

        def stime(t):
            # Format a timestamp; ``None`` and the global sentinel times
            # (model.g_tmin/g_tmax, meaning "unbounded") render as '<none>'.
            return util.tts(t) if t is not None and t not in (
                model.g_tmin, model.g_tmax) else '<none>'

        def stable(rows):
            # Render rows of strings as a left-justified table, padding each
            # column to the width of its widest cell.
            ns = [max(len(w) for w in col) for col in zip(*rows)]
            return '\n'.join(
                ' '.join(w.ljust(n) for n, w in zip(ns, row))
                for row in rows)

        def indent(s):
            # Prefix every line of a multi-line string.
            return '\n'.join(' '+line for line in s.splitlines())

        # One table line per content kind: name, nut count, time span.
        stspans = '<none>' if not self.kinds else '\n' + indent(stable([(
            kind + ':',
            str(kind_counts[kind]),
            stime(self.time_spans[kind][0]),
            '-',
            stime(self.time_spans[kind][1])) for kind in sorted(self.kinds)]))

        s = '''
Number of files: %i
Total size of known files: %s
Number of index nuts: %i
Available content kinds: %s
Available codes: %s
Sources: %s
Operators: %s''' % (
            self.nfiles,
            util.human_bytesize(self.total_size),
            self.nnuts,
            stspans, scodes, ssources, soperators)

        # Template starts with a newline for readability above; strip it.
        return s.lstrip()

3290 

3291 

# Public API of this module.
__all__ = ['Squirrel', 'SquirrelStats']