Coverage for /usr/local/lib/python3.11/dist-packages/pyrocko/squirrel/base.py: 85%

878 statements  

« prev     ^ index     » next       coverage.py v6.5.0, created at 2024-02-27 10:58 +0000

1# http://pyrocko.org - GPLv3 

2# 

3# The Pyrocko Developers, 21st Century 

4# ---|P------/S----------~Lg---------- 

5 

6''' 

7Squirrel main classes. 

8''' 

9 

10import sys 

11import os 

12import time 

13import math 

14import logging 

15import threading 

16import queue 

17from collections import defaultdict 

18 

19from pyrocko.guts import Object, Int, List, Tuple, String, Timestamp, Dict 

20from pyrocko import util, trace 

21from pyrocko import progress 

22from pyrocko.plot import nice_time_tick_inc_approx_secs 

23 

24from . import model, io, cache, dataset 

25 

26from .model import to_kind_id, WaveformOrder, to_kind, to_codes, \ 

27 STATION, CHANNEL, RESPONSE, EVENT, WAVEFORM, codes_patterns_list, \ 

28 codes_patterns_for_kind 

29from .client import fdsn, catalog 

30from .selection import Selection, filldocs 

31from .database import abspath 

32from .operators.base import Operator, CodesPatternFiltering 

33from . import client, environment, error 

34 

# Module-level logger; 'psq' is the squirrel logging namespace.
logger = logging.getLogger('psq.base')

# Namespace prefix used by the guts serialization framework for types
# defined in this module (convention across pyrocko.squirrel).
guts_prefix = 'squirrel'

38 

39 

def nonef(f, xs):
    '''
    Apply *f* to the non-``None`` items of *xs*.

    Returns ``None`` when there are no non-``None`` items.
    '''
    filtered = [value for value in xs if value is not None]
    return f(filtered) if filtered else None

46 

47 

def make_task(*args):
    # Create a progress-reporting task bound to this module's logger.
    return progress.task(*args, logger=logger)

50 

51 

def lpick(condition, seq):
    '''
    Partition *seq* into ``(non_matching, matching)`` lists by *condition*.
    '''
    rejected = []
    accepted = []
    for item in seq:
        if condition(item):
            accepted.append(item)
        else:
            rejected.append(item)

    return rejected, accepted

58 

59 

def len_plural(obj):
    '''
    Return ``(len(obj), suffix)`` where suffix is ``'s'`` unless length is 1.

    Convenience for log messages like ``'%i file%s' % len_plural(files)``.
    '''
    n = len(obj)
    return n, '' if n == 1 else 's'

62 

63 

def blocks(tmin, tmax, deltat, nsamples_block=100000):
    '''
    Yield consecutive time windows covering ``[tmin, tmax]``.

    The window length is a "nice" (human-friendly) time increment close to
    ``deltat * nsamples_block``. Windows are aligned to multiples of that
    increment, so the first may start before *tmin* and the last may end
    after *tmax*.
    '''
    tblock = nice_time_tick_inc_approx_secs(
        util.to_time_float(deltat * nsamples_block))

    kmin = int(math.floor(tmin / tblock))
    kmax = int(math.ceil(tmax / tblock))
    for k in range(kmin, kmax):
        yield k * tblock, (k + 1) * tblock

71 

72 

def gaps(avail, tmin, tmax):
    '''
    Compute uncovered spans within ``[tmin, tmax]``.

    :param avail:
        Covered spans as ``(tmin_a, tmax_a)`` pairs.

    Returns a list of ``(tmin_g, tmax_g)`` pairs for the parts of
    ``[tmin, tmax]`` not covered by *avail* (sweep-line over interval
    start/end events).
    '''
    assert tmin < tmax

    # Event list: +1 opens coverage, -1 closes it. The query span itself
    # contributes a closing event at tmin and an opening event at tmax so
    # that uncovered regions inside it show up as level 0.
    events = [(tmax, 1), (tmin, -1)]
    for (tmin_a, tmax_a) in avail:
        assert tmin_a < tmax_a
        events.append((tmin_a, 1))
        events.append((tmax_a, -1))

    events.sort()

    uncovered = []
    level = 1
    t_open = None
    for t, step in events:
        if level == 1 and step == -1:
            t_open = t
        elif level == 0 and step == 1 and t_open is not None:
            if t_open != t:
                uncovered.append((t_open, t))

        level += step

    return uncovered

97 

98 

def order_key(order):
    '''
    Sorting/grouping key for orders: ``(codes, tmin, tmax)``.
    '''
    return order.codes, order.tmin, order.tmax

101 

102 

103def _is_exact(pat): 

104 return not ('*' in pat or '?' in pat or ']' in pat or '[' in pat) 

105 

106 

def prefix_tree(tups):
    '''
    Build a nested prefix tree from equal-length tuples.

    Returns a sorted list of ``(value, children)`` pairs, where children
    is the prefix tree of the remaining tuple elements sharing that value.
    '''
    if not tups:
        return []

    # Base case: single-element tuples become sorted leaves.
    if len(tups[0]) == 1:
        return sorted((tup[0], []) for tup in tups)

    # Group remainders by their first element and recurse.
    grouped = defaultdict(list)
    for tup in tups:
        grouped[tup[0]].append(tup[1:])

    return [(head, prefix_tree(grouped[head])) for head in sorted(grouped)]

123 

124 

def match_time_span(tmin, tmax, obj):
    '''
    True if the query span ``[tmin, tmax]`` intersects *obj*'s time span.

    ``None`` on either side of either span means "unbounded".
    '''
    starts_after_query = \
        obj.tmin is not None and tmax is not None and obj.tmin > tmax
    ends_before_query = \
        tmin is not None and obj.tmax is not None and tmin >= obj.tmax

    return not (starts_after_query or ends_before_query)

128 

129 

class Batch(object):
    '''
    Batch of waveforms from window-wise data extraction.

    Carries the state and results yielded for each time window in
    window-wise waveform extraction with the
    :py:meth:`Squirrel.chopper_waveforms` method.

    *Attributes:*

    .. py:attribute:: tmin

        Start of this time window.

    .. py:attribute:: tmax

        End of this time window.

    .. py:attribute:: tpad

        Padding time length.

    .. py:attribute:: i

        Index of this time window in sequence.

    .. py:attribute:: n

        Total number of time windows in sequence.

    .. py:attribute:: igroup

        Index of this time window's sequence group.

    .. py:attribute:: ngroups

        Total number of sequence groups.

    .. py:attribute:: traces

        Extracted waveforms for this time window.
    '''

    def __init__(self, tmin, tmax, tpad, i, n, igroup, ngroups, traces):
        self.tmin = tmin
        self.tmax = tmax
        self.tpad = tpad
        self.i = i
        self.n = n
        self.igroup = igroup
        self.ngroups = ngroups
        self.traces = traces

    def as_multitrace(self):
        '''
        Merge this batch's traces into a single multi-component trace.

        The merged span covers the window plus padding on both sides.
        '''
        from pyrocko import multitrace

        merged_data, merged_codes, merged_tmin, merged_deltat = \
            trace.merge_traces_data_as_array(
                self.traces,
                tmin=self.tmin - self.tpad,
                tmax=self.tmax + self.tpad)

        return multitrace.MultiTrace(
            data=merged_data,
            codes=merged_codes,
            tmin=merged_tmin,
            deltat=merged_deltat)

193 

194 

195class Squirrel(Selection): 

196 ''' 

197 Prompt, lazy, indexing, caching, dynamic seismological dataset access. 

198 

199 :param env: 

200 Squirrel environment instance or directory path to use as starting 

201 point for its detection. By default, the current directory is used as 

202 starting point. When searching for a usable environment the directory 

203 ``'.squirrel'`` or ``'squirrel'`` in the current (or starting point) 

204 directory is used if it exists, otherwise the parent directories are 

205 search upwards for the existence of such a directory. If no such 

206 directory is found, the user's global Squirrel environment 

207 ``'$HOME/.pyrocko/squirrel'`` is used. 

208 :type env: 

209 :py:class:`~pyrocko.squirrel.environment.Environment` or 

210 :py:class:`str` 

211 

212 :param database: 

213 Database instance or path to database. By default the 

214 database found in the detected Squirrel environment is used. 

215 :type database: 

216 :py:class:`~pyrocko.squirrel.database.Database` or :py:class:`str` 

217 

218 :param cache_path: 

219 Directory path to use for data caching. By default, the ``'cache'`` 

220 directory in the detected Squirrel environment is used. 

221 :type cache_path: 

222 :py:class:`str` 

223 

224 :param persistent: 

225 If given a name, create a persistent selection. 

226 :type persistent: 

227 :py:class:`str` 

228 

229 This is the central class of the Squirrel framework. It provides a unified 

230 interface to query and access seismic waveforms, station meta-data and 

231 event information from local file collections and remote data sources. For 

232 prompt responses, a profound database setup is used under the hood. To 

233 speed up assemblage of ad-hoc data selections, files are indexed on first 

234 use and the extracted meta-data is remembered in the database for 

235 subsequent accesses. Bulk data is lazily loaded from disk and remote 

236 sources, just when requested. Once loaded, data is cached in memory to 

237 expedite typical access patterns. Files and data sources can be dynamically 

238 added to and removed from the Squirrel selection at runtime. 

239 

240 Queries are restricted to the contents of the files currently added to the 

241 Squirrel selection (usually a subset of the file meta-information 

242 collection in the database). This list of files is referred to here as the 

243 "selection". By default, temporary tables are created in the attached 

244 database to hold the names of the files in the selection as well as various 

245 indices and counters. These tables are only visible inside the application 

246 which created them and are deleted when the database connection is closed 

247 or the application exits. To create a selection which is not deleted at 

248 exit, supply a name to the ``persistent`` argument of the Squirrel 

249 constructor. Persistent selections are shared among applications using the 

250 same database. 

251 

252 **Method summary** 

253 

254 Some of the methods are implemented in :py:class:`Squirrel`'s base class 

255 :py:class:`~pyrocko.squirrel.selection.Selection`. 

256 

257 .. autosummary:: 

258 

259 ~Squirrel.add 

260 ~Squirrel.add_source 

261 ~Squirrel.add_fdsn 

262 ~Squirrel.add_catalog 

263 ~Squirrel.add_dataset 

264 ~Squirrel.add_virtual 

265 ~Squirrel.update 

266 ~Squirrel.update_waveform_promises 

267 ~Squirrel.advance_accessor 

268 ~Squirrel.clear_accessor 

269 ~Squirrel.reload 

270 ~pyrocko.squirrel.selection.Selection.iter_paths 

271 ~Squirrel.iter_nuts 

272 ~Squirrel.iter_kinds 

273 ~Squirrel.iter_deltats 

274 ~Squirrel.iter_codes 

275 ~pyrocko.squirrel.selection.Selection.get_paths 

276 ~Squirrel.get_nuts 

277 ~Squirrel.get_kinds 

278 ~Squirrel.get_deltats 

279 ~Squirrel.get_codes 

280 ~Squirrel.get_counts 

281 ~Squirrel.get_time_span 

282 ~Squirrel.get_deltat_span 

283 ~Squirrel.get_nfiles 

284 ~Squirrel.get_nnuts 

285 ~Squirrel.get_total_size 

286 ~Squirrel.get_stats 

287 ~Squirrel.get_content 

288 ~Squirrel.get_stations 

289 ~Squirrel.get_channels 

290 ~Squirrel.get_responses 

291 ~Squirrel.get_events 

292 ~Squirrel.get_waveform_nuts 

293 ~Squirrel.get_waveforms 

294 ~Squirrel.chopper_waveforms 

295 ~Squirrel.get_coverage 

296 ~Squirrel.pile 

297 ~Squirrel.snuffle 

298 ~Squirrel.glob_codes 

299 ~pyrocko.squirrel.selection.Selection.get_database 

300 ~Squirrel.print_tables 

301 ''' 

302 

    def __init__(
            self, env=None, database=None, cache_path=None, persistent=None):

        # Resolve the environment: accept a ready Environment instance or
        # detect one from a starting-point path (or the default for None).
        if not isinstance(env, environment.Environment):
            env = environment.get_environment(env)

        # Fall back to the environment's settings for anything not given
        # explicitly.
        if database is None:
            database = env.expand_path(env.database_path)

        if cache_path is None:
            cache_path = env.expand_path(env.cache_path)

        if persistent is None:
            persistent = env.persistent

        Selection.__init__(
            self, database=database, persistent=persistent)

        # Paths in the database are stored relative to the directory
        # containing the environment.
        self.get_database().set_basepath(os.path.dirname(env.get_basepath()))

        # Separate in-memory content caches: bulk waveform data is kept
        # apart from other (meta-data) content.
        self._content_caches = {
            'waveform': cache.ContentCache(),
            'default': cache.ContentCache()}

        self._cache_path = cache_path

        self._sources = []            # attached remote data sources
        self._operators = []
        self._operator_registry = {}

        self._pending_orders = []     # collected orders, processed later

        self._pile = None             # lazily created pile emulation
        self._n_choppers_active = 0

        # Master switch; when False, remote downloads are suppressed.
        self.downloads_enabled = True

        # Names of the selection-specific tables in the attached database,
        # derived from this selection's name.
        self._names.update({
            'nuts': self.name + '_nuts',
            'kind_codes_count': self.name + '_kind_codes_count',
            'coverage': self.name + '_coverage'})

        with self.transaction('create tables') as cursor:
            self._create_tables_squirrel(cursor)

347 

    def _create_tables_squirrel(self, cursor):
        # Create the selection-local tables, indices and triggers. All
        # object names are expanded via self._sql() from self._names, so
        # every (possibly persistent) selection gets its own set of tables.

        # Main index table: one row per content piece ("nut"), pointing
        # into a file (segment/element), carrying kind, codes and the time
        # span split into integer seconds plus fractional offset.
        cursor.execute(self._register_table(self._sql(
            '''
                CREATE TABLE IF NOT EXISTS %(db)s.%(nuts)s (
                    nut_id integer PRIMARY KEY,
                    file_id integer,
                    file_segment integer,
                    file_element integer,
                    kind_id integer,
                    kind_codes_id integer,
                    tmin_seconds integer,
                    tmin_offset integer,
                    tmax_seconds integer,
                    tmax_offset integer,
                    kscale integer)
            ''')))

        # Per (kind, codes) counter of nuts in this selection, maintained
        # by the _inc_kind_codes/_dec_kind_codes triggers below.
        cursor.execute(self._register_table(self._sql(
            '''
                CREATE TABLE IF NOT EXISTS %(db)s.%(kind_codes_count)s (
                    kind_codes_id integer PRIMARY KEY,
                    count integer)
            ''')))

        # A nut is uniquely identified by its position within its file.
        cursor.execute(self._sql(
            '''
                CREATE UNIQUE INDEX IF NOT EXISTS %(db)s.%(nuts)s_file_element
                    ON %(nuts)s (file_id, file_segment, file_element)
            '''))

        cursor.execute(self._sql(
            '''
                CREATE INDEX IF NOT EXISTS %(db)s.%(nuts)s_index_file_id
                    ON %(nuts)s (file_id)
            '''))

        # Time-range lookup indices (used by _timerange_sql).
        cursor.execute(self._sql(
            '''
                CREATE INDEX IF NOT EXISTS %(db)s.%(nuts)s_index_tmin_seconds
                    ON %(nuts)s (kind_id, tmin_seconds)
            '''))

        cursor.execute(self._sql(
            '''
                CREATE INDEX IF NOT EXISTS %(db)s.%(nuts)s_index_tmax_seconds
                    ON %(nuts)s (kind_id, tmax_seconds)
            '''))

        cursor.execute(self._sql(
            '''
                CREATE INDEX IF NOT EXISTS %(db)s.%(nuts)s_index_kscale
                    ON %(nuts)s (kind_id, kscale, tmin_seconds)
            '''))

        # Keep the nuts table consistent with the global files table:
        # deleting a file removes its nuts.
        cursor.execute(self._sql(
            '''
                CREATE TRIGGER IF NOT EXISTS %(db)s.%(nuts)s_delete_nuts
                    BEFORE DELETE ON main.files FOR EACH ROW
                    BEGIN
                        DELETE FROM %(nuts)s WHERE file_id == old.file_id;
                    END
            '''))

        # trigger only on size to make silent update of mtime possible
        cursor.execute(self._sql(
            '''
                CREATE TRIGGER IF NOT EXISTS %(db)s.%(nuts)s_delete_nuts2
                    BEFORE UPDATE OF size ON main.files FOR EACH ROW
                    BEGIN
                        DELETE FROM %(nuts)s WHERE file_id == old.file_id;
                    END
            '''))

        # Removing a file from the selection also removes its nuts.
        cursor.execute(self._sql(
            '''
                CREATE TRIGGER IF NOT EXISTS
                    %(db)s.%(file_states)s_delete_files
                    BEFORE DELETE ON %(db)s.%(file_states)s FOR EACH ROW
                    BEGIN
                        DELETE FROM %(nuts)s WHERE file_id == old.file_id;
                    END
            '''))

        # Maintain kind_codes_count: +1 on nut insertion ...
        cursor.execute(self._sql(
            '''
                CREATE TRIGGER IF NOT EXISTS %(db)s.%(nuts)s_inc_kind_codes
                    BEFORE INSERT ON %(nuts)s FOR EACH ROW
                    BEGIN
                        INSERT OR IGNORE INTO %(kind_codes_count)s VALUES
                            (new.kind_codes_id, 0);
                        UPDATE %(kind_codes_count)s
                            SET count = count + 1
                            WHERE new.kind_codes_id
                                == %(kind_codes_count)s.kind_codes_id;
                    END
            '''))

        # ... and -1 on nut deletion.
        cursor.execute(self._sql(
            '''
                CREATE TRIGGER IF NOT EXISTS %(db)s.%(nuts)s_dec_kind_codes
                    BEFORE DELETE ON %(nuts)s FOR EACH ROW
                    BEGIN
                        UPDATE %(kind_codes_count)s
                            SET count = count - 1
                            WHERE old.kind_codes_id
                                == %(kind_codes_count)s.kind_codes_id;
                    END
            '''))

        # Coverage table: per (kind, codes), a sparse step function of the
        # number of nuts covering any time instant. Each row is a time at
        # which that count changes by `step`.
        cursor.execute(self._register_table(self._sql(
            '''
                CREATE TABLE IF NOT EXISTS %(db)s.%(coverage)s (
                    kind_codes_id integer,
                    time_seconds integer,
                    time_offset integer,
                    step integer)
            ''')))

        cursor.execute(self._sql(
            '''
                CREATE UNIQUE INDEX IF NOT EXISTS %(db)s.%(coverage)s_time
                    ON %(coverage)s (kind_codes_id, time_seconds, time_offset)
            '''))

        # Maintain coverage on nut insertion: +1 at the nut's tmin, -1 at
        # its tmax; rows whose step becomes 0 are purged afterwards.
        cursor.execute(self._sql(
            '''
                CREATE TRIGGER IF NOT EXISTS %(db)s.%(nuts)s_add_coverage
                    AFTER INSERT ON %(nuts)s FOR EACH ROW
                    BEGIN
                        INSERT OR IGNORE INTO %(coverage)s VALUES
                        (new.kind_codes_id, new.tmin_seconds, new.tmin_offset, 0)
                        ;
                        UPDATE %(coverage)s
                            SET step = step + 1
                            WHERE new.kind_codes_id == %(coverage)s.kind_codes_id
                                AND new.tmin_seconds == %(coverage)s.time_seconds
                                AND new.tmin_offset == %(coverage)s.time_offset
                        ;
                        INSERT OR IGNORE INTO %(coverage)s VALUES
                        (new.kind_codes_id, new.tmax_seconds, new.tmax_offset, 0)
                        ;
                        UPDATE %(coverage)s
                            SET step = step - 1
                            WHERE new.kind_codes_id == %(coverage)s.kind_codes_id
                                AND new.tmax_seconds == %(coverage)s.time_seconds
                                AND new.tmax_offset == %(coverage)s.time_offset
                        ;
                        DELETE FROM %(coverage)s
                            WHERE new.kind_codes_id == %(coverage)s.kind_codes_id
                                AND new.tmin_seconds == %(coverage)s.time_seconds
                                AND new.tmin_offset == %(coverage)s.time_offset
                                AND step == 0
                        ;
                        DELETE FROM %(coverage)s
                            WHERE new.kind_codes_id == %(coverage)s.kind_codes_id
                                AND new.tmax_seconds == %(coverage)s.time_seconds
                                AND new.tmax_offset == %(coverage)s.time_offset
                                AND step == 0
                        ;
                    END
            '''))

        # Inverse coverage bookkeeping on nut deletion.
        cursor.execute(self._sql(
            '''
                CREATE TRIGGER IF NOT EXISTS %(db)s.%(nuts)s_remove_coverage
                    BEFORE DELETE ON %(nuts)s FOR EACH ROW
                    BEGIN
                        INSERT OR IGNORE INTO %(coverage)s VALUES
                        (old.kind_codes_id, old.tmin_seconds, old.tmin_offset, 0)
                        ;
                        UPDATE %(coverage)s
                            SET step = step - 1
                            WHERE old.kind_codes_id == %(coverage)s.kind_codes_id
                                AND old.tmin_seconds == %(coverage)s.time_seconds
                                AND old.tmin_offset == %(coverage)s.time_offset
                        ;
                        INSERT OR IGNORE INTO %(coverage)s VALUES
                        (old.kind_codes_id, old.tmax_seconds, old.tmax_offset, 0)
                        ;
                        UPDATE %(coverage)s
                            SET step = step + 1
                            WHERE old.kind_codes_id == %(coverage)s.kind_codes_id
                                AND old.tmax_seconds == %(coverage)s.time_seconds
                                AND old.tmax_offset == %(coverage)s.time_offset
                        ;
                        DELETE FROM %(coverage)s
                            WHERE old.kind_codes_id == %(coverage)s.kind_codes_id
                                AND old.tmin_seconds == %(coverage)s.time_seconds
                                AND old.tmin_offset == %(coverage)s.time_offset
                                AND step == 0
                        ;
                        DELETE FROM %(coverage)s
                            WHERE old.kind_codes_id == %(coverage)s.kind_codes_id
                                AND old.tmax_seconds == %(coverage)s.time_seconds
                                AND old.tmax_offset == %(coverage)s.time_offset
                                AND step == 0
                        ;
                    END
            '''))

548 

    def _delete(self):
        '''Delete database tables associated with this Squirrel.'''

        with self.transaction('delete tables') as cursor:
            # Triggers are dropped before the tables they operate on. The
            # coverage-related objects use IF EXISTS so their absence is
            # tolerated.
            for s in '''
                    DROP TRIGGER %(db)s.%(nuts)s_delete_nuts;
                    DROP TRIGGER %(db)s.%(nuts)s_delete_nuts2;
                    DROP TRIGGER %(db)s.%(file_states)s_delete_files;
                    DROP TRIGGER %(db)s.%(nuts)s_inc_kind_codes;
                    DROP TRIGGER %(db)s.%(nuts)s_dec_kind_codes;
                    DROP TABLE %(db)s.%(nuts)s;
                    DROP TABLE %(db)s.%(kind_codes_count)s;
                    DROP TRIGGER IF EXISTS %(db)s.%(nuts)s_add_coverage;
                    DROP TRIGGER IF EXISTS %(db)s.%(nuts)s_remove_coverage;
                    DROP TABLE IF EXISTS %(db)s.%(coverage)s;
                    '''.strip().splitlines():

                cursor.execute(self._sql(s))

        # Let the base class drop its own tables as well.
        Selection._delete(self)

569 

    @filldocs
    def add(self,
            paths,
            kinds=None,
            format='detect',
            include=None,
            exclude=None,
            check=True):

        '''
        Add files to the selection.

        :param paths:
            Iterator yielding paths to files or directories to be added to the
            selection. Recurses into directories. If given a ``str``, it
            is treated as a single path to be added.
        :type paths:
            :py:class:`list` of :py:class:`str`

        :param kinds:
            Content types to be made available through the Squirrel selection.
            By default, all known content types are accepted.
        :type kinds:
            :py:class:`list` of :py:class:`str`

        :param format:
            File format identifier or ``'detect'`` to enable auto-detection
            (available: %(file_formats)s).
        :type format:
            str

        :param include:
            If not ``None``, files are only included if their paths match the
            given regular expression pattern.
        :type include:
            str

        :param exclude:
            If not ``None``, files are only included if their paths do not
            match the given regular expression pattern.
        :type exclude:
            str

        :param check:
            If ``True``, all file modification times are checked to see if
            cached information has to be updated (slow). If ``False``, only
            previously unknown files are indexed and cached information is used
            for known files, regardless of file state (fast, corresponds to
            Squirrel's ``--optimistic`` mode). File deletions will go
            undetected in the latter case.
        :type check:
            bool

        :Complexity:
            O(log N)
        '''

        # Normalize scalar arguments to sequences.
        if isinstance(kinds, str):
            kinds = (kinds,)

        if isinstance(paths, str):
            paths = [paths]

        kind_mask = model.to_kind_mask(kinds)

        # Register the (filtered, recursively expanded) file paths with the
        # selection. Paths starting with 'virtual:' bypass the file-system
        # scan.
        Selection.add(
            self, util.iter_select_files(
                paths,
                show_progress=False,
                include=include,
                exclude=exclude,
                pass_through=lambda path: path.startswith('virtual:')
            ), kind_mask, format)

        # Index new/modified files and refresh the nuts table.
        self._load(check)
        self._update_nuts()

646 

    def reload(self):
        '''
        Check for modifications and reindex modified files.

        Based on file modification times.
        '''

        # Force re-examination of every file in the selection, then rescan
        # and rebuild the nut index.
        self._set_file_states_force_check()
        self._load(check=True)
        self._update_nuts()

657 

    def add_virtual(self, nuts, virtual_paths=None):
        '''
        Add content which is not backed by files.

        :param nuts:
            Content pieces to be added.
        :type nuts:
            iterator yielding :py:class:`~pyrocko.squirrel.model.Nut` objects

        :param virtual_paths:
            List of virtual paths to prevent creating a temporary list of the
            nuts while aggregating the file paths for the selection.
        :type virtual_paths:
            :py:class:`list` of :py:class:`str`

        Stores to the main database and the selection.
        '''

        if isinstance(virtual_paths, str):
            virtual_paths = [virtual_paths]

        if virtual_paths is None:
            # Need to traverse the nuts to collect their paths, so
            # materialize the iterator first (it is consumed twice).
            if not isinstance(nuts, list):
                nuts = list(nuts)
            virtual_paths = set(nut.file_path for nut in nuts)

        # Register the virtual paths with the selection, index the nut
        # meta-data in the main database and refresh the selection's nuts.
        Selection.add(self, virtual_paths)
        self.get_database().dig(nuts)
        self._update_nuts()

687 

    def add_volatile(self, nuts):
        # Like add_virtual, but additionally registers the nuts with the
        # virtual backend and remembers the paths in _volatile_paths
        # (presumably for cleanup on close — see add_volatile_waveforms).
        if not isinstance(nuts, list):
            nuts = list(nuts)

        paths = list(set(nut.file_path for nut in nuts))
        io.backends.virtual.add_nuts(nuts)
        self.add_virtual(nuts, paths)
        self._volatile_paths.extend(paths)

696 

    def add_volatile_waveforms(self, traces):
        '''
        Add in-memory waveforms which will be removed when the app closes.

        :param traces:
            Traces to be held in memory.

        :returns:
            The generated virtual path under which the traces are
            registered.
        '''

        # All given traces share one randomly named virtual "file".
        name = model.random_name()

        path = 'virtual:volatile:%s' % name

        nuts = []
        for itr, tr in enumerate(traces):
            assert tr.tmin <= tr.tmax
            # Split times into integer seconds + fractional offset, as
            # stored in the nuts table.
            tmin_seconds, tmin_offset = model.tsplit(tr.tmin)
            tmax_seconds, tmax_offset = model.tsplit(
                tr.tmin + tr.data_len()*tr.deltat)

            # Each trace becomes one file segment; content is attached
            # directly, so no loading step is needed later.
            nuts.append(model.Nut(
                file_path=path,
                file_format='virtual',
                file_segment=itr,
                file_element=0,
                file_mtime=0,
                codes=tr.codes,
                tmin_seconds=tmin_seconds,
                tmin_offset=tmin_offset,
                tmax_seconds=tmax_seconds,
                tmax_offset=tmax_offset,
                deltat=tr.deltat,
                kind_id=to_kind_id('waveform'),
                content=tr))

        self.add_volatile(nuts)
        return path

730 

    def _load(self, check):
        # Index all files in the selection. No content is extracted
        # (content=[]); the pass over iload is done only for its indexing
        # side effect. Unchanged files are skipped.
        for _ in io.iload(
                self,
                content=[],
                skip_unchanged=True,
                check=check):
            pass

738 

    def _update_nuts(self, transaction=None):
        # Copy nut meta-data of files newly added to the selection from the
        # global `nuts` table into the selection-local nuts table, honoring
        # each file's kind mask. Progress is reported via a task hooked
        # into SQLite's progress handler.
        transaction = transaction or self.transaction('update nuts')
        with make_task('Aggregating selection') as task, \
                transaction as cursor:

            self._conn.set_progress_handler(task.update, 100000)
            nrows = cursor.execute(self._sql(
                '''
                    INSERT INTO %(db)s.%(nuts)s
                    SELECT NULL,
                        nuts.file_id, nuts.file_segment, nuts.file_element,
                        nuts.kind_id, nuts.kind_codes_id,
                        nuts.tmin_seconds, nuts.tmin_offset,
                        nuts.tmax_seconds, nuts.tmax_offset,
                        nuts.kscale
                    FROM %(db)s.%(file_states)s
                    INNER JOIN nuts
                        ON %(db)s.%(file_states)s.file_id == nuts.file_id
                    INNER JOIN kind_codes
                        ON nuts.kind_codes_id ==
                            kind_codes.kind_codes_id
                    WHERE %(db)s.%(file_states)s.file_state != 2
                        AND (((1 << kind_codes.kind_id)
                            & %(db)s.%(file_states)s.kind_mask) != 0)
                ''')).rowcount

            task.update(nrows)
            # Mark the files as processed so subsequent calls skip them.
            self._set_file_states_known(transaction)
            self._conn.set_progress_handler(None, 0)

768 

    def add_source(self, source, check=True):
        '''
        Add remote resource.

        :param source:
            Remote data access client instance.
        :type source:
            subclass of :py:class:`~pyrocko.squirrel.client.base.Source`

        :param check:
            Passed on to the source's ``setup`` method — presumably controls
            checking of cached files like in :py:meth:`add` (confirm against
            the source implementations).
        :type check:
            bool
        '''

        self._sources.append(source)
        source.setup(self, check=check)

781 

    def add_fdsn(self, *args, **kwargs):
        '''
        Add FDSN site for transparent remote data access.

        Convenience wrapper around :py:meth:`add_source`. Arguments are
        passed to :py:class:`~pyrocko.squirrel.client.fdsn.FDSNSource`.
        '''

        self.add_source(fdsn.FDSNSource(*args, **kwargs))

791 

    def add_catalog(self, *args, **kwargs):
        '''
        Add online catalog for transparent event data access.

        Convenience wrapper around :py:meth:`add_source`. Arguments are
        passed to :py:class:`~pyrocko.squirrel.client.catalog.CatalogSource`.
        '''

        self.add_source(catalog.CatalogSource(*args, **kwargs))

801 

    def add_dataset(self, ds, check=True):
        '''
        Read dataset description from file and add its contents.

        :param ds:
            Path to dataset description file, dataset description object
            or name of a built-in dataset. See
            :py:mod:`~pyrocko.squirrel.dataset`.
        :type ds:
            :py:class:`str` or :py:class:`~pyrocko.squirrel.dataset.Dataset`

        :param check:
            If ``True``, all file modification times are checked to see if
            cached information has to be updated (slow). If ``False``, only
            previously unknown files are indexed and cached information is used
            for known files, regardless of file state (fast, corresponds to
            Squirrel's ``--optimistic`` mode). File deletions will go
            undetected in the latter case.
        :type check:
            bool
        '''
        # A string is interpreted as a path or built-in dataset name.
        if isinstance(ds, str):
            ds = dataset.read_dataset(ds)

        # The dataset object attaches its sources/files to this squirrel.
        ds.setup(self, check=check)

827 

828 def _get_selection_args( 

829 self, kind_id, 

830 obj=None, tmin=None, tmax=None, time=None, codes=None): 

831 

832 if codes is not None: 

833 codes = codes_patterns_for_kind(kind_id, codes) 

834 

835 if time is not None: 

836 tmin = time 

837 tmax = time 

838 

839 if obj is not None: 

840 tmin = tmin if tmin is not None else obj.tmin 

841 tmax = tmax if tmax is not None else obj.tmax 

842 codes = codes if codes is not None else codes_patterns_for_kind( 

843 kind_id, obj.codes) 

844 

845 return tmin, tmax, codes 

846 

847 def _get_selection_args_str(self, *args, **kwargs): 

848 

849 tmin, tmax, codes = self._get_selection_args(*args, **kwargs) 

850 return 'tmin: %s, tmax: %s, codes: %s' % ( 

851 util.time_to_str(tmin) if tmin is not None else 'none', 

852 util.time_to_str(tmax) if tmax is not None else 'none', 

853 ','.join(str(entry) for entry in codes)) 

854 

855 def _selection_args_to_kwargs( 

856 self, obj=None, tmin=None, tmax=None, time=None, codes=None): 

857 

858 return dict(obj=obj, tmin=tmin, tmax=tmax, time=time, codes=codes) 

859 

    def _timerange_sql(self, tmin, tmax, kind, cond, args, naiv):
        # Append SQL conditions (and their parameters) selecting nuts of
        # *kind* intersecting [tmin, tmax] to *cond*/*args*. Only the
        # integer-seconds part of the times is used here; callers re-check
        # with full precision (see iter_nuts).

        tmin_seconds, tmin_offset = model.tsplit(tmin)
        tmax_seconds, tmax_offset = model.tsplit(tmax)
        if naiv:
            # Slow reference implementation (for testing): plain range
            # check, no use of the kscale index.
            cond.append('%(db)s.%(nuts)s.tmin_seconds <= ?')
            args.append(tmax_seconds)
        else:
            # Nuts are bucketed by duration scale (kscale). For each scale,
            # a nut can only intersect the query if its tmin lies within
            # the query span widened by that scale's maximum duration —
            # this keeps the lookup on the (kind_id, kscale, tmin_seconds)
            # index.
            tscale_edges = model.tscale_edges
            tmin_cond = []
            for kscale in range(tscale_edges.size + 1):
                if kscale != tscale_edges.size:
                    tscale = int(tscale_edges[kscale])
                    tmin_cond.append('''
                        (%(db)s.%(nuts)s.kind_id = ?
                         AND %(db)s.%(nuts)s.kscale == ?
                         AND %(db)s.%(nuts)s.tmin_seconds BETWEEN ? AND ?)
                    ''')
                    args.extend(
                        (to_kind_id(kind), kscale,
                         tmin_seconds - tscale - 1, tmax_seconds + 1))

                else:
                    # Last bucket (unbounded duration): only an upper
                    # bound on tmin can be used.
                    tmin_cond.append('''
                        (%(db)s.%(nuts)s.kind_id == ?
                         AND %(db)s.%(nuts)s.kscale == ?
                         AND %(db)s.%(nuts)s.tmin_seconds <= ?)
                    ''')

                    args.extend(
                        (to_kind_id(kind), kscale, tmax_seconds + 1))
            if tmin_cond:
                cond.append(' ( ' + ' OR '.join(tmin_cond) + ' ) ')

        # Common lower bound: nut must not end before the query starts.
        cond.append('%(db)s.%(nuts)s.tmax_seconds >= ?')
        args.append(tmin_seconds)

896 

    def _codes_match_sql(self, positive, kind_id, codes, cond, args):
        # Append an SQL condition matching (positive=True) or excluding
        # (positive=False) the given codes patterns to *cond*/*args*.
        pats = codes_patterns_for_kind(kind_id, codes)
        if pats is None:
            return

        # Split patterns into exact strings (fast IN lookup) and glob
        # patterns (GLOB match).
        pats_exact = []
        pats_nonexact = []
        for pat in pats:
            spat = pat.safe_str
            (pats_exact if _is_exact(spat) else pats_nonexact).append(spat)

        codes_cond = []
        if pats_exact:
            codes_cond.append(' ( kind_codes.codes IN ( %s ) ) ' % ', '.join(
                '?'*len(pats_exact)))

            args.extend(pats_exact)

        if pats_nonexact:
            codes_cond.append(' ( %s ) ' % ' OR '.join(
                ('kind_codes.codes GLOB ?',) * len(pats_nonexact)))

            args.extend(pats_nonexact)

        if codes_cond:
            # Negate the whole OR-combination for exclusion queries.
            cond.append('%s ( %s )' % (
                'NOT' if not positive else '',
                ' OR '.join(codes_cond)))

925 

    def iter_nuts(
            self, kind=None, tmin=None, tmax=None, codes=None,
            codes_exclude=None, sample_rate_min=None, sample_rate_max=None,
            naiv=False, kind_codes_ids=None, path=None, limit=None):

        '''
        Iterate over content entities matching given constraints.

        :param kind:
            Content kind (or kinds) to extract.
        :type kind:
            :py:class:`str`, :py:class:`list` of :py:class:`str`

        :param tmin:
            Start time of query interval.
        :type tmin:
            :py:func:`~pyrocko.util.get_time_float`

        :param tmax:
            End time of query interval.
        :type tmax:
            :py:func:`~pyrocko.util.get_time_float`

        :param codes:
            List of code patterns to query.
        :type codes:
            :py:class:`list` of :py:class:`~pyrocko.squirrel.model.Codes`
            objects appropriate for the queried content type, or anything which
            can be converted to such objects.

        :param codes_exclude:
            List of code patterns to exclude from the result.
        :type codes_exclude:
            like *codes*

        :param sample_rate_min:
            Minimum sample rate [Hz]; matched against the stored sampling
            interval (``deltat``).
        :type sample_rate_min:
            float

        :param sample_rate_max:
            Maximum sample rate [Hz].
        :type sample_rate_max:
            float

        :param naiv:
            Bypass time span lookup through indices (slow, for testing).
        :type naiv:
            :py:class:`bool`

        :param kind_codes_ids:
            Kind-codes IDs of contents to be retrieved (internal use).
        :type kind_codes_ids:
            :py:class:`list` of :py:class:`int`

        :param path:
            If given, restrict the query to contents of this file path.
        :type path:
            :py:class:`str`

        :param limit:
            Maximum number of rows fetched by the underlying SQL query.
        :type limit:
            :py:class:`int`

        :yields:
            :py:class:`~pyrocko.squirrel.model.Nut` objects representing the
            intersecting content.

        :complexity:
            O(log N) for the time selection part due to heavy use of database
            indices.

        Query time span is treated as a half-open interval ``[tmin, tmax)``.
        However, if ``tmin`` equals ``tmax``, the edge logics are modified to
        closed-interval so that content intersecting with the time instant ``t
        = tmin = tmax`` is returned (otherwise nothing would be returned as
        ``[t, t)`` never matches anything).

        Time spans of content entities to be matched are also treated as half
        open intervals, e.g. content span ``[0, 1)`` is matched by query span
        ``[0, 1)`` but not by ``[-1, 0)`` or ``[1, 2)``. Also here, logics are
        modified to closed-interval when the content time span is an empty
        interval, i.e. to indicate a time instant. E.g. time instant 0 is
        matched by ``[0, 1)`` but not by ``[-1, 0)`` or ``[1, 2)``.
        '''

        # Multiple kinds (or all kinds, for None): recurse once per kind.
        # NOTE(review): only tmin/tmax/codes are forwarded here — the
        # other constraints (codes_exclude, sample rates, path, limit) are
        # dropped on the multi-kind path; confirm this is intended.
        if not isinstance(kind, str):
            if kind is None:
                kind = model.g_content_kinds
            for kind_ in kind:
                for nut in self.iter_nuts(kind_, tmin, tmax, codes):
                    yield nut

            return

        kind_id = to_kind_id(kind)

        # Accumulate SQL WHERE fragments and their bound parameters.
        cond = []
        args = []
        if tmin is not None or tmax is not None:
            assert kind is not None
            # Open-ended queries are clamped to the selection's total span.
            if tmin is None:
                tmin = self.get_time_span()[0]
            if tmax is None:
                tmax = self.get_time_span()[1] + 1.0

            self._timerange_sql(tmin, tmax, kind, cond, args, naiv)

        cond.append('kind_codes.kind_id == ?')
        args.append(kind_id)

        if codes is not None:
            self._codes_match_sql(True, kind_id, codes, cond, args)

        if codes_exclude is not None:
            self._codes_match_sql(False, kind_id, codes_exclude, cond, args)

        # Sample-rate bounds translate to inverse bounds on deltat.
        if sample_rate_min is not None:
            cond.append('kind_codes.deltat <= ?')
            args.append(1.0/sample_rate_min)

        if sample_rate_max is not None:
            cond.append('? <= kind_codes.deltat')
            args.append(1.0/sample_rate_max)

        if kind_codes_ids is not None:
            cond.append(
                ' ( kind_codes.kind_codes_id IN ( %s ) ) ' % ', '.join(
                    '?'*len(kind_codes_ids)))

            args.extend(kind_codes_ids)

        db = self.get_database()
        if path is not None:
            # Paths are stored relative to the database's basepath.
            cond.append('files.path == ?')
            args.append(db.relpath(abspath(path)))

        sql = ('''
            SELECT
                files.path,
                files.format,
                files.mtime,
                files.size,
                %(db)s.%(nuts)s.file_segment,
                %(db)s.%(nuts)s.file_element,
                kind_codes.kind_id,
                kind_codes.codes,
                %(db)s.%(nuts)s.tmin_seconds,
                %(db)s.%(nuts)s.tmin_offset,
                %(db)s.%(nuts)s.tmax_seconds,
                %(db)s.%(nuts)s.tmax_offset,
                kind_codes.deltat
            FROM files
            INNER JOIN %(db)s.%(nuts)s
                ON files.file_id == %(db)s.%(nuts)s.file_id
            INNER JOIN kind_codes
                ON %(db)s.%(nuts)s.kind_codes_id == kind_codes.kind_codes_id
            ''')

        if cond:
            sql += ''' WHERE ''' + ' AND '.join(cond)

        if limit is not None:
            sql += ''' LIMIT %i''' % limit

        sql = self._sql(sql)
        if tmin is None and tmax is None:
            # No time constraint: yield rows directly.
            for row in self._conn.execute(sql, args):
                row = (db.abspath(row[0]),) + row[1:]
                nut = model.Nut(values_nocheck=row)
                yield nut
        else:
            assert tmin is not None and tmax is not None
            # The SQL preselection is only accurate to integer seconds;
            # re-check the interval logics with full-resolution times here.
            if tmin == tmax:
                # Time-instant query: closed-interval edge logics.
                for row in self._conn.execute(sql, args):
                    row = (db.abspath(row[0]),) + row[1:]
                    nut = model.Nut(values_nocheck=row)
                    if (nut.tmin <= tmin < nut.tmax) \
                            or (nut.tmin == nut.tmax and tmin == nut.tmin):

                        yield nut
            else:
                # Half-open interval query, with closed-interval handling
                # for zero-length (instant) content spans.
                for row in self._conn.execute(sql, args):
                    row = (db.abspath(row[0]),) + row[1:]
                    nut = model.Nut(values_nocheck=row)
                    if (tmin < nut.tmax and nut.tmin < tmax) \
                            or (nut.tmin == nut.tmax
                                and tmin <= nut.tmin < tmax):

                        yield nut

    def get_nuts(self, *args, **kwargs):
        '''
        Get content entities matching given constraints.

        Like :py:meth:`iter_nuts` but returns results as a list. All
        positional and keyword arguments are passed through unchanged to
        :py:meth:`iter_nuts`.
        '''

        return list(self.iter_nuts(*args, **kwargs))

1101 

    def _split_nuts(
            self, kind, tmin=None, tmax=None, codes=None, path=None):

        '''
        Cut nuts of given kind at the boundaries of interval ``[tmin, tmax]``.

        Nuts overlapping the interval are deleted and replaced by their
        remainders outside of the interval (if any). The operation is applied
        to both the live selection's nut table and the main database's nut
        table, inside a single transaction.
        '''

        kind_id = to_kind_id(kind)
        tmin_seconds, tmin_offset = model.tsplit(tmin)
        tmax_seconds, tmax_offset = model.tsplit(tmax)

        # Template substitutions redirecting the SQL templates to the main
        # database's nut table (second pass of the loop below).
        names_main_nuts = dict(self._names)
        names_main_nuts.update(db='main', nuts='nuts')

        db = self.get_database()

        def main_nuts(s):
            return s % names_main_nuts

        with self.transaction('split nuts') as cursor:
            # modify selection and main
            for sql_subst in [
                    self._sql, main_nuts]:

                cond = []
                args = []

                self._timerange_sql(tmin, tmax, kind, cond, args, False)

                if codes is not None:
                    self._codes_match_sql(True, kind_id, codes, cond, args)

                if path is not None:
                    cond.append('files.path == ?')
                    args.append(db.relpath(abspath(path)))

                sql = sql_subst('''
                    SELECT
                        %(db)s.%(nuts)s.nut_id,
                        %(db)s.%(nuts)s.tmin_seconds,
                        %(db)s.%(nuts)s.tmin_offset,
                        %(db)s.%(nuts)s.tmax_seconds,
                        %(db)s.%(nuts)s.tmax_offset,
                        kind_codes.deltat
                    FROM files
                    INNER JOIN %(db)s.%(nuts)s
                        ON files.file_id == %(db)s.%(nuts)s.file_id
                    INNER JOIN kind_codes
                        ON %(db)s.%(nuts)s.kind_codes_id == kind_codes.kind_codes_id
                    WHERE ''' + ' AND '.join(cond))  # noqa

                insert = []
                delete = []
                for row in cursor.execute(sql, args):
                    nut_id, nut_tmin_seconds, nut_tmin_offset, \
                        nut_tmax_seconds, nut_tmax_offset, nut_deltat = row

                    nut_tmin = model.tjoin(
                        nut_tmin_seconds, nut_tmin_offset)
                    nut_tmax = model.tjoin(
                        nut_tmax_seconds, nut_tmax_offset)

                    if nut_tmin < tmax and tmin < nut_tmax:
                        # Nut overlaps the cut interval: schedule left/right
                        # remainders for insertion and the original nut for
                        # deletion.
                        if nut_tmin < tmin:
                            insert.append((
                                nut_tmin_seconds, nut_tmin_offset,
                                tmin_seconds, tmin_offset,
                                model.tscale_to_kscale(
                                    tmin_seconds - nut_tmin_seconds),
                                nut_id))

                        if tmax < nut_tmax:
                            insert.append((
                                tmax_seconds, tmax_offset,
                                nut_tmax_seconds, nut_tmax_offset,
                                model.tscale_to_kscale(
                                    nut_tmax_seconds - tmax_seconds),
                                nut_id))

                        delete.append((nut_id,))

                # Remainder nuts inherit all non-time attributes from the
                # original nut via the nested SELECT on nut_id.
                sql_add = '''
                    INSERT INTO %(db)s.%(nuts)s (
                            file_id, file_segment, file_element, kind_id,
                            kind_codes_id, tmin_seconds, tmin_offset,
                            tmax_seconds, tmax_offset, kscale )
                        SELECT
                            file_id, file_segment, file_element,
                            kind_id, kind_codes_id, ?, ?, ?, ?, ?
                        FROM %(db)s.%(nuts)s
                        WHERE nut_id == ?
                '''
                cursor.executemany(sql_subst(sql_add), insert)

                sql_delete = '''
                    DELETE FROM %(db)s.%(nuts)s WHERE nut_id == ?
                '''
                cursor.executemany(sql_subst(sql_delete), delete)

1196 

1197 def get_time_span(self, kinds=None, tight=True, dummy_limits=True): 

1198 ''' 

1199 Get time interval over all content in selection. 

1200 

1201 :param kinds: 

1202 If not ``None``, restrict query to given content kinds. 

1203 :type kind: 

1204 list of str 

1205 

1206 :complexity: 

1207 O(1), independent of the number of nuts. 

1208 

1209 :returns: 

1210 ``(tmin, tmax)``, combined time interval of queried content kinds. 

1211 ''' 

1212 

1213 sql_min = self._sql(''' 

1214 SELECT MIN(tmin_seconds), MIN(tmin_offset) 

1215 FROM %(db)s.%(nuts)s 

1216 WHERE kind_id == ? 

1217 AND tmin_seconds == ( 

1218 SELECT MIN(tmin_seconds) 

1219 FROM %(db)s.%(nuts)s 

1220 WHERE kind_id == ?) 

1221 ''') 

1222 

1223 sql_max = self._sql(''' 

1224 SELECT MAX(tmax_seconds), MAX(tmax_offset) 

1225 FROM %(db)s.%(nuts)s 

1226 WHERE kind_id == ? 

1227 AND tmax_seconds == ( 

1228 SELECT MAX(tmax_seconds) 

1229 FROM %(db)s.%(nuts)s 

1230 WHERE kind_id == ?) 

1231 ''') 

1232 

1233 gtmin = None 

1234 gtmax = None 

1235 

1236 if isinstance(kinds, str): 

1237 kinds = [kinds] 

1238 

1239 if kinds is None: 

1240 kind_ids = model.g_content_kind_ids 

1241 else: 

1242 kind_ids = model.to_kind_ids(kinds) 

1243 

1244 tmins = [] 

1245 tmaxs = [] 

1246 for kind_id in kind_ids: 

1247 for tmin_seconds, tmin_offset in self._conn.execute( 

1248 sql_min, (kind_id, kind_id)): 

1249 tmins.append(model.tjoin(tmin_seconds, tmin_offset)) 

1250 

1251 for (tmax_seconds, tmax_offset) in self._conn.execute( 

1252 sql_max, (kind_id, kind_id)): 

1253 tmaxs.append(model.tjoin(tmax_seconds, tmax_offset)) 

1254 

1255 tmins = [tmin if tmin != model.g_tmin else None for tmin in tmins] 

1256 tmaxs = [tmax if tmax != model.g_tmax else None for tmax in tmaxs] 

1257 

1258 if tight: 

1259 gtmin = nonef(min, tmins) 

1260 gtmax = nonef(max, tmaxs) 

1261 else: 

1262 gtmin = None if None in tmins else nonef(min, tmins) 

1263 gtmax = None if None in tmaxs else nonef(max, tmaxs) 

1264 

1265 if dummy_limits: 

1266 if gtmin is None: 

1267 gtmin = model.g_tmin 

1268 if gtmax is None: 

1269 gtmax = model.g_tmax 

1270 

1271 return gtmin, gtmax 

1272 

1273 def has(self, kinds): 

1274 ''' 

1275 Check availability of given content kinds. 

1276 

1277 :param kinds: 

1278 Content kinds to query. 

1279 :type kind: 

1280 list of str 

1281 

1282 :returns: 

1283 ``True`` if any of the queried content kinds is available 

1284 in the selection. 

1285 ''' 

1286 self_tmin, self_tmax = self.get_time_span( 

1287 kinds, dummy_limits=False) 

1288 

1289 return None not in (self_tmin, self_tmax) 

1290 

1291 def get_deltat_span(self, kind): 

1292 ''' 

1293 Get min and max sampling interval of all content of given kind. 

1294 

1295 :param kind: 

1296 Content kind 

1297 :type kind: 

1298 str 

1299 

1300 :returns: ``(deltat_min, deltat_max)`` 

1301 ''' 

1302 

1303 deltats = [ 

1304 deltat for deltat in self.get_deltats(kind) 

1305 if deltat is not None] 

1306 

1307 if deltats: 

1308 return min(deltats), max(deltats) 

1309 else: 

1310 return None, None 

1311 

1312 def iter_kinds(self, codes=None): 

1313 ''' 

1314 Iterate over content types available in selection. 

1315 

1316 :param codes: 

1317 If given, get kinds only for selected codes identifier. 

1318 Only a single identifier may be given here and no pattern matching 

1319 is done, currently. 

1320 :type codes: 

1321 :py:class:`~pyrocko.squirrel.model.Codes` 

1322 

1323 :yields: 

1324 Available content kinds as :py:class:`str`. 

1325 

1326 :complexity: 

1327 O(1), independent of number of nuts. 

1328 ''' 

1329 

1330 return self._database._iter_kinds( 

1331 codes=codes, 

1332 kind_codes_count='%(db)s.%(kind_codes_count)s' % self._names) 

1333 

1334 def iter_deltats(self, kind=None): 

1335 ''' 

1336 Iterate over sampling intervals available in selection. 

1337 

1338 :param kind: 

1339 If given, get sampling intervals only for a given content type. 

1340 :type kind: 

1341 str 

1342 

1343 :yields: 

1344 :py:class:`float` values. 

1345 

1346 :complexity: 

1347 O(1), independent of number of nuts. 

1348 ''' 

1349 return self._database._iter_deltats( 

1350 kind=kind, 

1351 kind_codes_count='%(db)s.%(kind_codes_count)s' % self._names) 

1352 

1353 def iter_codes(self, kind=None): 

1354 ''' 

1355 Iterate over content identifier code sequences available in selection. 

1356 

1357 :param kind: 

1358 If given, get codes only for a given content type. 

1359 :type kind: 

1360 str 

1361 

1362 :yields: 

1363 :py:class:`tuple` of :py:class:`str` 

1364 

1365 :complexity: 

1366 O(1), independent of number of nuts. 

1367 ''' 

1368 return self._database._iter_codes( 

1369 kind=kind, 

1370 kind_codes_count='%(db)s.%(kind_codes_count)s' % self._names) 

1371 

1372 def _iter_codes_info(self, kind=None, codes=None): 

1373 ''' 

1374 Iterate over number of occurrences of any (kind, codes) combination. 

1375 

1376 :param kind: 

1377 If given, get counts only for selected content type. 

1378 :type kind: 

1379 str 

1380 

1381 :yields: 

1382 Tuples of the form ``(kind, codes, deltat, kind_codes_id, count)``. 

1383 

1384 :complexity: 

1385 O(1), independent of number of nuts. 

1386 ''' 

1387 return self._database._iter_codes_info( 

1388 kind=kind, 

1389 codes=codes, 

1390 kind_codes_count='%(db)s.%(kind_codes_count)s' % self._names) 

1391 

1392 def get_kinds(self, codes=None): 

1393 ''' 

1394 Get content types available in selection. 

1395 

1396 :param codes: 

1397 If given, get kinds only for selected codes identifier. 

1398 Only a single identifier may be given here and no pattern matching 

1399 is done, currently. 

1400 :type codes: 

1401 :py:class:`~pyrocko.squirrel.model.Codes` 

1402 

1403 :returns: 

1404 Sorted list of available content types. 

1405 :rtype: 

1406 py:class:`list` of :py:class:`str` 

1407 

1408 :complexity: 

1409 O(1), independent of number of nuts. 

1410 

1411 ''' 

1412 return sorted(list(self.iter_kinds(codes=codes))) 

1413 

1414 def get_deltats(self, kind=None): 

1415 ''' 

1416 Get sampling intervals available in selection. 

1417 

1418 :param kind: 

1419 If given, get sampling intervals only for selected content type. 

1420 :type kind: 

1421 str 

1422 

1423 :complexity: 

1424 O(1), independent of number of nuts. 

1425 

1426 :returns: Sorted list of available sampling intervals. 

1427 ''' 

1428 return sorted(list(self.iter_deltats(kind=kind))) 

1429 

1430 def get_codes(self, kind=None): 

1431 ''' 

1432 Get identifier code sequences available in selection. 

1433 

1434 :param kind: 

1435 If given, get codes only for selected content type. 

1436 :type kind: 

1437 str 

1438 

1439 :complexity: 

1440 O(1), independent of number of nuts. 

1441 

1442 :returns: Sorted list of available codes as tuples of strings. 

1443 ''' 

1444 return sorted(list(self.iter_codes(kind=kind))) 

1445 

1446 def get_counts(self, kind=None): 

1447 ''' 

1448 Get number of occurrences of any (kind, codes) combination. 

1449 

1450 :param kind: 

1451 If given, get codes only for selected content type. 

1452 :type kind: 

1453 str 

1454 

1455 :complexity: 

1456 O(1), independent of number of nuts. 

1457 

1458 :returns: ``dict`` with ``counts[kind][codes]`` or ``counts[codes]`` 

1459 if kind is not ``None`` 

1460 ''' 

1461 d = {} 

1462 for kind_id, codes, _, _, count in self._iter_codes_info(kind=kind): 

1463 if kind_id not in d: 

1464 v = d[kind_id] = {} 

1465 else: 

1466 v = d[kind_id] 

1467 

1468 if codes not in v: 

1469 v[codes] = 0 

1470 

1471 v[codes] += count 

1472 

1473 if kind is not None: 

1474 return d[to_kind_id(kind)] 

1475 else: 

1476 return dict((to_kind(kind_id), v) for (kind_id, v) in d.items()) 

1477 

1478 def glob_codes(self, kind, codes): 

1479 ''' 

1480 Find codes matching given patterns. 

1481 

1482 :param kind: 

1483 Content kind to be queried. 

1484 :type kind: 

1485 str 

1486 

1487 :param codes: 

1488 List of code patterns to query. 

1489 :type codes: 

1490 :py:class:`list` of :py:class:`~pyrocko.squirrel.model.Codes` 

1491 objects appropriate for the queried content type, or anything which 

1492 can be converted to such objects. 

1493 

1494 :returns: 

1495 List of matches of the form ``[kind_codes_id, codes, deltat]``. 

1496 ''' 

1497 

1498 kind_id = to_kind_id(kind) 

1499 args = [kind_id] 

1500 pats = codes_patterns_for_kind(kind_id, codes) 

1501 

1502 if pats: 

1503 codes_cond = 'AND ( %s ) ' % ' OR '.join( 

1504 ('kind_codes.codes GLOB ?',) * len(pats)) 

1505 

1506 args.extend(pat.safe_str for pat in pats) 

1507 else: 

1508 codes_cond = '' 

1509 

1510 sql = self._sql(''' 

1511 SELECT kind_codes_id, codes, deltat FROM kind_codes 

1512 WHERE 

1513 kind_id == ? ''' + codes_cond) 

1514 

1515 return list(map(list, self._conn.execute(sql, args))) 

1516 

1517 def update(self, constraint=None, **kwargs): 

1518 ''' 

1519 Update or partially update channel and event inventories. 

1520 

1521 :param constraint: 

1522 Selection of times or areas to be brought up to date. 

1523 :type constraint: 

1524 :py:class:`~pyrocko.squirrel.client.base.Constraint` 

1525 

1526 :param \\*\\*kwargs: 

1527 Shortcut for setting ``constraint=Constraint(**kwargs)``. 

1528 

1529 This function triggers all attached remote sources, to check for 

1530 updates in the meta-data. The sources will only submit queries when 

1531 their expiration date has passed, or if the selection spans into 

1532 previously unseen times or areas. 

1533 ''' 

1534 

1535 if constraint is None: 

1536 constraint = client.Constraint(**kwargs) 

1537 

1538 task = make_task('Updating sources') 

1539 for source in task(self._sources): 

1540 source.update_channel_inventory(self, constraint) 

1541 source.update_event_inventory(self, constraint) 

1542 

1543 def update_waveform_promises(self, constraint=None, **kwargs): 

1544 ''' 

1545 Permit downloading of remote waveforms. 

1546 

1547 :param constraint: 

1548 Remote waveforms compatible with the given constraint are enabled 

1549 for download. 

1550 :type constraint: 

1551 :py:class:`~pyrocko.squirrel.client.base.Constraint` 

1552 

1553 :param \\*\\*kwargs: 

1554 Shortcut for setting ``constraint=Constraint(**kwargs)``. 

1555 

1556 Calling this method permits Squirrel to download waveforms from remote 

1557 sources when processing subsequent waveform requests. This works by 

1558 inserting so called waveform promises into the database. It will look 

1559 into the available channels for each remote source and create a promise 

1560 for each channel compatible with the given constraint. If the promise 

1561 then matches in a waveform request, Squirrel tries to download the 

1562 waveform. If the download is successful, the downloaded waveform is 

1563 added to the Squirrel and the promise is deleted. If the download 

1564 fails, the promise is kept if the reason of failure looks like being 

1565 temporary, e.g. because of a network failure. If the cause of failure 

1566 however seems to be permanent, the promise is deleted so that no 

1567 further attempts are made to download a waveform which might not be 

1568 available from that server at all. To force re-scheduling after a 

1569 permanent failure, call :py:meth:`update_waveform_promises` 

1570 yet another time. 

1571 ''' 

1572 

1573 if constraint is None: 

1574 constraint = client.Constraint(**kwargs) 

1575 

1576 for source in self._sources: 

1577 source.update_waveform_promises(self, constraint) 

1578 

1579 def remove_waveform_promises(self, from_database='selection'): 

1580 ''' 

1581 Remove waveform promises from live selection or global database. 

1582 

1583 Calling this function removes all waveform promises provided by the 

1584 attached sources. 

1585 

1586 :param from_database: 

1587 Remove from live selection ``'selection'`` or global database 

1588 ``'global'``. 

1589 ''' 

1590 for source in self._sources: 

1591 source.remove_waveform_promises(self, from_database=from_database) 

1592 

    def update_responses(self, constraint=None, **kwargs):
        '''
        Update or partially update instrument response inventories.

        :param constraint:
            Selection of times or areas to be brought up to date.
        :type constraint:
            :py:class:`~pyrocko.squirrel.client.base.Constraint`

        :param \\*\\*kwargs:
            Shortcut for setting ``constraint=Constraint(**kwargs)``.
        '''
        if constraint is None:
            constraint = client.Constraint(**kwargs)

        for source in self._sources:
            source.update_response_inventory(self, constraint)

1599 

1600 def get_nfiles(self): 

1601 ''' 

1602 Get number of files in selection. 

1603 ''' 

1604 

1605 sql = self._sql('''SELECT COUNT(*) FROM %(db)s.%(file_states)s''') 

1606 for row in self._conn.execute(sql): 

1607 return row[0] 

1608 

1609 def get_nnuts(self): 

1610 ''' 

1611 Get number of nuts in selection. 

1612 ''' 

1613 

1614 sql = self._sql('''SELECT COUNT(*) FROM %(db)s.%(nuts)s''') 

1615 for row in self._conn.execute(sql): 

1616 return row[0] 

1617 

1618 def get_total_size(self): 

1619 ''' 

1620 Get aggregated file size available in selection. 

1621 ''' 

1622 

1623 sql = self._sql(''' 

1624 SELECT SUM(files.size) FROM %(db)s.%(file_states)s 

1625 INNER JOIN files 

1626 ON %(db)s.%(file_states)s.file_id = files.file_id 

1627 ''') 

1628 

1629 for row in self._conn.execute(sql): 

1630 return row[0] or 0 

1631 

1632 def get_stats(self): 

1633 ''' 

1634 Get statistics on contents available through this selection. 

1635 ''' 

1636 

1637 kinds = self.get_kinds() 

1638 time_spans = {} 

1639 for kind in kinds: 

1640 time_spans[kind] = self.get_time_span([kind]) 

1641 

1642 return SquirrelStats( 

1643 nfiles=self.get_nfiles(), 

1644 nnuts=self.get_nnuts(), 

1645 kinds=kinds, 

1646 codes=self.get_codes(), 

1647 total_size=self.get_total_size(), 

1648 counts=self.get_counts(), 

1649 time_spans=time_spans, 

1650 sources=[s.describe() for s in self._sources], 

1651 operators=[op.describe() for op in self._operators]) 

1652 

1653 @filldocs 

1654 def check( 

1655 self, obj=None, tmin=None, tmax=None, time=None, codes=None, 

1656 ignore=[]): 

1657 ''' 

1658 Check for common data/metadata problems. 

1659 

1660 %(query_args)s 

1661 

1662 :param ignore: 

1663 Problem types to be ignored. 

1664 :type ignore: 

1665 :class:`list` of :class:`str` 

1666 (:py:class:`~pyrocko.squirrel.check.SquirrelCheckProblemType`) 

1667 

1668 :returns: 

1669 :py:class:`~pyrocko.squirrel.check.SquirrelCheck` object 

1670 containing the results of the check. 

1671 

1672 See :py:func:`~pyrocko.squirrel.check.do_check`. 

1673 ''' 

1674 

1675 from .check import do_check 

1676 tmin, tmax, codes = self._get_selection_args( 

1677 CHANNEL, obj, tmin, tmax, time, codes) 

1678 

1679 return do_check(self, tmin=tmin, tmax=tmax, codes=codes, ignore=ignore) 

1680 

    def get_content(
            self,
            nut,
            cache_id='default',
            accessor_id='default',
            show_progress=False,
            model='squirrel'):

        '''
        Get and possibly load full content for a given index entry from file.

        Loads the actual content objects (channel, station, waveform, ...) from
        file. For efficiency, sibling content (all stuff in the same file
        segment) will also be loaded as a side effect. The loaded contents are
        cached in the Squirrel object.

        :param nut:
            Index entry identifying the wanted content.
        :param cache_id:
            Name of the memory cache to use.
        :param accessor_id:
            Name of the consumer requesting the content (used for cache
            bookkeeping).
        :param show_progress:
            Show progress while loading from file.
        :param model:
            Object model of the returned content.
        :raises:
            :py:exc:`~pyrocko.squirrel.error.NotAvailable` if the content
            cannot be retrieved after loading.
        '''

        content_cache = self._content_caches[cache_id]
        if not content_cache.has(nut):

            # Cache miss: load the whole file segment; siblings end up in the
            # cache alongside the requested nut.
            for nut_loaded in io.iload(
                    nut.file_path,
                    segment=nut.file_segment,
                    format=nut.file_format,
                    database=self._database,
                    update_selection=self,
                    show_progress=show_progress):

                content_cache.put(nut_loaded)

        try:
            return content_cache.get(nut, accessor_id, model)

        except KeyError:
            # Content expected from the file was not found after loading
            # (e.g. file changed on disk since indexing).
            raise error.NotAvailable(
                'Unable to retrieve content: %s, %s, %s, %s' % nut.key)

1717 

1718 def advance_accessor(self, accessor_id='default', cache_id=None): 

1719 ''' 

1720 Notify memory caches about consumer moving to a new data batch. 

1721 

1722 :param accessor_id: 

1723 Name of accessing consumer to be advanced. 

1724 :type accessor_id: 

1725 str 

1726 

1727 :param cache_id: 

1728 Name of cache to for which the accessor should be advanced. By 

1729 default the named accessor is advanced in all registered caches. 

1730 By default, two caches named ``'default'`` and ``'waveform'`` are 

1731 available. 

1732 :type cache_id: 

1733 str 

1734 

1735 See :py:class:`~pyrocko.squirrel.cache.ContentCache` for details on how 

1736 Squirrel's memory caching works and can be tuned. Default behaviour is 

1737 to release data when it has not been used in the latest data 

1738 window/batch. If the accessor is never advanced, data is cached 

1739 indefinitely - which is often desired e.g. for station meta-data. 

1740 Methods for consecutive data traversal, like 

1741 :py:meth:`chopper_waveforms` automatically advance and clear 

1742 their accessor. 

1743 ''' 

1744 for cache_ in ( 

1745 self._content_caches.keys() 

1746 if cache_id is None 

1747 else [cache_id]): 

1748 

1749 self._content_caches[cache_].advance_accessor(accessor_id) 

1750 

1751 def clear_accessor(self, accessor_id, cache_id=None): 

1752 ''' 

1753 Notify memory caches about a consumer having finished. 

1754 

1755 :param accessor_id: 

1756 Name of accessor to be cleared. 

1757 :type accessor_id: 

1758 str 

1759 

1760 :param cache_id: 

1761 Name of cache for which the accessor should be cleared. By default 

1762 the named accessor is cleared from all registered caches. By 

1763 default, two caches named ``'default'`` and ``'waveform'`` are 

1764 available. 

1765 :type cache_id: 

1766 str 

1767 

1768 Calling this method clears all references to cache entries held by the 

1769 named accessor. Cache entries are then freed if not referenced by any 

1770 other accessor. 

1771 ''' 

1772 

1773 for cache_ in ( 

1774 self._content_caches.keys() 

1775 if cache_id is None 

1776 else [cache_id]): 

1777 

1778 self._content_caches[cache_].clear_accessor(accessor_id) 

1779 

    def get_cache_stats(self, cache_id):
        '''
        Get statistics for a named memory cache.

        :param cache_id:
            Name of the cache to query, e.g. ``'default'`` or ``'waveform'``.
        :type cache_id:
            str
        '''
        return self._content_caches[cache_id].get_stats()

1782 

1783 @filldocs 

1784 def get_stations( 

1785 self, obj=None, tmin=None, tmax=None, time=None, codes=None, 

1786 model='squirrel', on_error='raise'): 

1787 

1788 ''' 

1789 Get stations matching given constraints. 

1790 

1791 %(query_args)s 

1792 

1793 :param model: 

1794 Select object model for returned values: ``'squirrel'`` to get 

1795 Squirrel station objects or ``'pyrocko'`` to get Pyrocko station 

1796 objects with channel information attached. 

1797 :type model: 

1798 str 

1799 

1800 :returns: 

1801 List of :py:class:`pyrocko.squirrel.Station 

1802 <pyrocko.squirrel.model.Station>` objects by default or list of 

1803 :py:class:`pyrocko.model.Station <pyrocko.model.station.Station>` 

1804 objects if ``model='pyrocko'`` is requested. 

1805 

1806 See :py:meth:`iter_nuts` for details on time span matching. 

1807 ''' 

1808 

1809 if model == 'pyrocko': 

1810 return self._get_pyrocko_stations( 

1811 obj, tmin, tmax, time, codes, on_error=on_error) 

1812 elif model in ('squirrel', 'stationxml', 'stationxml+'): 

1813 args = self._get_selection_args( 

1814 STATION, obj, tmin, tmax, time, codes) 

1815 

1816 nuts = sorted( 

1817 self.iter_nuts('station', *args), key=lambda nut: nut.dkey) 

1818 

1819 return [self.get_content(nut, model=model) for nut in nuts] 

1820 else: 

1821 raise ValueError('Invalid station model: %s' % model) 

1822 

1823 @filldocs 

1824 def get_channels( 

1825 self, obj=None, tmin=None, tmax=None, time=None, codes=None, 

1826 model='squirrel'): 

1827 

1828 ''' 

1829 Get channels matching given constraints. 

1830 

1831 %(query_args)s 

1832 

1833 :returns: 

1834 List of :py:class:`~pyrocko.squirrel.model.Channel` objects. 

1835 

1836 See :py:meth:`iter_nuts` for details on time span matching. 

1837 ''' 

1838 

1839 args = self._get_selection_args( 

1840 CHANNEL, obj, tmin, tmax, time, codes) 

1841 

1842 nuts = sorted( 

1843 self.iter_nuts('channel', *args), key=lambda nut: nut.dkey) 

1844 

1845 return [self.get_content(nut, model=model) for nut in nuts] 

1846 

    @filldocs
    def get_sensors(
            self, obj=None, tmin=None, tmax=None, time=None, codes=None):

        '''
        Get sensors matching given constraints.

        %(query_args)s

        :returns:
            List of :py:class:`~pyrocko.squirrel.model.Sensor` objects.

        See :py:meth:`iter_nuts` for details on time span matching.
        '''

        tmin, tmax, codes = self._get_selection_args(
            CHANNEL, obj, tmin, tmax, time, codes)

        if codes is not None:
            # Widen channel patterns so that all components of a sensor are
            # matched: the last character (the component code) is replaced
            # with '?', except for plain '*' wildcards which already match
            # everything.
            codes = codes_patterns_list(
                (entry.replace(channel=entry.channel[:-1] + '?')
                 if entry.channel != '*' else entry)
                for entry in codes)

        nuts = sorted(
            self.iter_nuts(
                'channel', tmin, tmax, codes), key=lambda nut: nut.dkey)

        # Group channels into sensors; keep only sensors whose time span
        # intersects the queried interval.
        return [
            sensor for sensor in model.Sensor.from_channels(
                self.get_content(nut) for nut in nuts)
            if match_time_span(tmin, tmax, sensor)]

1879 

1880 @filldocs 

1881 def get_responses( 

1882 self, obj=None, tmin=None, tmax=None, time=None, codes=None, 

1883 model='squirrel'): 

1884 

1885 ''' 

1886 Get instrument responses matching given constraints. 

1887 

1888 %(query_args)s 

1889 

1890 :param model: 

1891 Select data model for returned objects. Choices: ``'squirrel'``, 

1892 ``'stationxml'``, ``'stationxml+'``. See return value description. 

1893 :type model: 

1894 str 

1895 

1896 :returns: 

1897 List of :py:class:`~pyrocko.squirrel.model.Response` if ``model == 

1898 'squirrel'`` or list of 

1899 :py:class:`~pyrocko.io.stationxml.FDSNStationXML` 

1900 if ``model == 'stationxml'`` or list of 

1901 (:py:class:`~pyrocko.squirrel.model.Response`, 

1902 :py:class:`~pyrocko.io.stationxml.FDSNStationXML`) if ``model == 

1903 'stationxml+'``. 

1904 

1905 See :py:meth:`iter_nuts` for details on time span matching. 

1906 ''' 

1907 

1908 args = self._get_selection_args( 

1909 RESPONSE, obj, tmin, tmax, time, codes) 

1910 

1911 nuts = sorted( 

1912 self.iter_nuts('response', *args), key=lambda nut: nut.dkey) 

1913 

1914 return [self.get_content(nut, model=model) for nut in nuts] 

1915 

1916 @filldocs 

1917 def get_response( 

1918 self, obj=None, tmin=None, tmax=None, time=None, codes=None, 

1919 model='squirrel', on_duplicate='raise'): 

1920 

1921 ''' 

1922 Get instrument response matching given constraints. 

1923 

1924 %(query_args)s 

1925 

1926 :param model: 

1927 Select data model for returned object. Choices: ``'squirrel'``, 

1928 ``'stationxml'``, ``'stationxml+'``. See return value description. 

1929 :type model: 

1930 str 

1931 

1932 :param on_duplicate: 

1933 Determines how duplicates/multiple matching responses are handled. 

1934 Choices: ``'raise'`` - raise 

1935 :py:exc:`~pyrocko.squirrel.error.Duplicate`, ``'warn'`` - emit a 

1936 warning and return first match, ``'ignore'`` - silently return 

1937 first match. 

1938 :type on_duplicate: 

1939 str 

1940 

1941 :returns: 

1942 :py:class:`~pyrocko.squirrel.model.Response` if 

1943 ``model == 'squirrel'`` or 

1944 :py:class:`~pyrocko.io.stationxml.FDSNStationXML` if ``model == 

1945 'stationxml'`` or 

1946 (:py:class:`~pyrocko.squirrel.model.Response`, 

1947 :py:class:`~pyrocko.io.stationxml.FDSNStationXML`) if ``model == 

1948 'stationxml+'``. 

1949 

1950 Same as :py:meth:`get_responses` but returning exactly one response. 

1951 Raises :py:exc:`~pyrocko.squirrel.error.NotAvailable` if none is 

1952 available. Duplicates are handled according to the ``on_duplicate`` 

1953 argument. 

1954 

1955 See :py:meth:`iter_nuts` for details on time span matching. 

1956 ''' 

1957 

1958 if model == 'stationxml': 

1959 model_ = 'stationxml+' 

1960 else: 

1961 model_ = model 

1962 

1963 responses = self.get_responses( 

1964 obj, tmin, tmax, time, codes, model=model_) 

1965 if len(responses) == 0: 

1966 raise error.NotAvailable( 

1967 'No instrument response available (%s).' 

1968 % self._get_selection_args_str( 

1969 RESPONSE, obj, tmin, tmax, time, codes)) 

1970 

1971 elif len(responses) > 1: 

1972 

1973 if on_duplicate in ('raise', 'warn'): 

1974 if model_ == 'squirrel': 

1975 resps_sq = responses 

1976 elif model_ == 'stationxml+': 

1977 resps_sq = [resp[0] for resp in responses] 

1978 else: 

1979 raise ValueError('Invalid response model: %s' % model) 

1980 

1981 rinfo = ':\n' + '\n'.join( 

1982 ' ' + resp.summary for resp in resps_sq) 

1983 

1984 message = \ 

1985 'Multiple instrument responses matching given ' \ 

1986 'constraints (%s)%s%s' % ( 

1987 self._get_selection_args_str( 

1988 RESPONSE, obj, tmin, tmax, time, codes), 

1989 ' -> using first' if on_duplicate == 'warn' else '', 

1990 rinfo) 

1991 

1992 if on_duplicate == 'raise': 

1993 raise error.Duplicate(message) 

1994 

1995 elif on_duplicate == 'warn': 

1996 logger.warning(message) 

1997 

1998 elif on_duplicate == 'ignore': 

1999 pass 

2000 

2001 else: 

2002 ValueError( 

2003 'Invalid argument for on_duplicate: %s' % on_duplicate) 

2004 

2005 if model == 'stationxml': 

2006 return responses[0][1] 

2007 else: 

2008 return responses[0] 

2009 

2010 @filldocs 

2011 def get_events( 

2012 self, obj=None, tmin=None, tmax=None, time=None, codes=None): 

2013 

2014 ''' 

2015 Get events matching given constraints. 

2016 

2017 %(query_args)s 

2018 

2019 :returns: 

2020 List of :py:class:`~pyrocko.model.event.Event` objects. 

2021 

2022 See :py:meth:`iter_nuts` for details on time span matching. 

2023 ''' 

2024 

2025 args = self._get_selection_args(EVENT, obj, tmin, tmax, time, codes) 

2026 nuts = sorted( 

2027 self.iter_nuts('event', *args), key=lambda nut: nut.dkey) 

2028 

2029 return [self.get_content(nut) for nut in nuts] 

2030 

2031 def _redeem_promises(self, *args, order_only=False): 

2032 

2033 def split_promise(order, tmax=None): 

2034 self._split_nuts( 

2035 'waveform_promise', 

2036 order.tmin, tmax if tmax is not None else order.tmax, 

2037 codes=order.codes, 

2038 path=order.source_id) 

2039 

2040 tmin, tmax = args[:2] 

2041 

2042 waveforms = list(self.iter_nuts('waveform', *args)) 

2043 promises = list(self.iter_nuts('waveform_promise', *args)) 

2044 

2045 codes_to_avail = defaultdict(list) 

2046 for nut in waveforms: 

2047 codes_to_avail[nut.codes].append((nut.tmin, nut.tmax)) 

2048 

2049 def tts(x): 

2050 if isinstance(x, tuple): 

2051 return tuple(tts(e) for e in x) 

2052 elif isinstance(x, list): 

2053 return list(tts(e) for e in x) 

2054 else: 

2055 return util.time_to_str(x) 

2056 

2057 now = time.time() 

2058 orders = [] 

2059 for promise in promises: 

2060 waveforms_avail = codes_to_avail[promise.codes] 

2061 for block_tmin, block_tmax in blocks( 

2062 max(tmin, promise.tmin), 

2063 min(tmax, promise.tmax), 

2064 promise.deltat): 

2065 

2066 if block_tmin > now: 

2067 continue 

2068 

2069 orders.append( 

2070 WaveformOrder( 

2071 source_id=promise.file_path, 

2072 codes=promise.codes, 

2073 tmin=block_tmin, 

2074 tmax=block_tmax, 

2075 deltat=promise.deltat, 

2076 gaps=gaps(waveforms_avail, block_tmin, block_tmax), 

2077 time_created=now)) 

2078 

2079 orders_noop, orders = lpick(lambda order: order.gaps, orders) 

2080 

2081 order_keys_noop = set(order_key(order) for order in orders_noop) 

2082 if len(order_keys_noop) != 0 or len(orders_noop) != 0: 

2083 logger.info( 

2084 'Waveform orders already satisified with cached/local data: ' 

2085 '%i (%i)' % (len(order_keys_noop), len(orders_noop))) 

2086 

2087 for order in orders_noop: 

2088 split_promise(order) 

2089 

2090 if order_only: 

2091 if orders: 

2092 self._pending_orders.extend(orders) 

2093 logger.info( 

2094 'Enqueuing %i waveform order%s.' 

2095 % len_plural(orders)) 

2096 return 

2097 else: 

2098 if self._pending_orders: 

2099 orders.extend(self._pending_orders) 

2100 logger.info( 

2101 'Adding %i previously enqueued order%s.' 

2102 % len_plural(self._pending_orders)) 

2103 

2104 self._pending_orders = [] 

2105 

2106 source_ids = [] 

2107 sources = {} 

2108 for source in self._sources: 

2109 if isinstance(source, fdsn.FDSNSource): 

2110 source_ids.append(source._source_id) 

2111 sources[source._source_id] = source 

2112 

2113 source_priority = dict( 

2114 (source_id, i) for (i, source_id) in enumerate(source_ids)) 

2115 

2116 order_groups = defaultdict(list) 

2117 for order in orders: 

2118 order_groups[order_key(order)].append(order) 

2119 

2120 for k, order_group in order_groups.items(): 

2121 order_group.sort( 

2122 key=lambda order: source_priority[order.source_id]) 

2123 

2124 n_order_groups = len(order_groups) 

2125 

2126 if len(order_groups) != 0 or len(orders) != 0: 

2127 logger.info( 

2128 'Waveform orders standing for download: %i (%i)' 

2129 % (len(order_groups), len(orders))) 

2130 

2131 task = make_task('Waveform orders processed', n_order_groups) 

2132 else: 

2133 task = None 

2134 

2135 def release_order_group(order): 

2136 okey = order_key(order) 

2137 for followup in order_groups[okey]: 

2138 if followup is not order: 

2139 split_promise(followup) 

2140 

2141 del order_groups[okey] 

2142 

2143 if task: 

2144 task.update(n_order_groups - len(order_groups)) 

2145 

2146 def noop(order): 

2147 pass 

2148 

2149 def success(order, trs): 

2150 release_order_group(order) 

2151 if order.is_near_real_time(): 

2152 if not trs: 

2153 return # keep promise when no data received at real time 

2154 else: 

2155 tmax = max(tr.tmax+tr.deltat for tr in trs) 

2156 tmax = order.tmin \ 

2157 + round((tmax - order.tmin) / order.deltat) \ 

2158 * order.deltat 

2159 split_promise(order, tmax) 

2160 else: 

2161 split_promise(order) 

2162 

2163 def batch_add(paths): 

2164 self.add(paths) 

2165 

2166 calls = queue.Queue() 

2167 

2168 def enqueue(f): 

2169 def wrapper(*args): 

2170 calls.put((f, args)) 

2171 

2172 return wrapper 

2173 

2174 while order_groups: 

2175 

2176 orders_now = [] 

2177 empty = [] 

2178 for k, order_group in order_groups.items(): 

2179 try: 

2180 orders_now.append(order_group.pop(0)) 

2181 except IndexError: 

2182 empty.append(k) 

2183 

2184 for k in empty: 

2185 del order_groups[k] 

2186 

2187 by_source_id = defaultdict(list) 

2188 for order in orders_now: 

2189 by_source_id[order.source_id].append(order) 

2190 

2191 threads = [] 

2192 for source_id in by_source_id: 

2193 def download(): 

2194 try: 

2195 sources[source_id].download_waveforms( 

2196 by_source_id[source_id], 

2197 success=enqueue(success), 

2198 error_permanent=enqueue(split_promise), 

2199 error_temporary=noop, 

2200 batch_add=enqueue(batch_add)) 

2201 

2202 finally: 

2203 calls.put(None) 

2204 

2205 if len(by_source_id) > 1: 

2206 thread = threading.Thread(target=download) 

2207 thread.start() 

2208 threads.append(thread) 

2209 else: 

2210 download() 

2211 calls.put(None) 

2212 

2213 ndone = 0 

2214 while ndone < len(by_source_id): 

2215 ret = calls.get() 

2216 if ret is None: 

2217 ndone += 1 

2218 else: 

2219 ret[0](*ret[1]) 

2220 

2221 for thread in threads: 

2222 thread.join() 

2223 

2224 if task: 

2225 task.update(n_order_groups - len(order_groups)) 

2226 

2227 if task: 

2228 task.done() 

2229 

2230 @filldocs 

2231 def get_waveform_nuts( 

2232 self, obj=None, tmin=None, tmax=None, time=None, codes=None, 

2233 codes_exclude=None, sample_rate_min=None, sample_rate_max=None, 

2234 order_only=False): 

2235 

2236 ''' 

2237 Get waveform content entities matching given constraints. 

2238 

2239 %(query_args)s 

2240 

2241 Like :py:meth:`get_nuts` with ``kind='waveform'`` but additionally 

2242 resolves matching waveform promises (downloads waveforms from remote 

2243 sources). 

2244 

2245 See :py:meth:`iter_nuts` for details on time span matching. 

2246 ''' 

2247 

2248 args = self._get_selection_args(WAVEFORM, obj, tmin, tmax, time, codes) 

2249 

2250 if self.downloads_enabled: 

2251 self._redeem_promises( 

2252 *args, 

2253 codes_exclude, 

2254 sample_rate_min, 

2255 sample_rate_max, 

2256 order_only=order_only) 

2257 

2258 nuts = sorted( 

2259 self.iter_nuts('waveform', *args), key=lambda nut: nut.dkey) 

2260 

2261 return nuts 

2262 

2263 @filldocs 

2264 def have_waveforms( 

2265 self, obj=None, tmin=None, tmax=None, time=None, codes=None): 

2266 

2267 ''' 

2268 Check if any waveforms or waveform promises are available for given 

2269 constraints. 

2270 

2271 %(query_args)s 

2272 ''' 

2273 

2274 args = self._get_selection_args(WAVEFORM, obj, tmin, tmax, time, codes) 

2275 return bool(list( 

2276 self.iter_nuts('waveform', *args, limit=1))) \ 

2277 or (self.downloads_enabled and bool(list( 

2278 self.iter_nuts('waveform_promise', *args, limit=1)))) 

2279 

    @filldocs
    def get_waveforms(
            self, obj=None, tmin=None, tmax=None, time=None, codes=None,
            codes_exclude=None, sample_rate_min=None, sample_rate_max=None,
            uncut=False, want_incomplete=True, degap=True,
            maxgap=5, maxlap=None, snap=None, include_last=False,
            load_data=True, accessor_id='default', operator_params=None,
            order_only=False, channel_priorities=None):

        '''
        Get waveforms matching given constraints.

        %(query_args)s

        :param sample_rate_min:
            Consider only waveforms with a sampling rate equal to or greater
            than the given value [Hz].
        :type sample_rate_min:
            float

        :param sample_rate_max:
            Consider only waveforms with a sampling rate equal to or less than
            the given value [Hz].
        :type sample_rate_max:
            float

        :param uncut:
            Set to ``True``, to disable cutting traces to [``tmin``, ``tmax``]
            and to disable degapping/deoverlapping. Returns untouched traces as
            they are read from file segment. File segments are always read in
            their entirety.
        :type uncut:
            bool

        :param want_incomplete:
            If ``True``, gappy/incomplete traces are included in the result.
        :type want_incomplete:
            bool

        :param degap:
            If ``True``, connect traces and remove gaps and overlaps.
        :type degap:
            bool

        :param maxgap:
            Maximum gap size in samples which is filled with interpolated
            samples when ``degap`` is ``True``.
        :type maxgap:
            int

        :param maxlap:
            Maximum overlap size in samples which is removed when ``degap`` is
            ``True``.
        :type maxlap:
            int

        :param snap:
            Rounding functions used when computing sample index from time
            instance, for trace start and trace end, respectively. By default,
            ``(round, round)`` is used.
        :type snap:
            :py:class:`tuple` of 2 callables

        :param include_last:
            If ``True``, add one more sample to the returned traces (the sample
            which would be the first sample of a query with ``tmin`` set to the
            current value of ``tmax``).
        :type include_last:
            bool

        :param load_data:
            If ``True``, waveform data samples are read from files (or cache).
            If ``False``, meta-information-only traces are returned (dummy
            traces with no data samples).
        :type load_data:
            bool

        :param accessor_id:
            Name of consumer on who's behalf data is accessed. Used in cache
            management (see :py:mod:`~pyrocko.squirrel.cache`). Used as a key
            to distinguish different points of extraction for the decision of
            when to release cached waveform data. Should be used when data is
            alternately extracted from more than one region / selection.
        :type accessor_id:
            str

        :param channel_priorities:
            List of band/instrument code combinations to try. For example,
            giving ``['HH', 'BH']`` would first try to get ``HH?`` channels and
            then fallback to ``BH?`` if these are not available. The first
            matching waveforms are returned. Use in combination with
            ``sample_rate_min`` and ``sample_rate_max`` to constrain the sample
            rate.
        :type channel_priorities:
            :py:class:`list` of :py:class:`str`

        See :py:meth:`iter_nuts` for details on time span matching.

        Loaded data is kept in memory (at least) until
        :py:meth:`clear_accessor` has been called or
        :py:meth:`advance_accessor` has been called two consecutive times
        without data being accessed between the two calls (by this accessor).
        Data may still be further kept in the memory cache if held alive by
        consumers with a different ``accessor_id``.
        '''

        tmin, tmax, codes = self._get_selection_args(
            WAVEFORM, obj, tmin, tmax, time, codes)

        # Channel-priority queries are handled by a separate driver which
        # calls back into this method once per priority level.
        if channel_priorities is not None:
            return self._get_waveforms_prioritized(
                tmin=tmin, tmax=tmax, codes=codes, codes_exclude=codes_exclude,
                sample_rate_min=sample_rate_min,
                sample_rate_max=sample_rate_max,
                uncut=uncut, want_incomplete=want_incomplete, degap=degap,
                maxgap=maxgap, maxlap=maxlap, snap=snap,
                include_last=include_last, load_data=load_data,
                accessor_id=accessor_id, operator_params=operator_params,
                order_only=order_only, channel_priorities=channel_priorities)

        kinds = ['waveform']
        if self.downloads_enabled:
            kinds.append('waveform_promise')

        self_tmin, self_tmax = self.get_time_span(kinds)

        if None in (self_tmin, self_tmax):
            logger.warning(
                'No waveforms available.')
            return []

        # Fall back to the full available time span when no bounds given.
        tmin = tmin if tmin is not None else self_tmin
        tmax = tmax if tmax is not None else self_tmax

        if codes is not None and len(codes) == 1:
            # TODO: fix for multiple / mixed codes
            # If an operator is registered for these output codes, let it
            # synthesize the waveforms instead of querying raw data directly.
            operator = self.get_operator(codes[0])
            if operator is not None:
                return operator.get_waveforms(
                    self, codes[0],
                    tmin=tmin, tmax=tmax,
                    uncut=uncut, want_incomplete=want_incomplete, degap=degap,
                    maxgap=maxgap, maxlap=maxlap, snap=snap,
                    include_last=include_last, load_data=load_data,
                    accessor_id=accessor_id, params=operator_params)

        # Resolves promises (may trigger downloads) and returns sorted nuts.
        nuts = self.get_waveform_nuts(
            obj, tmin, tmax, time, codes, codes_exclude, sample_rate_min,
            sample_rate_max, order_only=order_only)

        if order_only:
            return []

        if load_data:
            traces = [
                self.get_content(nut, 'waveform', accessor_id) for nut in nuts]

        else:
            # Meta-information-only dummy traces, no sample data.
            traces = [
                trace.Trace(**nut.trace_kwargs) for nut in nuts]

        if uncut:
            return traces

        if snap is None:
            snap = (round, round)

        chopped = []
        for tr in traces:
            if not load_data and tr.ydata is not None:
                # Strip any sample data that slipped in via the cache.
                tr = tr.copy(data=False)
                tr.ydata = None

            try:
                chopped.append(tr.chop(
                    tmin, tmax,
                    inplace=False,
                    snap=snap,
                    include_last=include_last))

            except trace.NoData:
                pass

        # Sort, degap/deoverlap and optionally weed out incomplete traces.
        processed = self._process_chopped(
            chopped, degap, maxgap, maxlap, want_incomplete, tmin, tmax)

        return processed

2467 

2468 def _get_waveforms_prioritized( 

2469 self, tmin=None, tmax=None, codes=None, codes_exclude=None, 

2470 channel_priorities=None, **kwargs): 

2471 

2472 trs_all = [] 

2473 codes_have = set() 

2474 for channel in channel_priorities: 

2475 assert len(channel) == 2 

2476 if codes is not None: 

2477 codes_now = [ 

2478 codes_.replace(channel=channel+'?') for codes_ in codes] 

2479 else: 

2480 codes_now = model.CodesNSLCE('*', '*', '*', channel+'?') 

2481 

2482 codes_exclude_now = list(set( 

2483 codes_.replace(channel=channel+codes_.channel[-1]) 

2484 for codes_ in codes_have)) 

2485 

2486 if codes_exclude: 

2487 codes_exclude_now.extend(codes_exclude) 

2488 

2489 trs = self.get_waveforms( 

2490 tmin=tmin, 

2491 tmax=tmax, 

2492 codes=codes_now, 

2493 codes_exclude=codes_exclude_now, 

2494 **kwargs) 

2495 

2496 codes_have.update(set(tr.codes for tr in trs)) 

2497 trs_all.extend(trs) 

2498 

2499 return trs_all 

2500 

    @filldocs
    def chopper_waveforms(
            self, obj=None, tmin=None, tmax=None, time=None, codes=None,
            codes_exclude=None, sample_rate_min=None, sample_rate_max=None,
            tinc=None, tpad=0.,
            want_incomplete=True, snap_window=False,
            degap=True, maxgap=5, maxlap=None,
            snap=None, include_last=False, load_data=True,
            accessor_id=None, clear_accessor=True, operator_params=None,
            grouping=None, channel_priorities=None):

        '''
        Iterate window-wise over waveform archive.

        %(query_args)s

        :param tinc:
            Time increment (window shift time) (default uses ``tmax-tmin``).
        :type tinc:
            :py:func:`~pyrocko.util.get_time_float`

        :param tpad:
            Padding time appended on either side of the data window (window
            overlap is ``2*tpad``).
        :type tpad:
            :py:func:`~pyrocko.util.get_time_float`

        :param want_incomplete:
            If ``True``, gappy/incomplete traces are included in the result.
        :type want_incomplete:
            bool

        :param snap_window:
            If ``True``, start time windows at multiples of tinc with respect
            to system time zero.
        :type snap_window:
            bool

        :param degap:
            If ``True``, connect traces and remove gaps and overlaps.
        :type degap:
            bool

        :param maxgap:
            Maximum gap size in samples which is filled with interpolated
            samples when ``degap`` is ``True``.
        :type maxgap:
            int

        :param maxlap:
            Maximum overlap size in samples which is removed when ``degap`` is
            ``True``.
        :type maxlap:
            int

        :param snap:
            Rounding functions used when computing sample index from time
            instance, for trace start and trace end, respectively. By default,
            ``(round, round)`` is used.
        :type snap:
            :py:class:`tuple` of 2 callables

        :param include_last:
            If ``True``, add one more sample to the returned traces (the sample
            which would be the first sample of a query with ``tmin`` set to the
            current value of ``tmax``).
        :type include_last:
            bool

        :param load_data:
            If ``True``, waveform data samples are read from files (or cache).
            If ``False``, meta-information-only traces are returned (dummy
            traces with no data samples).
        :type load_data:
            bool

        :param accessor_id:
            Name of consumer on who's behalf data is accessed. Used in cache
            management (see :py:mod:`~pyrocko.squirrel.cache`). Used as a key
            to distinguish different points of extraction for the decision of
            when to release cached waveform data. Should be used when data is
            alternately extracted from more than one region / selection.
        :type accessor_id:
            str

        :param clear_accessor:
            If ``True`` (default), :py:meth:`clear_accessor` is called when the
            chopper finishes. Set to ``False`` to keep loaded waveforms in
            memory when the generator returns.
        :type clear_accessor:
            bool

        :param grouping:
            By default, traversal over the data is over time and all matching
            traces of a time window are yielded. Using this option, it is
            possible to traverse the data first by group (e.g. station or
            network) and second by time. This can reduce the number of traces
            in each batch and thus reduce the memory footprint of the process.
        :type grouping:
            :py:class:`~pyrocko.squirrel.operators.base.Grouping`

        :yields:
            For each extracted time window or waveform group a
            :py:class:`Batch` object is yielded.

        See :py:meth:`iter_nuts` for details on time span matching.
        '''

        tmin, tmax, codes = self._get_selection_args(
            WAVEFORM, obj, tmin, tmax, time, codes)

        kinds = ['waveform']
        if self.downloads_enabled:
            kinds.append('waveform_promise')

        self_tmin, self_tmax = self.get_time_span(kinds)

        if None in (self_tmin, self_tmax):
            logger.warning(
                'Content has undefined time span. No waveforms and no '
                'waveform promises?')
            return

        if snap_window and tinc is not None:
            # Align window boundaries to multiples of ``tinc``.
            tmin = tmin if tmin is not None else self_tmin
            tmax = tmax if tmax is not None else self_tmax
            tmin = math.floor(tmin / tinc) * tinc
            tmax = math.ceil(tmax / tinc) * tinc
        else:
            # Shrink by ``tpad`` so padded windows stay inside the data span.
            tmin = tmin if tmin is not None else self_tmin + tpad
            tmax = tmax if tmax is not None else self_tmax - tpad

        if tinc is None:
            tinc = tmax - tmin
            nwin = 1
        elif tinc == 0.0:
            nwin = 1
        else:
            # ``eps`` guards against an extra window from float rounding.
            eps = 1e-6
            nwin = max(1, int((tmax - tmin) / tinc - eps) + 1)

        try:
            if accessor_id is None:
                accessor_id = 'chopper%i' % self._n_choppers_active

            self._n_choppers_active += 1

            if grouping is None:
                codes_list = [codes]
            else:
                # Use a throw-away operator only to compute the grouping of
                # the available codes.
                operator = Operator(
                    filtering=CodesPatternFiltering(codes=codes),
                    grouping=grouping)

                available = set(self.get_codes(kind='waveform'))
                if self.downloads_enabled:
                    available.update(self.get_codes(kind='waveform_promise'))
                operator.update_mappings(sorted(available))

                codes_list = [
                    codes_patterns_list(scl)
                    for scl in operator.iter_in_codes()]

            ngroups = len(codes_list)
            # Outer loop over groups, inner loop over time windows.
            for igroup, scl in enumerate(codes_list):
                for iwin in range(nwin):
                    wmin, wmax = tmin+iwin*tinc, min(tmin+(iwin+1)*tinc, tmax)

                    chopped = self.get_waveforms(
                        tmin=wmin-tpad,
                        tmax=wmax+tpad,
                        codes=scl,
                        codes_exclude=codes_exclude,
                        sample_rate_min=sample_rate_min,
                        sample_rate_max=sample_rate_max,
                        snap=snap,
                        include_last=include_last,
                        load_data=load_data,
                        want_incomplete=want_incomplete,
                        degap=degap,
                        maxgap=maxgap,
                        maxlap=maxlap,
                        accessor_id=accessor_id,
                        operator_params=operator_params,
                        channel_priorities=channel_priorities)

                    # Let the cache know this accessor has moved on, so data
                    # from previous windows may be released.
                    self.advance_accessor(accessor_id)

                    yield Batch(
                        tmin=wmin,
                        tmax=wmax,
                        tpad=tpad,
                        i=iwin,
                        n=nwin,
                        igroup=igroup,
                        ngroups=ngroups,
                        traces=chopped)

        finally:
            # Runs also when the consumer abandons the generator early.
            self._n_choppers_active -= 1
            if clear_accessor:
                self.clear_accessor(accessor_id, 'waveform')

2703 

2704 def _process_chopped( 

2705 self, chopped, degap, maxgap, maxlap, want_incomplete, tmin, tmax): 

2706 

2707 chopped.sort(key=lambda a: a.full_id) 

2708 if degap: 

2709 chopped = trace.degapper(chopped, maxgap=maxgap, maxlap=maxlap) 

2710 

2711 if not want_incomplete: 

2712 chopped_weeded = [] 

2713 for tr in chopped: 

2714 emin = tr.tmin - tmin 

2715 emax = tr.tmax + tr.deltat - tmax 

2716 if (abs(emin) <= 0.5*tr.deltat and abs(emax) <= 0.5*tr.deltat): 

2717 chopped_weeded.append(tr) 

2718 

2719 elif degap: 

2720 if (0. < emin <= 5. * tr.deltat 

2721 and -5. * tr.deltat <= emax < 0.): 

2722 

2723 tr.extend(tmin, tmax-tr.deltat, fillmethod='repeat') 

2724 chopped_weeded.append(tr) 

2725 

2726 chopped = chopped_weeded 

2727 

2728 return chopped 

2729 

    def _get_pyrocko_stations(
            self, obj=None, tmin=None, tmax=None, time=None, codes=None,
            on_error='raise'):

        '''
        Build classic :py:class:`pyrocko.model.station.Station` objects from
        squirrel station and channel entries, merging duplicate epochs.

        ``on_error`` is forwarded to ``util.consistency_merge`` and controls
        how inconsistent duplicate entries are handled.
        '''

        from pyrocko import model as pmodel

        if codes is not None:
            codes = codes_patterns_for_kind(STATION, codes)

        # Per NSL key: (list of station-arg tuples, list of channel objects).
        by_nsl = defaultdict(lambda: (list(), list()))
        for station in self.get_stations(obj, tmin, tmax, time, codes):
            sargs = station._get_pyrocko_station_args()
            by_nsl[station.codes.nsl][0].append(sargs)

        # Channel queries expect NSLCE codes; widen the station patterns.
        if codes is not None:
            codes = [model.CodesNSLCE(c) for c in codes]

        for channel in self.get_channels(obj, tmin, tmax, time, codes):
            # Channels also contribute station-level args (e.g. position),
            # merged below for consistency.
            sargs = channel._get_pyrocko_station_args()
            sargs_list, channels_list = by_nsl[channel.codes.nsl]
            sargs_list.append(sargs)
            channels_list.append(channel)

        pstations = []
        nsls = list(by_nsl.keys())
        nsls.sort()
        for nsl in nsls:
            sargs_list, channels_list = by_nsl[nsl]
            # The ('',) prefix supplies the merge key expected by
            # consistency_merge.
            sargs = util.consistency_merge(
                [('',) + x for x in sargs_list],
                error=on_error)

            by_c = defaultdict(list)
            for ch in channels_list:
                by_c[ch.codes.channel].append(ch._get_pyrocko_channel_args())

            chas = list(by_c.keys())
            chas.sort()
            pchannels = []
            for cha in chas:
                list_of_cargs = by_c[cha]
                cargs = util.consistency_merge(
                    [('',) + x for x in list_of_cargs],
                    error=on_error)
                pchannels.append(pmodel.Channel(*cargs))

            pstations.append(
                pmodel.Station(*sargs, channels=pchannels))

        return pstations

2780 

2781 @property 

2782 def pile(self): 

2783 

2784 ''' 

2785 Emulates the older :py:class:`pyrocko.pile.Pile` interface. 

2786 

2787 This property exposes a :py:class:`pyrocko.squirrel.pile.Pile` object, 

2788 which emulates most of the older :py:class:`pyrocko.pile.Pile` methods 

2789 but uses the fluffy power of the Squirrel under the hood. 

2790 

2791 This interface can be used as a drop-in replacement for piles which are 

2792 used in existing scripts and programs for efficient waveform data 

2793 access. The Squirrel-based pile scales better for large datasets. Newer 

2794 scripts should use Squirrel's native methods to avoid the emulation 

2795 overhead. 

2796 ''' 

2797 from . import pile 

2798 

2799 if self._pile is None: 

2800 self._pile = pile.Pile(self) 

2801 

2802 return self._pile 

2803 

2804 def snuffle(self, **kwargs): 

2805 ''' 

2806 Look at dataset in Snuffler. 

2807 ''' 

2808 self.pile.snuffle(**kwargs) 

2809 

2810 def _gather_codes_keys(self, kind, gather, selector): 

2811 return set( 

2812 gather(codes) 

2813 for codes in self.iter_codes(kind) 

2814 if selector is None or selector(codes)) 

2815 

    def __str__(self):
        # Human-readable summary, delegated to the statistics object
        # returned by ``get_stats()``.
        return str(self.get_stats())

2818 

    def get_coverage(
            self, kind, tmin=None, tmax=None, codes=None, limit=None):

        '''
        Get coverage information.

        Get information about strips of gapless data coverage.

        :param kind:
            Content kind to be queried.
        :type kind:
            str

        :param tmin:
            Start time of query interval.
        :type tmin:
            :py:func:`~pyrocko.util.get_time_float`

        :param tmax:
            End time of query interval.
        :type tmax:
            :py:func:`~pyrocko.util.get_time_float`

        :param codes:
            If given, restrict query to given content codes patterns.
        :type codes:
            :py:class:`list` of :py:class:`~pyrocko.squirrel.model.Codes`
            objects appropriate for the queried content type, or anything which
            can be converted to such objects.

        :param limit:
            Limit query to return only up to a given maximum number of entries
            per matching time series (without setting this option, very gappy
            data could cause the query to execute for a very long time).
        :type limit:
            int

        :returns:
            Information about time spans covered by the requested time series
            data.
        :rtype:
            :py:class:`list` of :py:class:`~pyrocko.squirrel.model.Coverage`
        '''

        # Times are stored split into integer seconds plus a float offset.
        tmin_seconds, tmin_offset = model.tsplit(tmin)
        tmax_seconds, tmax_offset = model.tsplit(tmax)
        kind_id = to_kind_id(kind)

        codes_info = list(self._iter_codes_info(kind=kind))

        # kdata_all entries: (pattern, kind_codes_id, codes_entry, deltat).
        kdata_all = []
        if codes is None:
            for _, codes_entry, deltat, kind_codes_id, _ in codes_info:
                kdata_all.append(
                    (codes_entry, kind_codes_id, codes_entry, deltat))

        else:
            for codes_entry in codes:
                pattern = to_codes(kind_id, codes_entry)
                for _, codes_entry, deltat, kind_codes_id, _ in codes_info:
                    if model.match_codes(pattern, codes_entry):
                        kdata_all.append(
                            (pattern, kind_codes_id, codes_entry, deltat))

        kind_codes_ids = [x[1] for x in kdata_all]

        # Number of segments already open at tmin, per (codes, deltat).
        counts_at_tmin = {}
        if tmin is not None:
            for nut in self.iter_nuts(
                    kind, tmin, tmin, kind_codes_ids=kind_codes_ids):

                k = nut.codes, nut.deltat
                if k not in counts_at_tmin:
                    counts_at_tmin[k] = 0

                counts_at_tmin[k] += 1

        coverages = []
        for pattern, kind_codes_id, codes_entry, deltat in kdata_all:
            # entry layout: [pattern, codes, deltat, tmin, tmax, changes].
            entry = [pattern, codes_entry, deltat, None, None, []]
            # Fetch overall first (ASC) and last (DESC) coverage change time.
            for i, order in [(0, 'ASC'), (1, 'DESC')]:
                sql = self._sql('''
                    SELECT
                        time_seconds,
                        time_offset
                    FROM %(db)s.%(coverage)s
                    WHERE
                        kind_codes_id == ?
                    ORDER BY
                        kind_codes_id ''' + order + ''',
                        time_seconds ''' + order + ''',
                        time_offset ''' + order + '''
                    LIMIT 1
                ''')

                for row in self._conn.execute(sql, [kind_codes_id]):
                    entry[3+i] = model.tjoin(row[0], row[1])

            if None in entry[3:5]:
                # No coverage information stored for this time series.
                continue

            args = [kind_codes_id]

            sql_time = ''
            if tmin is not None:
                # intentionally < because (== tmin) is queried from nuts
                sql_time += ' AND ( ? < time_seconds ' \
                    'OR ( ? == time_seconds AND ? < time_offset ) ) '
                args.extend([tmin_seconds, tmin_seconds, tmin_offset])

            if tmax is not None:
                sql_time += ' AND ( time_seconds < ? ' \
                    'OR ( ? == time_seconds AND time_offset <= ? ) ) '
                args.extend([tmax_seconds, tmax_seconds, tmax_offset])

            sql_limit = ''
            if limit is not None:
                sql_limit = ' LIMIT ?'
                args.append(limit)

            sql = self._sql('''
                SELECT
                    time_seconds,
                    time_offset,
                    step
                FROM %(db)s.%(coverage)s
                WHERE
                    kind_codes_id == ?
                    ''' + sql_time + '''
                ORDER BY
                    kind_codes_id,
                    time_seconds,
                    time_offset
            ''' + sql_limit)

            rows = list(self._conn.execute(sql, args))

            if limit is not None and len(rows) == limit:
                # Too many changes: mark changes list as truncated/unknown.
                entry[-1] = None
            else:
                # Accumulate segment counts over the change events.
                counts = counts_at_tmin.get((codes_entry, deltat), 0)
                tlast = None
                if tmin is not None:
                    entry[-1].append((tmin, counts))
                    tlast = tmin

                for row in rows:
                    t = model.tjoin(row[0], row[1])
                    counts += row[2]
                    entry[-1].append((t, counts))
                    tlast = t

                if tmax is not None and (tlast is None or tlast != tmax):
                    entry[-1].append((tmax, counts))

            coverages.append(model.Coverage.from_values(entry + [kind_id]))

        return coverages

2977 

    def get_stationxml(
            self, obj=None, tmin=None, tmax=None, time=None, codes=None,
            level='response', on_error='raise'):

        '''
        Get station/channel/response metadata in StationXML representation.

        %(query_args)s

        :returns:
            :py:class:`~pyrocko.io.stationxml.FDSNStationXML` object.
        '''

        if level not in ('network', 'station', 'channel', 'response'):
            raise ValueError('Invalid level: %s' % level)

        # NOTE(review): ``on_error`` is not validated here; an unexpected
        # value would leave ``handle_error`` undefined below — confirm
        # callers only pass 'ignore', 'warn' or 'raise'.

        tmin, tmax, codes = self._get_selection_args(
            CHANNEL, obj, tmin, tmax, time, codes)

        def tts(t):
            # Format a time (or None) for log messages.
            if t is None:
                return '<none>'
            else:
                return util.tts(t, format='%Y-%m-%d %H:%M:%S')

        if on_error == 'ignore':
            def handle_error(exc):
                pass

        elif on_error == 'warn':
            def handle_error(exc):
                logger.warning(str(exc))

        elif on_error == 'raise':
            def handle_error(exc):
                raise exc

        def use_first(node_type_name, codes, k, group):
            if on_error == 'warn':
                logger.warning(
                    'Duplicates for %s %s, %s - %s -> using first' % (
                        node_type_name,
                        '.'.join(codes),
                        tts(k[0]), tts(k[1])))

            return group[0]

        def deduplicate(node_type_name, codes, nodes):
            # Keep one node per (start_date, end_date) epoch.
            groups = defaultdict(list)
            for node in nodes:
                k = (node.start_date, node.end_date)
                groups[k].append(node)

            return [
                use_first(node_type_name, codes, k, group)
                for (k, group) in groups.items()]

        filtering = CodesPatternFiltering(codes=codes)

        nslcs = list(set(
            codes.nslc for codes in
            filtering.filter(self.get_codes(kind='channel'))))

        from pyrocko.io import stationxml as sx

        networks = []
        # Walk the codes hierarchy: network -> station -> location -> channel.
        for net, stas in prefix_tree(nslcs):
            network = sx.Network(code=net)
            networks.append(network)

            if level not in ('station', 'channel', 'response'):
                continue

            for sta, locs in stas:
                stations = self.get_stations(
                    tmin=tmin,
                    tmax=tmax,
                    codes=(net, sta, '*'),
                    model='stationxml')

                if on_error != 'raise':
                    stations = deduplicate(
                        'Station', (net, sta), stations)

                errors = sx.check_overlaps(
                    'Station', (net, sta), stations)

                if errors:
                    handle_error(error.Duplicate(
                        'Overlapping/duplicate station info:\n  %s'
                        % '\n  '.join(errors)))

                network.station_list.extend(stations)

                if level not in ('channel', 'response'):
                    continue

                for loc, chas in locs:
                    for cha, _ in chas:
                        channels = self.get_channels(
                            tmin=tmin,
                            tmax=tmax,
                            codes=(net, sta, loc, cha),
                            model='stationxml')

                        if on_error != 'raise':
                            channels = deduplicate(
                                'Channel', (net, sta, loc, cha), channels)

                        errors = sx.check_overlaps(
                            'Channel', (net, sta, loc, cha), channels)

                        if errors:
                            handle_error(error.Duplicate(
                                'Overlapping/duplicate channel info:\n  %s'
                                % '\n  '.join(errors)))

                        for channel in channels:
                            # Attach each channel to the station epoch that
                            # contains it.
                            station = sx.find_containing(stations, channel)
                            if station is not None:
                                station.channel_list.append(channel)
                            else:
                                handle_error(error.NotAvailable(
                                    'No station or station epoch found '
                                    'for channel: %s' % '.'.join(
                                        (net, sta, loc, cha))))

                                continue

                            if level != 'response':
                                continue

                            try:
                                response_sq, response_sx = self.get_response(
                                    codes=(net, sta, loc, cha),
                                    tmin=channel.start_date,
                                    tmax=channel.end_date,
                                    model='stationxml+',
                                    on_duplicate=on_error)

                            except error.NotAvailable as e:
                                handle_error(e)
                                continue

                            if not (
                                    sx.eq_open(
                                        channel.start_date, response_sq.tmin)
                                    and sx.eq_open(
                                        channel.end_date, response_sq.tmax)):

                                handle_error(error.Inconsistencies(
                                    'Response time span does not match '
                                    'channel time span: %s' % '.'.join(
                                        (net, sta, loc, cha))))

                            channel.response = response_sx

        return sx.FDSNStationXML(
            source='Generated by Pyrocko Squirrel.',
            network_list=networks)

3138 

3139 def add_operator(self, op): 

3140 self._operators.append(op) 

3141 

3142 def update_operator_mappings(self): 

3143 available = self.get_codes(kind=('channel')) 

3144 

3145 for operator in self._operators: 

3146 operator.update_mappings(available, self._operator_registry) 

3147 

3148 def iter_operator_mappings(self): 

3149 for operator in self._operators: 

3150 for in_codes, out_codes in operator.iter_mappings(): 

3151 yield operator, in_codes, out_codes 

3152 

3153 def get_operator_mappings(self): 

3154 return list(self.iter_operator_mappings()) 

3155 

3156 def get_operator(self, codes): 

3157 try: 

3158 return self._operator_registry[codes][0] 

3159 except KeyError: 

3160 return None 

3161 

3162 def get_operator_group(self, codes): 

3163 try: 

3164 return self._operator_registry[codes] 

3165 except KeyError: 

3166 return None, (None, None, None) 

3167 

3168 def iter_operator_codes(self): 

3169 for _, _, out_codes in self.iter_operator_mappings(): 

3170 for codes in out_codes: 

3171 yield codes 

3172 

3173 def get_operator_codes(self): 

3174 return list(self.iter_operator_codes()) 

3175 

3176 def print_tables(self, table_names=None, stream=None): 

3177 ''' 

3178 Dump raw database tables in textual form (for debugging purposes). 

3179 

3180 :param table_names: 

3181 Names of tables to be dumped or ``None`` to dump all. 

3182 :type table_names: 

3183 :py:class:`list` of :py:class:`str` 

3184 

3185 :param stream: 

3186 Open file or ``None`` to dump to standard output. 

3187 ''' 

3188 

3189 if stream is None: 

3190 stream = sys.stdout 

3191 

3192 if isinstance(table_names, str): 

3193 table_names = [table_names] 

3194 

3195 if table_names is None: 

3196 table_names = [ 

3197 'selection_file_states', 

3198 'selection_nuts', 

3199 'selection_kind_codes_count', 

3200 'files', 'nuts', 'kind_codes', 'kind_codes_count'] 

3201 

3202 m = { 

3203 'selection_file_states': '%(db)s.%(file_states)s', 

3204 'selection_nuts': '%(db)s.%(nuts)s', 

3205 'selection_kind_codes_count': '%(db)s.%(kind_codes_count)s', 

3206 'files': 'files', 

3207 'nuts': 'nuts', 

3208 'kind_codes': 'kind_codes', 

3209 'kind_codes_count': 'kind_codes_count'} 

3210 

3211 for table_name in table_names: 

3212 self._database.print_table( 

3213 m[table_name] % self._names, stream=stream) 

3214 

3215 

class SquirrelStats(Object):
    '''
    Container to hold statistics about contents available from a Squirrel.

    See also :py:meth:`Squirrel.get_stats`.
    '''

    # Guts-typed attributes describing the current selection.
    nfiles = Int.T(
        help='Number of files in selection.')
    nnuts = Int.T(
        help='Number of index nuts in selection.')
    codes = List.T(
        Tuple.T(content_t=String.T()),
        help='Available code sequences in selection, e.g. '
             '(agency, network, station, location) for stations nuts.')
    kinds = List.T(
        String.T(),
        help='Available content types in selection.')
    total_size = Int.T(
        help='Aggregated file size of files is selection.')
    counts = Dict.T(
        String.T(), Dict.T(Tuple.T(content_t=String.T()), Int.T()),
        help='Breakdown of how many nuts of any content type and code '
             'sequence are available in selection, ``counts[kind][codes]``.')
    time_spans = Dict.T(
        String.T(), Tuple.T(content_t=Timestamp.T()),
        help='Time spans by content type.')
    sources = List.T(
        String.T(),
        help='Descriptions of attached sources.')
    operators = List.T(
        String.T(),
        help='Descriptions of attached operators.')

    def __str__(self):
        '''
        Render the statistics as a human-readable multi-line summary.
        '''
        # Total nut count per content kind, summed over all code
        # sequences of that kind.
        kind_counts = dict(
            (kind, sum(self.counts[kind].values())) for kind in self.kinds)

        scodes = model.codes_to_str_abbreviated(self.codes)

        # Sources/operators are listed one per indented line, or
        # '<none>' when empty.
        ssources = '<none>' if not self.sources else '\n' + '\n'.join(
            ' ' + s for s in self.sources)

        soperators = '<none>' if not self.operators else '\n' + '\n'.join(
            ' ' + s for s in self.operators)

        def stime(t):
            # Format a timestamp; the global open-interval bounds and
            # None render as '<none>'.
            return util.tts(t) if t is not None and t not in (
                model.g_tmin, model.g_tmax) else '<none>'

        def stable(rows):
            # Left-justify each column of ``rows`` to the width of its
            # widest cell and join cells/rows into a text table.
            ns = [max(len(w) for w in col) for col in zip(*rows)]
            return '\n'.join(
                ' '.join(w.ljust(n) for n, w in zip(ns, row))
                for row in rows)

        def indent(s):
            return '\n'.join(' '+line for line in s.splitlines())

        # One table row per content kind: name, count and time span.
        stspans = '<none>' if not self.kinds else '\n' + indent(stable([(
            kind + ':',
            str(kind_counts[kind]),
            stime(self.time_spans[kind][0]),
            '-',
            stime(self.time_spans[kind][1])) for kind in sorted(self.kinds)]))

        s = '''
Number of files: %i
Total size of known files: %s
Number of index nuts: %i
Available content kinds: %s
Available codes: %s
Sources: %s
Operators: %s''' % (
            self.nfiles,
            util.human_bytesize(self.total_size),
            self.nnuts,
            stspans, scodes, ssources, soperators)

        # Leading newline of the template is stripped off.
        return s.lstrip()

3296 

3297 

# Public API of this module.
__all__ = [
    'Squirrel',
    'SquirrelStats',
]