Coverage for /usr/local/lib/python3.11/dist-packages/pyrocko/squirrel/base.py: 85%

873 statements  

« prev     ^ index     » next       coverage.py v6.5.0, created at 2024-03-07 11:54 +0000

1# http://pyrocko.org - GPLv3 

2# 

3# The Pyrocko Developers, 21st Century 

4# ---|P------/S----------~Lg---------- 

5 

6''' 

7Squirrel main classes. 

8''' 

9 

10import sys 

11import os 

12import time 

13import math 

14import logging 

15import threading 

16import queue 

17from collections import defaultdict 

18 

19from pyrocko.guts import Object, Int, List, Tuple, String, Timestamp, Dict 

20from pyrocko import util, trace 

21from pyrocko import progress 

22from pyrocko.plot import nice_time_tick_inc_approx_secs 

23 

24from . import model, io, cache, dataset 

25 

26from .model import to_kind_id, WaveformOrder, to_kind, to_codes, \ 

27 STATION, CHANNEL, RESPONSE, EVENT, WAVEFORM, codes_patterns_list, \ 

28 codes_patterns_for_kind 

29from .client import fdsn, catalog 

30from .selection import Selection, filldocs 

31from .database import abspath 

32from .operators.base import Operator, CodesPatternFiltering 

33from . import client, environment, error 

34 

35logger = logging.getLogger('psq.base') 

36 

37guts_prefix = 'squirrel' 

38 

39 

def nonef(f, xs):
    '''
    Apply *f* to the non-``None`` items of *xs*.

    Returns ``None`` when no items remain after filtering.
    '''
    filtered = [x for x in xs if x is not None]
    if not filtered:
        return None

    return f(filtered)

46 

47 

def make_task(*args):
    '''
    Create a progress-reporting task bound to this module's logger.
    '''
    return progress.task(*args, logger=logger)

50 

51 

def lpick(condition, seq):
    '''
    Partition *seq* into ``(non_matching, matching)`` lists by *condition*.
    '''
    rejected, accepted = [], []
    for item in seq:
        if condition(item):
            accepted.append(item)
        else:
            rejected.append(item)

    return rejected, accepted

58 

59 

def len_plural(obj):
    '''
    Return ``(len(obj), suffix)``, suffix being ``'s'`` unless length is 1.

    Convenience for messages like ``'%i file%s' % len_plural(files)``.
    '''
    n = len(obj)
    return n, '' if n == 1 else 's'

62 

63 

def blocks(tmin, tmax, deltat, nsamples_block=100000):
    '''
    Yield "nicely" sized time windows covering the span ``[tmin, tmax]``.

    The window length is a human-friendly duration close to
    ``deltat * nsamples_block`` seconds. Windows are aligned to multiples of
    the window length.
    '''
    tblock = nice_time_tick_inc_approx_secs(
        util.to_time_float(deltat * nsamples_block))

    kmin = int(math.floor(tmin / tblock))
    kmax = int(math.ceil(tmax / tblock))
    for k in range(kmin, kmax):
        yield k * tblock, (k+1) * tblock

71 

72 

def gaps(avail, tmin, tmax):
    '''
    Find gaps in the coverage of ``[tmin, tmax]`` by the *avail* intervals.

    :param avail:
        Available time spans as ``(tmin_a, tmax_a)`` pairs.
    :returns:
        List of uncovered ``(tmin_g, tmax_g)`` spans within the query span.
    '''
    assert tmin < tmax

    # Sweep-line over interval edges: +1 opens coverage, -1 closes it. The
    # query span itself contributes inverted edges so that coverage level 0
    # inside it marks a gap.
    events = [(tmax, 1), (tmin, -1)]
    for tmin_a, tmax_a in avail:
        assert tmin_a < tmax_a
        events.append((tmin_a, 1))
        events.append((tmax_a, -1))

    events.sort()

    level = 1
    tmin_g = None
    uncovered = []
    for t, step in events:
        if level == 1 and step == -1:
            tmin_g = t
        elif level == 0 and step == 1 and tmin_g is not None:
            if tmin_g != t:
                uncovered.append((tmin_g, t))

        level += step

    return uncovered

97 

98 

def order_key(order):
    '''
    Sort/grouping key for waveform orders: codes, then time span.
    '''
    return order.codes, order.tmin, order.tmax

101 

102 

103def _is_exact(pat): 

104 return not ('*' in pat or '?' in pat or ']' in pat or '[' in pat) 

105 

106 

def prefix_tree(tups):
    '''
    Build a sorted prefix tree from a list of equal-length tuples.

    :returns:
        Nested list of ``(value, children)`` pairs, sorted at each level.
    '''
    if not tups:
        return []

    # Leaf level: single-element tuples become terminal nodes.
    if len(tups[0]) == 1:
        return sorted((tup[0], []) for tup in tups)

    # Group by first element and recurse on the remainders.
    groups = defaultdict(list)
    for tup in tups:
        groups[tup[0]].append(tup[1:])

    return [
        (head, prefix_tree(groups[head]))
        for head in sorted(groups.keys())]

123 

124 

def match_time_span(tmin, tmax, obj):
    '''
    Check whether *obj*'s time span intersects the query span.

    ``None`` bounds (on either side) are treated as open, i.e. always
    matching.
    '''
    if obj.tmin is not None and tmax is not None and obj.tmin > tmax:
        return False

    if tmin is not None and obj.tmax is not None and tmin >= obj.tmax:
        return False

    return True

128 

129 

class Batch(object):
    '''
    Batch of waveforms from window-wise data extraction.

    Encapsulates state and results yielded for each window in window-wise
    waveform extraction with the :py:meth:`Squirrel.chopper_waveforms` method.

    *Attributes:*

    .. py:attribute:: tmin

        Start of this time window.

    .. py:attribute:: tmax

        End of this time window.

    .. py:attribute:: i

        Index of this time window in sequence.

    .. py:attribute:: n

        Total number of time windows in sequence.

    .. py:attribute:: igroup

        Index of this time window's sequence group.

    .. py:attribute:: ngroups

        Total number of sequence groups.

    .. py:attribute:: traces

        Extracted waveforms for this time window.
    '''

    def __init__(self, tmin, tmax, i, n, igroup, ngroups, traces):
        # Plain attribute holder; see class docstring for attribute meaning.
        self.tmin = tmin
        self.tmax = tmax
        self.i = i
        self.n = n
        self.igroup = igroup
        self.ngroups = ngroups
        self.traces = traces

176 

177 

178class Squirrel(Selection): 

179 ''' 

180 Prompt, lazy, indexing, caching, dynamic seismological dataset access. 

181 

182 :param env: 

183 Squirrel environment instance or directory path to use as starting 

184 point for its detection. By default, the current directory is used as 

185 starting point. When searching for a usable environment the directory 

186 ``'.squirrel'`` or ``'squirrel'`` in the current (or starting point) 

187 directory is used if it exists, otherwise the parent directories are 

188 searched upwards for the existence of such a directory. If no such 

189 directory is found, the user's global Squirrel environment 

190 ``'$HOME/.pyrocko/squirrel'`` is used. 

191 :type env: 

192 :py:class:`~pyrocko.squirrel.environment.Environment` or 

193 :py:class:`str` 

194 

195 :param database: 

196 Database instance or path to database. By default the 

197 database found in the detected Squirrel environment is used. 

198 :type database: 

199 :py:class:`~pyrocko.squirrel.database.Database` or :py:class:`str` 

200 

201 :param cache_path: 

202 Directory path to use for data caching. By default, the ``'cache'`` 

203 directory in the detected Squirrel environment is used. 

204 :type cache_path: 

205 :py:class:`str` 

206 

207 :param persistent: 

208 If given a name, create a persistent selection. 

209 :type persistent: 

210 :py:class:`str` 

211 

212 This is the central class of the Squirrel framework. It provides a unified 

213 interface to query and access seismic waveforms, station meta-data and 

214 event information from local file collections and remote data sources. For 

215 prompt responses, a profound database setup is used under the hood. To 

216 speed up assemblage of ad-hoc data selections, files are indexed on first 

217 use and the extracted meta-data is remembered in the database for 

218 subsequent accesses. Bulk data is lazily loaded from disk and remote 

219 sources, just when requested. Once loaded, data is cached in memory to 

220 expedite typical access patterns. Files and data sources can be dynamically 

221 added to and removed from the Squirrel selection at runtime. 

222 

223 Queries are restricted to the contents of the files currently added to the 

224 Squirrel selection (usually a subset of the file meta-information 

225 collection in the database). This list of files is referred to here as the 

226 "selection". By default, temporary tables are created in the attached 

227 database to hold the names of the files in the selection as well as various 

228 indices and counters. These tables are only visible inside the application 

229 which created them and are deleted when the database connection is closed 

230 or the application exits. To create a selection which is not deleted at 

231 exit, supply a name to the ``persistent`` argument of the Squirrel 

232 constructor. Persistent selections are shared among applications using the 

233 same database. 

234 

235 **Method summary** 

236 

237 Some of the methods are implemented in :py:class:`Squirrel`'s base class 

238 :py:class:`~pyrocko.squirrel.selection.Selection`. 

239 

240 .. autosummary:: 

241 

242 ~Squirrel.add 

243 ~Squirrel.add_source 

244 ~Squirrel.add_fdsn 

245 ~Squirrel.add_catalog 

246 ~Squirrel.add_dataset 

247 ~Squirrel.add_virtual 

248 ~Squirrel.update 

249 ~Squirrel.update_waveform_promises 

250 ~Squirrel.advance_accessor 

251 ~Squirrel.clear_accessor 

252 ~Squirrel.reload 

253 ~pyrocko.squirrel.selection.Selection.iter_paths 

254 ~Squirrel.iter_nuts 

255 ~Squirrel.iter_kinds 

256 ~Squirrel.iter_deltats 

257 ~Squirrel.iter_codes 

258 ~pyrocko.squirrel.selection.Selection.get_paths 

259 ~Squirrel.get_nuts 

260 ~Squirrel.get_kinds 

261 ~Squirrel.get_deltats 

262 ~Squirrel.get_codes 

263 ~Squirrel.get_counts 

264 ~Squirrel.get_time_span 

265 ~Squirrel.get_deltat_span 

266 ~Squirrel.get_nfiles 

267 ~Squirrel.get_nnuts 

268 ~Squirrel.get_total_size 

269 ~Squirrel.get_stats 

270 ~Squirrel.get_content 

271 ~Squirrel.get_stations 

272 ~Squirrel.get_channels 

273 ~Squirrel.get_responses 

274 ~Squirrel.get_events 

275 ~Squirrel.get_waveform_nuts 

276 ~Squirrel.get_waveforms 

277 ~Squirrel.chopper_waveforms 

278 ~Squirrel.get_coverage 

279 ~Squirrel.pile 

280 ~Squirrel.snuffle 

281 ~Squirrel.glob_codes 

282 ~pyrocko.squirrel.selection.Selection.get_database 

283 ~Squirrel.print_tables 

284 ''' 

285 

    def __init__(
            self, env=None, database=None, cache_path=None, persistent=None):

        # Resolve the environment first; database path, cache path and
        # persistence default to the values configured therein.
        if not isinstance(env, environment.Environment):
            env = environment.get_environment(env)

        if database is None:
            database = env.expand_path(env.database_path)

        if cache_path is None:
            cache_path = env.expand_path(env.cache_path)

        if persistent is None:
            persistent = env.persistent

        Selection.__init__(
            self, database=database, persistent=persistent)

        # Paths stored in the database are kept relative to the parent
        # directory of the environment's base path.
        self.get_database().set_basepath(os.path.dirname(env.get_basepath()))

        # Separate in-memory content caches: waveforms are managed
        # independently from other (meta-data) content.
        self._content_caches = {
            'waveform': cache.ContentCache(),
            'default': cache.ContentCache()}

        self._cache_path = cache_path

        self._sources = []            # remote data access clients
        self._operators = []
        self._operator_registry = {}

        self._pending_orders = []     # queued waveform download orders

        self._pile = None             # lazily created pile emulation
        self._n_choppers_active = 0

        self.downloads_enabled = True

        # Names of the selection-specific tables created below; prefixed with
        # the selection's name so multiple selections can coexist.
        self._names.update({
            'nuts': self.name + '_nuts',
            'kind_codes_count': self.name + '_kind_codes_count',
            'coverage': self.name + '_coverage'})

        with self.transaction('create tables') as cursor:
            self._create_tables_squirrel(cursor)

330 

    def _create_tables_squirrel(self, cursor):
        '''
        Create selection-specific tables, indices and triggers.

        Created in the attached selection database:

        * ``nuts``: index of content pieces in the current selection.
        * ``kind_codes_count``: per (kind, codes) content counts, maintained
          by triggers on ``nuts``.
        * ``coverage``: time coverage change points (step function edges),
          maintained by triggers on ``nuts``.
        '''

        cursor.execute(self._register_table(self._sql(
            '''
                CREATE TABLE IF NOT EXISTS %(db)s.%(nuts)s (
                    nut_id integer PRIMARY KEY,
                    file_id integer,
                    file_segment integer,
                    file_element integer,
                    kind_id integer,
                    kind_codes_id integer,
                    tmin_seconds integer,
                    tmin_offset integer,
                    tmax_seconds integer,
                    tmax_offset integer,
                    kscale integer)
            ''')))

        cursor.execute(self._register_table(self._sql(
            '''
                CREATE TABLE IF NOT EXISTS %(db)s.%(kind_codes_count)s (
                    kind_codes_id integer PRIMARY KEY,
                    count integer)
            ''')))

        cursor.execute(self._sql(
            '''
                CREATE UNIQUE INDEX IF NOT EXISTS %(db)s.%(nuts)s_file_element
                    ON %(nuts)s (file_id, file_segment, file_element)
            '''))

        cursor.execute(self._sql(
            '''
                CREATE INDEX IF NOT EXISTS %(db)s.%(nuts)s_index_file_id
                ON %(nuts)s (file_id)
            '''))

        # Time-range lookup indices; kscale supports the log-scaled span
        # binning used in _timerange_sql.
        cursor.execute(self._sql(
            '''
                CREATE INDEX IF NOT EXISTS %(db)s.%(nuts)s_index_tmin_seconds
                ON %(nuts)s (kind_id, tmin_seconds)
            '''))

        cursor.execute(self._sql(
            '''
                CREATE INDEX IF NOT EXISTS %(db)s.%(nuts)s_index_tmax_seconds
                ON %(nuts)s (kind_id, tmax_seconds)
            '''))

        cursor.execute(self._sql(
            '''
                CREATE INDEX IF NOT EXISTS %(db)s.%(nuts)s_index_kscale
                ON %(nuts)s (kind_id, kscale, tmin_seconds)
            '''))

        # Cascade deletion of nuts when their file is removed from the main
        # files table.
        cursor.execute(self._sql(
            '''
                CREATE TRIGGER IF NOT EXISTS %(db)s.%(nuts)s_delete_nuts
                BEFORE DELETE ON main.files FOR EACH ROW
                BEGIN
                  DELETE FROM %(nuts)s WHERE file_id == old.file_id;
                END
            '''))

        # trigger only on size to make silent update of mtime possible
        cursor.execute(self._sql(
            '''
                CREATE TRIGGER IF NOT EXISTS %(db)s.%(nuts)s_delete_nuts2
                BEFORE UPDATE OF size ON main.files FOR EACH ROW
                BEGIN
                  DELETE FROM %(nuts)s WHERE file_id == old.file_id;
                END
            '''))

        # Cascade deletion of nuts when the file leaves the selection.
        cursor.execute(self._sql(
            '''
                CREATE TRIGGER IF NOT EXISTS
                    %(db)s.%(file_states)s_delete_files
                BEFORE DELETE ON %(db)s.%(file_states)s FOR EACH ROW
                BEGIN
                    DELETE FROM %(nuts)s WHERE file_id == old.file_id;
                END
            '''))

        # Maintain per kind_codes content counts on insert/delete of nuts.
        cursor.execute(self._sql(
            '''
                CREATE TRIGGER IF NOT EXISTS %(db)s.%(nuts)s_inc_kind_codes
                BEFORE INSERT ON %(nuts)s FOR EACH ROW
                BEGIN
                    INSERT OR IGNORE INTO %(kind_codes_count)s VALUES
                    (new.kind_codes_id, 0);
                    UPDATE %(kind_codes_count)s
                    SET count = count + 1
                    WHERE new.kind_codes_id
                        == %(kind_codes_count)s.kind_codes_id;
                END
            '''))

        cursor.execute(self._sql(
            '''
                CREATE TRIGGER IF NOT EXISTS %(db)s.%(nuts)s_dec_kind_codes
                BEFORE DELETE ON %(nuts)s FOR EACH ROW
                BEGIN
                    UPDATE %(kind_codes_count)s
                    SET count = count - 1
                    WHERE old.kind_codes_id
                        == %(kind_codes_count)s.kind_codes_id;
                END
            '''))

        # Coverage is stored as change points of a step function: each row's
        # ``step`` is the net change in the number of overlapping content
        # spans at that time instant; rows with step 0 are pruned.
        cursor.execute(self._register_table(self._sql(
            '''
                CREATE TABLE IF NOT EXISTS %(db)s.%(coverage)s (
                    kind_codes_id integer,
                    time_seconds integer,
                    time_offset integer,
                    step integer)
            ''')))

        cursor.execute(self._sql(
            '''
                CREATE UNIQUE INDEX IF NOT EXISTS %(db)s.%(coverage)s_time
                ON %(coverage)s (kind_codes_id, time_seconds, time_offset)
            '''))

        # On nut insertion: +1 step at nut's tmin, -1 step at its tmax, then
        # prune entries which have become zero.
        cursor.execute(self._sql(
            '''
                CREATE TRIGGER IF NOT EXISTS %(db)s.%(nuts)s_add_coverage
                AFTER INSERT ON %(nuts)s FOR EACH ROW
                BEGIN
                    INSERT OR IGNORE INTO %(coverage)s VALUES
                    (new.kind_codes_id, new.tmin_seconds, new.tmin_offset, 0)
                    ;
                    UPDATE %(coverage)s
                    SET step = step + 1
                    WHERE new.kind_codes_id == %(coverage)s.kind_codes_id
                        AND new.tmin_seconds == %(coverage)s.time_seconds
                        AND new.tmin_offset == %(coverage)s.time_offset
                    ;
                    INSERT OR IGNORE INTO %(coverage)s VALUES
                    (new.kind_codes_id, new.tmax_seconds, new.tmax_offset, 0)
                    ;
                    UPDATE %(coverage)s
                    SET step = step - 1
                    WHERE new.kind_codes_id == %(coverage)s.kind_codes_id
                        AND new.tmax_seconds == %(coverage)s.time_seconds
                        AND new.tmax_offset == %(coverage)s.time_offset
                    ;
                    DELETE FROM %(coverage)s
                    WHERE new.kind_codes_id == %(coverage)s.kind_codes_id
                        AND new.tmin_seconds == %(coverage)s.time_seconds
                        AND new.tmin_offset == %(coverage)s.time_offset
                        AND step == 0
                    ;
                    DELETE FROM %(coverage)s
                    WHERE new.kind_codes_id == %(coverage)s.kind_codes_id
                        AND new.tmax_seconds == %(coverage)s.time_seconds
                        AND new.tmax_offset == %(coverage)s.time_offset
                        AND step == 0
                    ;
                END
            '''))

        # On nut deletion: inverse of the insertion trigger.
        cursor.execute(self._sql(
            '''
                CREATE TRIGGER IF NOT EXISTS %(db)s.%(nuts)s_remove_coverage
                BEFORE DELETE ON %(nuts)s FOR EACH ROW
                BEGIN
                    INSERT OR IGNORE INTO %(coverage)s VALUES
                    (old.kind_codes_id, old.tmin_seconds, old.tmin_offset, 0)
                    ;
                    UPDATE %(coverage)s
                    SET step = step - 1
                    WHERE old.kind_codes_id == %(coverage)s.kind_codes_id
                        AND old.tmin_seconds == %(coverage)s.time_seconds
                        AND old.tmin_offset == %(coverage)s.time_offset
                    ;
                    INSERT OR IGNORE INTO %(coverage)s VALUES
                    (old.kind_codes_id, old.tmax_seconds, old.tmax_offset, 0)
                    ;
                    UPDATE %(coverage)s
                    SET step = step + 1
                    WHERE old.kind_codes_id == %(coverage)s.kind_codes_id
                        AND old.tmax_seconds == %(coverage)s.time_seconds
                        AND old.tmax_offset == %(coverage)s.time_offset
                    ;
                    DELETE FROM %(coverage)s
                    WHERE old.kind_codes_id == %(coverage)s.kind_codes_id
                        AND old.tmin_seconds == %(coverage)s.time_seconds
                        AND old.tmin_offset == %(coverage)s.time_offset
                        AND step == 0
                    ;
                    DELETE FROM %(coverage)s
                    WHERE old.kind_codes_id == %(coverage)s.kind_codes_id
                        AND old.tmax_seconds == %(coverage)s.time_seconds
                        AND old.tmax_offset == %(coverage)s.time_offset
                        AND step == 0
                    ;
                END
            '''))

531 

    def _delete(self):
        '''
        Delete database tables associated with this Squirrel.
        '''

        with self.transaction('delete tables') as cursor:
            # NOTE(review): coverage-related objects are dropped with
            # IF EXISTS while the others are not — presumably because older
            # selections may predate the coverage feature; confirm.
            for s in '''
                    DROP TRIGGER %(db)s.%(nuts)s_delete_nuts;
                    DROP TRIGGER %(db)s.%(nuts)s_delete_nuts2;
                    DROP TRIGGER %(db)s.%(file_states)s_delete_files;
                    DROP TRIGGER %(db)s.%(nuts)s_inc_kind_codes;
                    DROP TRIGGER %(db)s.%(nuts)s_dec_kind_codes;
                    DROP TABLE %(db)s.%(nuts)s;
                    DROP TABLE %(db)s.%(kind_codes_count)s;
                    DROP TRIGGER IF EXISTS %(db)s.%(nuts)s_add_coverage;
                    DROP TRIGGER IF EXISTS %(db)s.%(nuts)s_remove_coverage;
                    DROP TABLE IF EXISTS %(db)s.%(coverage)s;
            '''.strip().splitlines():

                cursor.execute(self._sql(s))

        Selection._delete(self)

552 

    @filldocs
    def add(self,
            paths,
            kinds=None,
            format='detect',
            include=None,
            exclude=None,
            check=True):

        '''
        Add files to the selection.

        :param paths:
            Iterator yielding paths to files or directories to be added to the
            selection. Recurses into directories. If given a ``str``, it
            is treated as a single path to be added.
        :type paths:
            :py:class:`list` of :py:class:`str`

        :param kinds:
            Content types to be made available through the Squirrel selection.
            By default, all known content types are accepted.
        :type kinds:
            :py:class:`list` of :py:class:`str`

        :param format:
            File format identifier or ``'detect'`` to enable auto-detection
            (available: %(file_formats)s).
        :type format:
            str

        :param include:
            If not ``None``, files are only included if their paths match the
            given regular expression pattern.
        :type include:
            str

        :param exclude:
            If not ``None``, files are only included if their paths do not
            match the given regular expression pattern.
        :type exclude:
            str

        :param check:
            If ``True``, all file modification times are checked to see if
            cached information has to be updated (slow). If ``False``, only
            previously unknown files are indexed and cached information is used
            for known files, regardless of file state (fast, corresponds to
            Squirrel's ``--optimistic`` mode). File deletions will go
            undetected in the latter case.
        :type check:
            bool

        :Complexity:
            O(log N)
        '''

        if isinstance(kinds, str):
            kinds = (kinds,)

        if isinstance(paths, str):
            paths = [paths]

        kind_mask = model.to_kind_mask(kinds)

        # Virtual paths bypass file-system traversal.
        Selection.add(
            self, util.iter_select_files(
                paths,
                show_progress=False,
                include=include,
                exclude=exclude,
                pass_through=lambda path: path.startswith('virtual:')
            ), kind_mask, format)

        self._load(check)
        self._update_nuts()

629 

    def reload(self):
        '''
        Check for modifications and reindex modified files.

        Based on file modification times.
        '''

        # Force a state check on all files, then rerun indexing and refresh
        # the selection's nut index.
        self._set_file_states_force_check()
        self._load(check=True)
        self._update_nuts()

640 

641 def add_virtual(self, nuts, virtual_paths=None): 

642 ''' 

643 Add content which is not backed by files. 

644 

645 :param nuts: 

646 Content pieces to be added. 

647 :type nuts: 

648 iterator yielding :py:class:`~pyrocko.squirrel.model.Nut` objects 

649 

650 :param virtual_paths: 

651 List of virtual paths to prevent creating a temporary list of the 

652 nuts while aggregating the file paths for the selection. 

653 :type virtual_paths: 

654 :py:class:`list` of :py:class:`str` 

655 

656 Stores to the main database and the selection. 

657 ''' 

658 

659 if isinstance(virtual_paths, str): 

660 virtual_paths = [virtual_paths] 

661 

662 if virtual_paths is None: 

663 if not isinstance(nuts, list): 

664 nuts = list(nuts) 

665 virtual_paths = set(nut.file_path for nut in nuts) 

666 

667 Selection.add(self, virtual_paths) 

668 self.get_database().dig(nuts) 

669 self._update_nuts() 

670 

671 def add_volatile(self, nuts): 

672 if not isinstance(nuts, list): 

673 nuts = list(nuts) 

674 

675 paths = list(set(nut.file_path for nut in nuts)) 

676 io.backends.virtual.add_nuts(nuts) 

677 self.add_virtual(nuts, paths) 

678 self._volatile_paths.extend(paths) 

679 

680 def add_volatile_waveforms(self, traces): 

681 ''' 

682 Add in-memory waveforms which will be removed when the app closes. 

683 ''' 

684 

685 name = model.random_name() 

686 

687 path = 'virtual:volatile:%s' % name 

688 

689 nuts = [] 

690 for itr, tr in enumerate(traces): 

691 assert tr.tmin <= tr.tmax 

692 tmin_seconds, tmin_offset = model.tsplit(tr.tmin) 

693 tmax_seconds, tmax_offset = model.tsplit( 

694 tr.tmin + tr.data_len()*tr.deltat) 

695 

696 nuts.append(model.Nut( 

697 file_path=path, 

698 file_format='virtual', 

699 file_segment=itr, 

700 file_element=0, 

701 file_mtime=0, 

702 codes=tr.codes, 

703 tmin_seconds=tmin_seconds, 

704 tmin_offset=tmin_offset, 

705 tmax_seconds=tmax_seconds, 

706 tmax_offset=tmax_offset, 

707 deltat=tr.deltat, 

708 kind_id=to_kind_id('waveform'), 

709 content=tr)) 

710 

711 self.add_volatile(nuts) 

712 return path 

713 

    def _load(self, check):
        # Drain the loader to force indexing of all selected files; no
        # content is actually loaded (content=[]).
        for _ in io.iload(
                self,
                content=[],
                skip_unchanged=True,
                check=check):
            pass

721 

    def _update_nuts(self, transaction=None):
        '''
        Copy nut index entries of newly added files into the selection.

        Pulls rows from the global ``nuts`` table into the selection's own
        nuts table for all files whose state indicates pending aggregation,
        filtered by each file's content kind mask.
        '''
        transaction = transaction or self.transaction('update nuts')
        with make_task('Aggregating selection') as task, \
                transaction as cursor:

            # Report SQLite progress to the task while the bulk insert runs.
            self._conn.set_progress_handler(task.update, 100000)
            nrows = cursor.execute(self._sql(
                '''
                    INSERT INTO %(db)s.%(nuts)s
                    SELECT NULL,
                        nuts.file_id, nuts.file_segment, nuts.file_element,
                        nuts.kind_id, nuts.kind_codes_id,
                        nuts.tmin_seconds, nuts.tmin_offset,
                        nuts.tmax_seconds, nuts.tmax_offset,
                        nuts.kscale
                    FROM %(db)s.%(file_states)s
                    INNER JOIN nuts
                        ON %(db)s.%(file_states)s.file_id == nuts.file_id
                    INNER JOIN kind_codes
                        ON nuts.kind_codes_id ==
                           kind_codes.kind_codes_id
                    WHERE %(db)s.%(file_states)s.file_state != 2
                        AND (((1 << kind_codes.kind_id)
                            & %(db)s.%(file_states)s.kind_mask) != 0)
                ''')).rowcount

            task.update(nrows)
            self._set_file_states_known(transaction)
            self._conn.set_progress_handler(None, 0)

751 

    def add_source(self, source, check=True):
        '''
        Add remote resource.

        :param source:
            Remote data access client instance.
        :type source:
            subclass of :py:class:`~pyrocko.squirrel.client.base.Source`

        :param check:
            Passed on to the source's ``setup`` method; controls whether
            cached meta-data files are checked for modifications.
        :type check:
            bool
        '''

        self._sources.append(source)
        source.setup(self, check=check)

764 

    def add_fdsn(self, *args, **kwargs):
        '''
        Add FDSN site for transparent remote data access.

        Arguments are passed to
        :py:class:`~pyrocko.squirrel.client.fdsn.FDSNSource`.

        See also :py:meth:`add_source`.
        '''

        self.add_source(fdsn.FDSNSource(*args, **kwargs))

774 

    def add_catalog(self, *args, **kwargs):
        '''
        Add online catalog for transparent event data access.

        Arguments are passed to
        :py:class:`~pyrocko.squirrel.client.catalog.CatalogSource`.

        See also :py:meth:`add_source`.
        '''

        self.add_source(catalog.CatalogSource(*args, **kwargs))

784 

    def add_dataset(self, ds, check=True):
        '''
        Read dataset description from file and add its contents.

        :param ds:
            Path to dataset description file, dataset description object
            or name of a built-in dataset. See
            :py:mod:`~pyrocko.squirrel.dataset`.
        :type ds:
            :py:class:`str` or :py:class:`~pyrocko.squirrel.dataset.Dataset`

        :param check:
            If ``True``, all file modification times are checked to see if
            cached information has to be updated (slow). If ``False``, only
            previously unknown files are indexed and cached information is used
            for known files, regardless of file state (fast, corresponds to
            Squirrel's ``--optimistic`` mode). File deletions will go
            undetected in the latter case.
        :type check:
            bool
        '''
        if isinstance(ds, str):
            ds = dataset.read_dataset(ds)

        ds.setup(self, check=check)

810 

811 def _get_selection_args( 

812 self, kind_id, 

813 obj=None, tmin=None, tmax=None, time=None, codes=None): 

814 

815 if codes is not None: 

816 codes = codes_patterns_for_kind(kind_id, codes) 

817 

818 if time is not None: 

819 tmin = time 

820 tmax = time 

821 

822 if obj is not None: 

823 tmin = tmin if tmin is not None else obj.tmin 

824 tmax = tmax if tmax is not None else obj.tmax 

825 codes = codes if codes is not None else codes_patterns_for_kind( 

826 kind_id, obj.codes) 

827 

828 return tmin, tmax, codes 

829 

830 def _get_selection_args_str(self, *args, **kwargs): 

831 

832 tmin, tmax, codes = self._get_selection_args(*args, **kwargs) 

833 return 'tmin: %s, tmax: %s, codes: %s' % ( 

834 util.time_to_str(tmin) if tmin is not None else 'none', 

835 util.time_to_str(tmax) if tmax is not None else 'none', 

836 ','.join(str(entry) for entry in codes)) 

837 

838 def _selection_args_to_kwargs( 

839 self, obj=None, tmin=None, tmax=None, time=None, codes=None): 

840 

841 return dict(obj=obj, tmin=tmin, tmax=tmax, time=time, codes=codes) 

842 

    def _timerange_sql(self, tmin, tmax, kind, cond, args, naiv):
        '''
        Append SQL conditions/arguments selecting nuts in a time range.

        :param naiv:
            If ``True``, constrain only ``tmin_seconds`` without using the
            ``kscale`` scheme (slow, for testing).
        '''

        tmin_seconds, tmin_offset = model.tsplit(tmin)
        tmax_seconds, tmax_offset = model.tsplit(tmax)
        if naiv:
            cond.append('%(db)s.%(nuts)s.tmin_seconds <= ?')
            args.append(tmax_seconds)
        else:
            # Nuts are binned by log-scaled span duration (kscale). Within a
            # bin, a nut can only intersect the query if its tmin falls into
            # the query interval expanded by the bin's maximum duration — this
            # lets the (kind_id, kscale, tmin_seconds) index be used.
            tscale_edges = model.tscale_edges
            tmin_cond = []
            for kscale in range(tscale_edges.size + 1):
                if kscale != tscale_edges.size:
                    tscale = int(tscale_edges[kscale])
                    tmin_cond.append('''
                        (%(db)s.%(nuts)s.kind_id = ?
                         AND %(db)s.%(nuts)s.kscale == ?
                         AND %(db)s.%(nuts)s.tmin_seconds BETWEEN ? AND ?)
                    ''')
                    args.extend(
                        (to_kind_id(kind), kscale,
                         tmin_seconds - tscale - 1, tmax_seconds + 1))

                else:
                    # Last bin has unbounded span duration: no lower limit on
                    # tmin is possible.
                    tmin_cond.append('''
                        (%(db)s.%(nuts)s.kind_id == ?
                         AND %(db)s.%(nuts)s.kscale == ?
                         AND %(db)s.%(nuts)s.tmin_seconds <= ?)
                    ''')

                    args.extend(
                        (to_kind_id(kind), kscale, tmax_seconds + 1))
            if tmin_cond:
                cond.append(' ( ' + ' OR '.join(tmin_cond) + ' ) ')

        cond.append('%(db)s.%(nuts)s.tmax_seconds >= ?')
        args.append(tmin_seconds)

879 

880 def _codes_match_sql(self, positive, kind_id, codes, cond, args): 

881 pats = codes_patterns_for_kind(kind_id, codes) 

882 if pats is None: 

883 return 

884 

885 pats_exact = [] 

886 pats_nonexact = [] 

887 for pat in pats: 

888 spat = pat.safe_str 

889 (pats_exact if _is_exact(spat) else pats_nonexact).append(spat) 

890 

891 codes_cond = [] 

892 if pats_exact: 

893 codes_cond.append(' ( kind_codes.codes IN ( %s ) ) ' % ', '.join( 

894 '?'*len(pats_exact))) 

895 

896 args.extend(pats_exact) 

897 

898 if pats_nonexact: 

899 codes_cond.append(' ( %s ) ' % ' OR '.join( 

900 ('kind_codes.codes GLOB ?',) * len(pats_nonexact))) 

901 

902 args.extend(pats_nonexact) 

903 

904 if codes_cond: 

905 cond.append('%s ( %s )' % ( 

906 'NOT' if not positive else '', 

907 ' OR '.join(codes_cond))) 

908 

    def iter_nuts(
            self, kind=None, tmin=None, tmax=None, codes=None,
            codes_exclude=None, sample_rate_min=None, sample_rate_max=None,
            naiv=False, kind_codes_ids=None, path=None, limit=None):

        '''
        Iterate over content entities matching given constraints.

        :param kind:
            Content kind (or kinds) to extract.
        :type kind:
            :py:class:`str`, :py:class:`list` of :py:class:`str`

        :param tmin:
            Start time of query interval.
        :type tmin:
            :py:func:`~pyrocko.util.get_time_float`

        :param tmax:
            End time of query interval.
        :type tmax:
            :py:func:`~pyrocko.util.get_time_float`

        :param codes:
            List of code patterns to query.
        :type codes:
            :py:class:`list` of :py:class:`~pyrocko.squirrel.model.Codes`
            objects appropriate for the queried content type, or anything which
            can be converted to such objects.

        :param codes_exclude:
            List of code patterns to exclude from the result.

        :param sample_rate_min:
            Minimum sample rate [Hz] of contents to be retrieved.

        :param sample_rate_max:
            Maximum sample rate [Hz] of contents to be retrieved.

        :param naiv:
            Bypass time span lookup through indices (slow, for testing).
        :type naiv:
            :py:class:`bool`

        :param kind_codes_ids:
            Kind-codes IDs of contents to be retrieved (internal use).
        :type kind_codes_ids:
            :py:class:`list` of :py:class:`int`

        :param path:
            If given, restrict the query to contents of this file path.

        :param limit:
            If given, maximum number of database rows to consider.

        :yields:
            :py:class:`~pyrocko.squirrel.model.Nut` objects representing the
            intersecting content.

        :complexity:
            O(log N) for the time selection part due to heavy use of database
            indices.

        Query time span is treated as a half-open interval ``[tmin, tmax)``.
        However, if ``tmin`` equals ``tmax``, the edge logics are modified to
        closed-interval so that content intersecting with the time instant ``t
        = tmin = tmax`` is returned (otherwise nothing would be returned as
        ``[t, t)`` never matches anything).

        Time spans of content entities to be matched are also treated as half
        open intervals, e.g. content span ``[0, 1)`` is matched by query span
        ``[0, 1)`` but not by ``[-1, 0)`` or ``[1, 2)``. Also here, logics are
        modified to closed-interval when the content time span is an empty
        interval, i.e. to indicate a time instant. E.g. time instant 0 is
        matched by ``[0, 1)`` but not by ``[-1, 0)`` or ``[1, 2)``.
        '''

        # Multiple kinds: recurse once per kind.
        if not isinstance(kind, str):
            if kind is None:
                kind = model.g_content_kinds
            for kind_ in kind:
                for nut in self.iter_nuts(kind_, tmin, tmax, codes):
                    yield nut

            return

        kind_id = to_kind_id(kind)

        # Assemble WHERE conditions and bound parameters.
        cond = []
        args = []
        if tmin is not None or tmax is not None:
            assert kind is not None
            if tmin is None:
                tmin = self.get_time_span()[0]
            if tmax is None:
                tmax = self.get_time_span()[1] + 1.0

            self._timerange_sql(tmin, tmax, kind, cond, args, naiv)

        cond.append('kind_codes.kind_id == ?')
        args.append(kind_id)

        if codes is not None:
            self._codes_match_sql(True, kind_id, codes, cond, args)

        if codes_exclude is not None:
            self._codes_match_sql(False, kind_id, codes_exclude, cond, args)

        if sample_rate_min is not None:
            cond.append('kind_codes.deltat <= ?')
            args.append(1.0/sample_rate_min)

        if sample_rate_max is not None:
            cond.append('? <= kind_codes.deltat')
            args.append(1.0/sample_rate_max)

        if kind_codes_ids is not None:
            cond.append(
                ' ( kind_codes.kind_codes_id IN ( %s ) ) ' % ', '.join(
                    '?'*len(kind_codes_ids)))

            args.extend(kind_codes_ids)

        db = self.get_database()
        if path is not None:
            cond.append('files.path == ?')
            args.append(db.relpath(abspath(path)))

        sql = ('''
            SELECT
                files.path,
                files.format,
                files.mtime,
                files.size,
                %(db)s.%(nuts)s.file_segment,
                %(db)s.%(nuts)s.file_element,
                kind_codes.kind_id,
                kind_codes.codes,
                %(db)s.%(nuts)s.tmin_seconds,
                %(db)s.%(nuts)s.tmin_offset,
                %(db)s.%(nuts)s.tmax_seconds,
                %(db)s.%(nuts)s.tmax_offset,
                kind_codes.deltat
            FROM files
            INNER JOIN %(db)s.%(nuts)s
                ON files.file_id == %(db)s.%(nuts)s.file_id
            INNER JOIN kind_codes
                ON %(db)s.%(nuts)s.kind_codes_id == kind_codes.kind_codes_id
            ''')

        if cond:
            sql += ''' WHERE ''' + ' AND '.join(cond)

        if limit is not None:
            sql += ''' LIMIT %i''' % limit

        sql = self._sql(sql)
        if tmin is None and tmax is None:
            # No time constraint: yield everything matching the other
            # conditions.
            for row in self._conn.execute(sql, args):
                row = (db.abspath(row[0]),) + row[1:]
                nut = model.Nut(values_nocheck=row)
                yield nut
        else:
            assert tmin is not None and tmax is not None
            if tmin == tmax:
                # Instant query: closed-interval edge logics (see docstring).
                for row in self._conn.execute(sql, args):
                    row = (db.abspath(row[0]),) + row[1:]
                    nut = model.Nut(values_nocheck=row)
                    if (nut.tmin <= tmin < nut.tmax) \
                            or (nut.tmin == nut.tmax and tmin == nut.tmin):

                        yield nut
            else:
                # Half-open interval intersection, with special-casing of
                # empty (instant) content spans.
                for row in self._conn.execute(sql, args):
                    row = (db.abspath(row[0]),) + row[1:]
                    nut = model.Nut(values_nocheck=row)
                    if (tmin < nut.tmax and nut.tmin < tmax) \
                            or (nut.tmin == nut.tmax
                                and tmin <= nut.tmin < tmax):

                        yield nut

1075 

1076 def get_nuts(self, *args, **kwargs): 

1077 ''' 

1078 Get content entities matching given constraints. 

1079 

1080 Like :py:meth:`iter_nuts` but returns results as a list. 

1081 ''' 

1082 

1083 return list(self.iter_nuts(*args, **kwargs)) 

1084 

    def _split_nuts(
            self, kind, tmin=None, tmax=None, codes=None, path=None):

        '''
        Split stored index entries (nuts) at the edges of a time interval.

        Nuts of the given ``kind`` (optionally restricted by ``codes`` and
        ``path``) overlapping ``[tmin, tmax)`` are cut: any part extending
        before ``tmin`` or after ``tmax`` is re-inserted as a new nut and the
        original overlapping nut is deleted. The operation is applied to both
        the selection's nut table and the main database's nut table, inside a
        single transaction.
        '''

        kind_id = to_kind_id(kind)
        # Times are stored as integer seconds plus a float offset.
        tmin_seconds, tmin_offset = model.tsplit(tmin)
        tmax_seconds, tmax_offset = model.tsplit(tmax)

        # Table-name substitutions redirecting the SQL templates at the main
        # database's nut table (second pass of the loop below).
        names_main_nuts = dict(self._names)
        names_main_nuts.update(db='main', nuts='nuts')

        db = self.get_database()

        def main_nuts(s):
            # Substitute template with main-database table names.
            return s % names_main_nuts

        with self.transaction('split nuts') as cursor:
            # modify selection and main
            for sql_subst in [
                    self._sql, main_nuts]:

                cond = []
                args = []

                self._timerange_sql(tmin, tmax, kind, cond, args, False)

                if codes is not None:
                    self._codes_match_sql(True, kind_id, codes, cond, args)

                if path is not None:
                    cond.append('files.path == ?')
                    args.append(db.relpath(abspath(path)))

                sql = sql_subst('''
                    SELECT
                        %(db)s.%(nuts)s.nut_id,
                        %(db)s.%(nuts)s.tmin_seconds,
                        %(db)s.%(nuts)s.tmin_offset,
                        %(db)s.%(nuts)s.tmax_seconds,
                        %(db)s.%(nuts)s.tmax_offset,
                        kind_codes.deltat
                    FROM files
                    INNER JOIN %(db)s.%(nuts)s
                        ON files.file_id == %(db)s.%(nuts)s.file_id
                    INNER JOIN kind_codes
                        ON %(db)s.%(nuts)s.kind_codes_id == kind_codes.kind_codes_id
                    WHERE ''' + ' AND '.join(cond))  # noqa

                insert = []
                delete = []
                for row in cursor.execute(sql, args):
                    nut_id, nut_tmin_seconds, nut_tmin_offset, \
                        nut_tmax_seconds, nut_tmax_offset, nut_deltat = row

                    nut_tmin = model.tjoin(
                        nut_tmin_seconds, nut_tmin_offset)
                    nut_tmax = model.tjoin(
                        nut_tmax_seconds, nut_tmax_offset)

                    # Only nuts actually overlapping [tmin, tmax) are split.
                    if nut_tmin < tmax and tmin < nut_tmax:
                        # Keep the remainder extending before tmin, if any.
                        if nut_tmin < tmin:
                            insert.append((
                                nut_tmin_seconds, nut_tmin_offset,
                                tmin_seconds, tmin_offset,
                                model.tscale_to_kscale(
                                    tmin_seconds - nut_tmin_seconds),
                                nut_id))

                        # Keep the remainder extending past tmax, if any.
                        if tmax < nut_tmax:
                            insert.append((
                                tmax_seconds, tmax_offset,
                                nut_tmax_seconds, nut_tmax_offset,
                                model.tscale_to_kscale(
                                    nut_tmax_seconds - tmax_seconds),
                                nut_id))

                        delete.append((nut_id,))

                # New nuts copy all columns of the original except the time
                # span and kscale, which come from the placeholders.
                sql_add = '''
                    INSERT INTO %(db)s.%(nuts)s (
                            file_id, file_segment, file_element, kind_id,
                            kind_codes_id, tmin_seconds, tmin_offset,
                            tmax_seconds, tmax_offset, kscale )
                        SELECT
                            file_id, file_segment, file_element,
                            kind_id, kind_codes_id, ?, ?, ?, ?, ?
                        FROM %(db)s.%(nuts)s
                        WHERE nut_id == ?
                '''
                cursor.executemany(sql_subst(sql_add), insert)

                sql_delete = '''
                    DELETE FROM %(db)s.%(nuts)s WHERE nut_id == ?
                '''
                cursor.executemany(sql_subst(sql_delete), delete)

1179 

    def get_time_span(self, kinds=None, tight=True, dummy_limits=True):
        '''
        Get time interval over all content in selection.

        :param kinds:
            If not ``None``, restrict query to given content kinds.
        :type kinds:
            list of str

        :param tight:
            If ``True``, entries with unset (global placeholder) time limits
            are ignored when combining the interval. If ``False``, a single
            unset limit makes the corresponding combined limit unset.
        :type tight:
            bool

        :param dummy_limits:
            If ``True``, unset combined limits are replaced by the global
            placeholder times instead of being returned as ``None``.
        :type dummy_limits:
            bool

        :complexity:
            O(1), independent of the number of nuts.

        :returns:
            ``(tmin, tmax)``, combined time interval of queried content kinds.
        '''

        # The nested SELECT first finds the extreme seconds value; the outer
        # query then picks the extreme offset among rows at that value.
        sql_min = self._sql('''
            SELECT MIN(tmin_seconds), MIN(tmin_offset)
            FROM %(db)s.%(nuts)s
            WHERE kind_id == ?
                AND tmin_seconds == (
                    SELECT MIN(tmin_seconds)
                    FROM %(db)s.%(nuts)s
                    WHERE kind_id == ?)
        ''')

        sql_max = self._sql('''
            SELECT MAX(tmax_seconds), MAX(tmax_offset)
            FROM %(db)s.%(nuts)s
            WHERE kind_id == ?
                AND tmax_seconds == (
                    SELECT MAX(tmax_seconds)
                    FROM %(db)s.%(nuts)s
                    WHERE kind_id == ?)
        ''')

        gtmin = None
        gtmax = None

        if isinstance(kinds, str):
            kinds = [kinds]

        if kinds is None:
            kind_ids = model.g_content_kind_ids
        else:
            kind_ids = model.to_kind_ids(kinds)

        tmins = []
        tmaxs = []
        for kind_id in kind_ids:
            for tmin_seconds, tmin_offset in self._conn.execute(
                    sql_min, (kind_id, kind_id)):
                tmins.append(model.tjoin(tmin_seconds, tmin_offset))

            for (tmax_seconds, tmax_offset) in self._conn.execute(
                    sql_max, (kind_id, kind_id)):
                tmaxs.append(model.tjoin(tmax_seconds, tmax_offset))

        # Values at the global limits mean "unset" -> map them to None.
        tmins = [tmin if tmin != model.g_tmin else None for tmin in tmins]
        tmaxs = [tmax if tmax != model.g_tmax else None for tmax in tmaxs]

        if tight:
            gtmin = nonef(min, tmins)
            gtmax = nonef(max, tmaxs)
        else:
            gtmin = None if None in tmins else nonef(min, tmins)
            gtmax = None if None in tmaxs else nonef(max, tmaxs)

        if dummy_limits:
            if gtmin is None:
                gtmin = model.g_tmin
            if gtmax is None:
                gtmax = model.g_tmax

        return gtmin, gtmax

1255 

1256 def has(self, kinds): 

1257 ''' 

1258 Check availability of given content kinds. 

1259 

1260 :param kinds: 

1261 Content kinds to query. 

1262 :type kind: 

1263 list of str 

1264 

1265 :returns: 

1266 ``True`` if any of the queried content kinds is available 

1267 in the selection. 

1268 ''' 

1269 self_tmin, self_tmax = self.get_time_span( 

1270 kinds, dummy_limits=False) 

1271 

1272 return None not in (self_tmin, self_tmax) 

1273 

1274 def get_deltat_span(self, kind): 

1275 ''' 

1276 Get min and max sampling interval of all content of given kind. 

1277 

1278 :param kind: 

1279 Content kind 

1280 :type kind: 

1281 str 

1282 

1283 :returns: ``(deltat_min, deltat_max)`` 

1284 ''' 

1285 

1286 deltats = [ 

1287 deltat for deltat in self.get_deltats(kind) 

1288 if deltat is not None] 

1289 

1290 if deltats: 

1291 return min(deltats), max(deltats) 

1292 else: 

1293 return None, None 

1294 

1295 def iter_kinds(self, codes=None): 

1296 ''' 

1297 Iterate over content types available in selection. 

1298 

1299 :param codes: 

1300 If given, get kinds only for selected codes identifier. 

1301 Only a single identifier may be given here and no pattern matching 

1302 is done, currently. 

1303 :type codes: 

1304 :py:class:`~pyrocko.squirrel.model.Codes` 

1305 

1306 :yields: 

1307 Available content kinds as :py:class:`str`. 

1308 

1309 :complexity: 

1310 O(1), independent of number of nuts. 

1311 ''' 

1312 

1313 return self._database._iter_kinds( 

1314 codes=codes, 

1315 kind_codes_count='%(db)s.%(kind_codes_count)s' % self._names) 

1316 

1317 def iter_deltats(self, kind=None): 

1318 ''' 

1319 Iterate over sampling intervals available in selection. 

1320 

1321 :param kind: 

1322 If given, get sampling intervals only for a given content type. 

1323 :type kind: 

1324 str 

1325 

1326 :yields: 

1327 :py:class:`float` values. 

1328 

1329 :complexity: 

1330 O(1), independent of number of nuts. 

1331 ''' 

1332 return self._database._iter_deltats( 

1333 kind=kind, 

1334 kind_codes_count='%(db)s.%(kind_codes_count)s' % self._names) 

1335 

1336 def iter_codes(self, kind=None): 

1337 ''' 

1338 Iterate over content identifier code sequences available in selection. 

1339 

1340 :param kind: 

1341 If given, get codes only for a given content type. 

1342 :type kind: 

1343 str 

1344 

1345 :yields: 

1346 :py:class:`tuple` of :py:class:`str` 

1347 

1348 :complexity: 

1349 O(1), independent of number of nuts. 

1350 ''' 

1351 return self._database._iter_codes( 

1352 kind=kind, 

1353 kind_codes_count='%(db)s.%(kind_codes_count)s' % self._names) 

1354 

1355 def _iter_codes_info(self, kind=None, codes=None): 

1356 ''' 

1357 Iterate over number of occurrences of any (kind, codes) combination. 

1358 

1359 :param kind: 

1360 If given, get counts only for selected content type. 

1361 :type kind: 

1362 str 

1363 

1364 :yields: 

1365 Tuples of the form ``(kind, codes, deltat, kind_codes_id, count)``. 

1366 

1367 :complexity: 

1368 O(1), independent of number of nuts. 

1369 ''' 

1370 return self._database._iter_codes_info( 

1371 kind=kind, 

1372 codes=codes, 

1373 kind_codes_count='%(db)s.%(kind_codes_count)s' % self._names) 

1374 

1375 def get_kinds(self, codes=None): 

1376 ''' 

1377 Get content types available in selection. 

1378 

1379 :param codes: 

1380 If given, get kinds only for selected codes identifier. 

1381 Only a single identifier may be given here and no pattern matching 

1382 is done, currently. 

1383 :type codes: 

1384 :py:class:`~pyrocko.squirrel.model.Codes` 

1385 

1386 :returns: 

1387 Sorted list of available content types. 

1388 :rtype: 

1389 py:class:`list` of :py:class:`str` 

1390 

1391 :complexity: 

1392 O(1), independent of number of nuts. 

1393 

1394 ''' 

1395 return sorted(list(self.iter_kinds(codes=codes))) 

1396 

1397 def get_deltats(self, kind=None): 

1398 ''' 

1399 Get sampling intervals available in selection. 

1400 

1401 :param kind: 

1402 If given, get sampling intervals only for selected content type. 

1403 :type kind: 

1404 str 

1405 

1406 :complexity: 

1407 O(1), independent of number of nuts. 

1408 

1409 :returns: Sorted list of available sampling intervals. 

1410 ''' 

1411 return sorted(list(self.iter_deltats(kind=kind))) 

1412 

1413 def get_codes(self, kind=None): 

1414 ''' 

1415 Get identifier code sequences available in selection. 

1416 

1417 :param kind: 

1418 If given, get codes only for selected content type. 

1419 :type kind: 

1420 str 

1421 

1422 :complexity: 

1423 O(1), independent of number of nuts. 

1424 

1425 :returns: Sorted list of available codes as tuples of strings. 

1426 ''' 

1427 return sorted(list(self.iter_codes(kind=kind))) 

1428 

1429 def get_counts(self, kind=None): 

1430 ''' 

1431 Get number of occurrences of any (kind, codes) combination. 

1432 

1433 :param kind: 

1434 If given, get codes only for selected content type. 

1435 :type kind: 

1436 str 

1437 

1438 :complexity: 

1439 O(1), independent of number of nuts. 

1440 

1441 :returns: ``dict`` with ``counts[kind][codes]`` or ``counts[codes]`` 

1442 if kind is not ``None`` 

1443 ''' 

1444 d = {} 

1445 for kind_id, codes, _, _, count in self._iter_codes_info(kind=kind): 

1446 if kind_id not in d: 

1447 v = d[kind_id] = {} 

1448 else: 

1449 v = d[kind_id] 

1450 

1451 if codes not in v: 

1452 v[codes] = 0 

1453 

1454 v[codes] += count 

1455 

1456 if kind is not None: 

1457 return d[to_kind_id(kind)] 

1458 else: 

1459 return dict((to_kind(kind_id), v) for (kind_id, v) in d.items()) 

1460 

1461 def glob_codes(self, kind, codes): 

1462 ''' 

1463 Find codes matching given patterns. 

1464 

1465 :param kind: 

1466 Content kind to be queried. 

1467 :type kind: 

1468 str 

1469 

1470 :param codes: 

1471 List of code patterns to query. 

1472 :type codes: 

1473 :py:class:`list` of :py:class:`~pyrocko.squirrel.model.Codes` 

1474 objects appropriate for the queried content type, or anything which 

1475 can be converted to such objects. 

1476 

1477 :returns: 

1478 List of matches of the form ``[kind_codes_id, codes, deltat]``. 

1479 ''' 

1480 

1481 kind_id = to_kind_id(kind) 

1482 args = [kind_id] 

1483 pats = codes_patterns_for_kind(kind_id, codes) 

1484 

1485 if pats: 

1486 codes_cond = 'AND ( %s ) ' % ' OR '.join( 

1487 ('kind_codes.codes GLOB ?',) * len(pats)) 

1488 

1489 args.extend(pat.safe_str for pat in pats) 

1490 else: 

1491 codes_cond = '' 

1492 

1493 sql = self._sql(''' 

1494 SELECT kind_codes_id, codes, deltat FROM kind_codes 

1495 WHERE 

1496 kind_id == ? ''' + codes_cond) 

1497 

1498 return list(map(list, self._conn.execute(sql, args))) 

1499 

1500 def update(self, constraint=None, **kwargs): 

1501 ''' 

1502 Update or partially update channel and event inventories. 

1503 

1504 :param constraint: 

1505 Selection of times or areas to be brought up to date. 

1506 :type constraint: 

1507 :py:class:`~pyrocko.squirrel.client.base.Constraint` 

1508 

1509 :param \\*\\*kwargs: 

1510 Shortcut for setting ``constraint=Constraint(**kwargs)``. 

1511 

1512 This function triggers all attached remote sources, to check for 

1513 updates in the meta-data. The sources will only submit queries when 

1514 their expiration date has passed, or if the selection spans into 

1515 previously unseen times or areas. 

1516 ''' 

1517 

1518 if constraint is None: 

1519 constraint = client.Constraint(**kwargs) 

1520 

1521 task = make_task('Updating sources') 

1522 for source in task(self._sources): 

1523 source.update_channel_inventory(self, constraint) 

1524 source.update_event_inventory(self, constraint) 

1525 

1526 def update_waveform_promises(self, constraint=None, **kwargs): 

1527 ''' 

1528 Permit downloading of remote waveforms. 

1529 

1530 :param constraint: 

1531 Remote waveforms compatible with the given constraint are enabled 

1532 for download. 

1533 :type constraint: 

1534 :py:class:`~pyrocko.squirrel.client.base.Constraint` 

1535 

1536 :param \\*\\*kwargs: 

1537 Shortcut for setting ``constraint=Constraint(**kwargs)``. 

1538 

1539 Calling this method permits Squirrel to download waveforms from remote 

1540 sources when processing subsequent waveform requests. This works by 

1541 inserting so called waveform promises into the database. It will look 

1542 into the available channels for each remote source and create a promise 

1543 for each channel compatible with the given constraint. If the promise 

1544 then matches in a waveform request, Squirrel tries to download the 

1545 waveform. If the download is successful, the downloaded waveform is 

1546 added to the Squirrel and the promise is deleted. If the download 

1547 fails, the promise is kept if the reason of failure looks like being 

1548 temporary, e.g. because of a network failure. If the cause of failure 

1549 however seems to be permanent, the promise is deleted so that no 

1550 further attempts are made to download a waveform which might not be 

1551 available from that server at all. To force re-scheduling after a 

1552 permanent failure, call :py:meth:`update_waveform_promises` 

1553 yet another time. 

1554 ''' 

1555 

1556 if constraint is None: 

1557 constraint = client.Constraint(**kwargs) 

1558 

1559 for source in self._sources: 

1560 source.update_waveform_promises(self, constraint) 

1561 

1562 def remove_waveform_promises(self, from_database='selection'): 

1563 ''' 

1564 Remove waveform promises from live selection or global database. 

1565 

1566 Calling this function removes all waveform promises provided by the 

1567 attached sources. 

1568 

1569 :param from_database: 

1570 Remove from live selection ``'selection'`` or global database 

1571 ``'global'``. 

1572 ''' 

1573 for source in self._sources: 

1574 source.remove_waveform_promises(self, from_database=from_database) 

1575 

1576 def update_responses(self, constraint=None, **kwargs): 

1577 if constraint is None: 

1578 constraint = client.Constraint(**kwargs) 

1579 

1580 for source in self._sources: 

1581 source.update_response_inventory(self, constraint) 

1582 

1583 def get_nfiles(self): 

1584 ''' 

1585 Get number of files in selection. 

1586 ''' 

1587 

1588 sql = self._sql('''SELECT COUNT(*) FROM %(db)s.%(file_states)s''') 

1589 for row in self._conn.execute(sql): 

1590 return row[0] 

1591 

1592 def get_nnuts(self): 

1593 ''' 

1594 Get number of nuts in selection. 

1595 ''' 

1596 

1597 sql = self._sql('''SELECT COUNT(*) FROM %(db)s.%(nuts)s''') 

1598 for row in self._conn.execute(sql): 

1599 return row[0] 

1600 

1601 def get_total_size(self): 

1602 ''' 

1603 Get aggregated file size available in selection. 

1604 ''' 

1605 

1606 sql = self._sql(''' 

1607 SELECT SUM(files.size) FROM %(db)s.%(file_states)s 

1608 INNER JOIN files 

1609 ON %(db)s.%(file_states)s.file_id = files.file_id 

1610 ''') 

1611 

1612 for row in self._conn.execute(sql): 

1613 return row[0] or 0 

1614 

1615 def get_stats(self): 

1616 ''' 

1617 Get statistics on contents available through this selection. 

1618 ''' 

1619 

1620 kinds = self.get_kinds() 

1621 time_spans = {} 

1622 for kind in kinds: 

1623 time_spans[kind] = self.get_time_span([kind]) 

1624 

1625 return SquirrelStats( 

1626 nfiles=self.get_nfiles(), 

1627 nnuts=self.get_nnuts(), 

1628 kinds=kinds, 

1629 codes=self.get_codes(), 

1630 total_size=self.get_total_size(), 

1631 counts=self.get_counts(), 

1632 time_spans=time_spans, 

1633 sources=[s.describe() for s in self._sources], 

1634 operators=[op.describe() for op in self._operators]) 

1635 

1636 @filldocs 

1637 def check( 

1638 self, obj=None, tmin=None, tmax=None, time=None, codes=None, 

1639 ignore=[]): 

1640 ''' 

1641 Check for common data/metadata problems. 

1642 

1643 %(query_args)s 

1644 

1645 :param ignore: 

1646 Problem types to be ignored. 

1647 :type ignore: 

1648 :class:`list` of :class:`str` 

1649 (:py:class:`~pyrocko.squirrel.check.SquirrelCheckProblemType`) 

1650 

1651 :returns: 

1652 :py:class:`~pyrocko.squirrel.check.SquirrelCheck` object 

1653 containing the results of the check. 

1654 

1655 See :py:func:`~pyrocko.squirrel.check.do_check`. 

1656 ''' 

1657 

1658 from .check import do_check 

1659 tmin, tmax, codes = self._get_selection_args( 

1660 CHANNEL, obj, tmin, tmax, time, codes) 

1661 

1662 return do_check(self, tmin=tmin, tmax=tmax, codes=codes, ignore=ignore) 

1663 

1664 def get_content( 

1665 self, 

1666 nut, 

1667 cache_id='default', 

1668 accessor_id='default', 

1669 show_progress=False, 

1670 model='squirrel'): 

1671 

1672 ''' 

1673 Get and possibly load full content for a given index entry from file. 

1674 

1675 Loads the actual content objects (channel, station, waveform, ...) from 

1676 file. For efficiency, sibling content (all stuff in the same file 

1677 segment) will also be loaded as a side effect. The loaded contents are 

1678 cached in the Squirrel object. 

1679 ''' 

1680 

1681 content_cache = self._content_caches[cache_id] 

1682 if not content_cache.has(nut): 

1683 

1684 for nut_loaded in io.iload( 

1685 nut.file_path, 

1686 segment=nut.file_segment, 

1687 format=nut.file_format, 

1688 database=self._database, 

1689 update_selection=self, 

1690 show_progress=show_progress): 

1691 

1692 content_cache.put(nut_loaded) 

1693 

1694 try: 

1695 return content_cache.get(nut, accessor_id, model) 

1696 

1697 except KeyError: 

1698 raise error.NotAvailable( 

1699 'Unable to retrieve content: %s, %s, %s, %s' % nut.key) 

1700 

1701 def advance_accessor(self, accessor_id='default', cache_id=None): 

1702 ''' 

1703 Notify memory caches about consumer moving to a new data batch. 

1704 

1705 :param accessor_id: 

1706 Name of accessing consumer to be advanced. 

1707 :type accessor_id: 

1708 str 

1709 

1710 :param cache_id: 

1711 Name of cache to for which the accessor should be advanced. By 

1712 default the named accessor is advanced in all registered caches. 

1713 By default, two caches named ``'default'`` and ``'waveform'`` are 

1714 available. 

1715 :type cache_id: 

1716 str 

1717 

1718 See :py:class:`~pyrocko.squirrel.cache.ContentCache` for details on how 

1719 Squirrel's memory caching works and can be tuned. Default behaviour is 

1720 to release data when it has not been used in the latest data 

1721 window/batch. If the accessor is never advanced, data is cached 

1722 indefinitely - which is often desired e.g. for station meta-data. 

1723 Methods for consecutive data traversal, like 

1724 :py:meth:`chopper_waveforms` automatically advance and clear 

1725 their accessor. 

1726 ''' 

1727 for cache_ in ( 

1728 self._content_caches.keys() 

1729 if cache_id is None 

1730 else [cache_id]): 

1731 

1732 self._content_caches[cache_].advance_accessor(accessor_id) 

1733 

1734 def clear_accessor(self, accessor_id, cache_id=None): 

1735 ''' 

1736 Notify memory caches about a consumer having finished. 

1737 

1738 :param accessor_id: 

1739 Name of accessor to be cleared. 

1740 :type accessor_id: 

1741 str 

1742 

1743 :param cache_id: 

1744 Name of cache for which the accessor should be cleared. By default 

1745 the named accessor is cleared from all registered caches. By 

1746 default, two caches named ``'default'`` and ``'waveform'`` are 

1747 available. 

1748 :type cache_id: 

1749 str 

1750 

1751 Calling this method clears all references to cache entries held by the 

1752 named accessor. Cache entries are then freed if not referenced by any 

1753 other accessor. 

1754 ''' 

1755 

1756 for cache_ in ( 

1757 self._content_caches.keys() 

1758 if cache_id is None 

1759 else [cache_id]): 

1760 

1761 self._content_caches[cache_].clear_accessor(accessor_id) 

1762 

1763 def get_cache_stats(self, cache_id): 

1764 return self._content_caches[cache_id].get_stats() 

1765 

1766 @filldocs 

1767 def get_stations( 

1768 self, obj=None, tmin=None, tmax=None, time=None, codes=None, 

1769 model='squirrel', on_error='raise'): 

1770 

1771 ''' 

1772 Get stations matching given constraints. 

1773 

1774 %(query_args)s 

1775 

1776 :param model: 

1777 Select object model for returned values: ``'squirrel'`` to get 

1778 Squirrel station objects or ``'pyrocko'`` to get Pyrocko station 

1779 objects with channel information attached. 

1780 :type model: 

1781 str 

1782 

1783 :returns: 

1784 List of :py:class:`pyrocko.squirrel.Station 

1785 <pyrocko.squirrel.model.Station>` objects by default or list of 

1786 :py:class:`pyrocko.model.Station <pyrocko.model.station.Station>` 

1787 objects if ``model='pyrocko'`` is requested. 

1788 

1789 See :py:meth:`iter_nuts` for details on time span matching. 

1790 ''' 

1791 

1792 if model == 'pyrocko': 

1793 return self._get_pyrocko_stations( 

1794 obj, tmin, tmax, time, codes, on_error=on_error) 

1795 elif model in ('squirrel', 'stationxml', 'stationxml+'): 

1796 args = self._get_selection_args( 

1797 STATION, obj, tmin, tmax, time, codes) 

1798 

1799 nuts = sorted( 

1800 self.iter_nuts('station', *args), key=lambda nut: nut.dkey) 

1801 

1802 return [self.get_content(nut, model=model) for nut in nuts] 

1803 else: 

1804 raise ValueError('Invalid station model: %s' % model) 

1805 

1806 @filldocs 

1807 def get_channels( 

1808 self, obj=None, tmin=None, tmax=None, time=None, codes=None, 

1809 model='squirrel'): 

1810 

1811 ''' 

1812 Get channels matching given constraints. 

1813 

1814 %(query_args)s 

1815 

1816 :returns: 

1817 List of :py:class:`~pyrocko.squirrel.model.Channel` objects. 

1818 

1819 See :py:meth:`iter_nuts` for details on time span matching. 

1820 ''' 

1821 

1822 args = self._get_selection_args( 

1823 CHANNEL, obj, tmin, tmax, time, codes) 

1824 

1825 nuts = sorted( 

1826 self.iter_nuts('channel', *args), key=lambda nut: nut.dkey) 

1827 

1828 return [self.get_content(nut, model=model) for nut in nuts] 

1829 

1830 @filldocs 

1831 def get_sensors( 

1832 self, obj=None, tmin=None, tmax=None, time=None, codes=None): 

1833 

1834 ''' 

1835 Get sensors matching given constraints. 

1836 

1837 %(query_args)s 

1838 

1839 :returns: 

1840 List of :py:class:`~pyrocko.squirrel.model.Sensor` objects. 

1841 

1842 See :py:meth:`iter_nuts` for details on time span matching. 

1843 ''' 

1844 

1845 tmin, tmax, codes = self._get_selection_args( 

1846 CHANNEL, obj, tmin, tmax, time, codes) 

1847 

1848 if codes is not None: 

1849 codes = codes_patterns_list( 

1850 (entry.replace(channel=entry.channel[:-1] + '?') 

1851 if entry.channel != '*' else entry) 

1852 for entry in codes) 

1853 

1854 nuts = sorted( 

1855 self.iter_nuts( 

1856 'channel', tmin, tmax, codes), key=lambda nut: nut.dkey) 

1857 

1858 return [ 

1859 sensor for sensor in model.Sensor.from_channels( 

1860 self.get_content(nut) for nut in nuts) 

1861 if match_time_span(tmin, tmax, sensor)] 

1862 

1863 @filldocs 

1864 def get_responses( 

1865 self, obj=None, tmin=None, tmax=None, time=None, codes=None, 

1866 model='squirrel'): 

1867 

1868 ''' 

1869 Get instrument responses matching given constraints. 

1870 

1871 %(query_args)s 

1872 

1873 :param model: 

1874 Select data model for returned objects. Choices: ``'squirrel'``, 

1875 ``'stationxml'``, ``'stationxml+'``. See return value description. 

1876 :type model: 

1877 str 

1878 

1879 :returns: 

1880 List of :py:class:`~pyrocko.squirrel.model.Response` if ``model == 

1881 'squirrel'`` or list of 

1882 :py:class:`~pyrocko.io.stationxml.FDSNStationXML` 

1883 if ``model == 'stationxml'`` or list of 

1884 (:py:class:`~pyrocko.squirrel.model.Response`, 

1885 :py:class:`~pyrocko.io.stationxml.FDSNStationXML`) if ``model == 

1886 'stationxml+'``. 

1887 

1888 See :py:meth:`iter_nuts` for details on time span matching. 

1889 ''' 

1890 

1891 args = self._get_selection_args( 

1892 RESPONSE, obj, tmin, tmax, time, codes) 

1893 

1894 nuts = sorted( 

1895 self.iter_nuts('response', *args), key=lambda nut: nut.dkey) 

1896 

1897 return [self.get_content(nut, model=model) for nut in nuts] 

1898 

1899 @filldocs 

1900 def get_response( 

1901 self, obj=None, tmin=None, tmax=None, time=None, codes=None, 

1902 model='squirrel', on_duplicate='raise'): 

1903 

1904 ''' 

1905 Get instrument response matching given constraints. 

1906 

1907 %(query_args)s 

1908 

1909 :param model: 

1910 Select data model for returned object. Choices: ``'squirrel'``, 

1911 ``'stationxml'``, ``'stationxml+'``. See return value description. 

1912 :type model: 

1913 str 

1914 

1915 :param on_duplicate: 

1916 Determines how duplicates/multiple matching responses are handled. 

1917 Choices: ``'raise'`` - raise 

1918 :py:exc:`~pyrocko.squirrel.error.Duplicate`, ``'warn'`` - emit a 

1919 warning and return first match, ``'ignore'`` - silently return 

1920 first match. 

1921 :type on_duplicate: 

1922 str 

1923 

1924 :returns: 

1925 :py:class:`~pyrocko.squirrel.model.Response` if 

1926 ``model == 'squirrel'`` or 

1927 :py:class:`~pyrocko.io.stationxml.FDSNStationXML` if ``model == 

1928 'stationxml'`` or 

1929 (:py:class:`~pyrocko.squirrel.model.Response`, 

1930 :py:class:`~pyrocko.io.stationxml.FDSNStationXML`) if ``model == 

1931 'stationxml+'``. 

1932 

1933 Same as :py:meth:`get_responses` but returning exactly one response. 

1934 Raises :py:exc:`~pyrocko.squirrel.error.NotAvailable` if none is 

1935 available. Duplicates are handled according to the ``on_duplicate`` 

1936 argument. 

1937 

1938 See :py:meth:`iter_nuts` for details on time span matching. 

1939 ''' 

1940 

1941 if model == 'stationxml': 

1942 model_ = 'stationxml+' 

1943 else: 

1944 model_ = model 

1945 

1946 responses = self.get_responses( 

1947 obj, tmin, tmax, time, codes, model=model_) 

1948 if len(responses) == 0: 

1949 raise error.NotAvailable( 

1950 'No instrument response available (%s).' 

1951 % self._get_selection_args_str( 

1952 RESPONSE, obj, tmin, tmax, time, codes)) 

1953 

1954 elif len(responses) > 1: 

1955 

1956 if on_duplicate in ('raise', 'warn'): 

1957 if model_ == 'squirrel': 

1958 resps_sq = responses 

1959 elif model_ == 'stationxml+': 

1960 resps_sq = [resp[0] for resp in responses] 

1961 else: 

1962 raise ValueError('Invalid response model: %s' % model) 

1963 

1964 rinfo = ':\n' + '\n'.join( 

1965 ' ' + resp.summary for resp in resps_sq) 

1966 

1967 message = \ 

1968 'Multiple instrument responses matching given ' \ 

1969 'constraints (%s)%s%s' % ( 

1970 self._get_selection_args_str( 

1971 RESPONSE, obj, tmin, tmax, time, codes), 

1972 ' -> using first' if on_duplicate == 'warn' else '', 

1973 rinfo) 

1974 

1975 if on_duplicate == 'raise': 

1976 raise error.Duplicate(message) 

1977 

1978 elif on_duplicate == 'warn': 

1979 logger.warning(message) 

1980 

1981 elif on_duplicate == 'ignore': 

1982 pass 

1983 

1984 else: 

1985 ValueError( 

1986 'Invalid argument for on_duplicate: %s' % on_duplicate) 

1987 

1988 if model == 'stationxml': 

1989 return responses[0][1] 

1990 else: 

1991 return responses[0] 

1992 

1993 @filldocs 

1994 def get_events( 

1995 self, obj=None, tmin=None, tmax=None, time=None, codes=None): 

1996 

1997 ''' 

1998 Get events matching given constraints. 

1999 

2000 %(query_args)s 

2001 

2002 :returns: 

2003 List of :py:class:`~pyrocko.model.event.Event` objects. 

2004 

2005 See :py:meth:`iter_nuts` for details on time span matching. 

2006 ''' 

2007 

2008 args = self._get_selection_args(EVENT, obj, tmin, tmax, time, codes) 

2009 nuts = sorted( 

2010 self.iter_nuts('event', *args), key=lambda nut: nut.dkey) 

2011 

2012 return [self.get_content(nut) for nut in nuts] 

2013 

def _redeem_promises(self, *args, order_only=False):
    '''
    Resolve waveform promises by ordering data from remote sources.

    :param args:
        Positional selection arguments; the first two entries must be
        ``tmin`` and ``tmax``. Additional entries (e.g. codes,
        codes_exclude, sample rate bounds) are passed through to
        :py:meth:`iter_nuts` — assumes iter_nuts accepts them (see
        caller ``get_waveform_nuts``).
    :param order_only:
        If ``True``, only enqueue the generated waveform orders in
        ``self._pending_orders`` and return without downloading.
    '''

    def split_promise(order, tmax=None):
        # Remove the time span covered by `order` from the matching
        # waveform promise, so it is not ordered again.
        self._split_nuts(
            'waveform_promise',
            order.tmin, tmax if tmax is not None else order.tmax,
            codes=order.codes,
            path=order.source_id)

    tmin, tmax = args[:2]

    waveforms = list(self.iter_nuts('waveform', *args))
    promises = list(self.iter_nuts('waveform_promise', *args))

    # Map codes -> list of locally available (tmin, tmax) spans, used
    # below to compute the remaining gaps per order.
    codes_to_avail = defaultdict(list)
    for nut in waveforms:
        codes_to_avail[nut.codes].append((nut.tmin, nut.tmax))

    now = time.time()
    orders = []
    for promise in promises:
        waveforms_avail = codes_to_avail[promise.codes]
        # Slice the promised span into download blocks of roughly
        # constant sample count (see module-level `blocks`).
        for block_tmin, block_tmax in blocks(
                max(tmin, promise.tmin),
                min(tmax, promise.tmax),
                promise.deltat):

            # Blocks entirely in the future cannot have data yet.
            if block_tmin > now:
                continue

            orders.append(
                WaveformOrder(
                    source_id=promise.file_path,
                    codes=promise.codes,
                    tmin=block_tmin,
                    tmax=block_tmax,
                    deltat=promise.deltat,
                    gaps=gaps(waveforms_avail, block_tmin, block_tmax),
                    time_created=now))

    # Orders without gaps are already satisfied by local data.
    orders_noop, orders = lpick(lambda order: order.gaps, orders)

    order_keys_noop = set(order_key(order) for order in orders_noop)
    if len(order_keys_noop) != 0 or len(orders_noop) != 0:
        logger.info(
            'Waveform orders already satisfied with cached/local data: '
            '%i (%i)' % (len(order_keys_noop), len(orders_noop)))

    for order in orders_noop:
        split_promise(order)

    if order_only:
        if orders:
            self._pending_orders.extend(orders)
            logger.info(
                'Enqueuing %i waveform order%s.'
                % len_plural(orders))
        return
    else:
        if self._pending_orders:
            orders.extend(self._pending_orders)
            logger.info(
                'Adding %i previously enqueued order%s.'
                % len_plural(self._pending_orders))

        self._pending_orders = []

    # Only FDSN sources can redeem waveform promises; their order in
    # self._sources defines the download priority.
    source_ids = []
    sources = {}
    for source in self._sources:
        if isinstance(source, fdsn.FDSNSource):
            source_ids.append(source._source_id)
            sources[source._source_id] = source

    source_priority = dict(
        (source_id, i) for (i, source_id) in enumerate(source_ids))

    # Group equivalent orders; within a group, sources are tried in
    # priority order until one succeeds.
    order_groups = defaultdict(list)
    for order in orders:
        order_groups[order_key(order)].append(order)

    for k, order_group in order_groups.items():
        order_group.sort(
            key=lambda order: source_priority[order.source_id])

    n_order_groups = len(order_groups)

    if len(order_groups) != 0 or len(orders) != 0:
        logger.info(
            'Waveform orders standing for download: %i (%i)'
            % (len(order_groups), len(orders)))

        task = make_task('Waveform orders processed', n_order_groups)
    else:
        task = None

    def release_order_group(order):
        # One order of the group succeeded: drop the fallbacks and
        # consume their promises.
        okey = order_key(order)
        for followup in order_groups[okey]:
            if followup is not order:
                split_promise(followup)

        del order_groups[okey]

        if task:
            task.update(n_order_groups - len(order_groups))

    def noop(order):
        pass

    def success(order, trs):
        release_order_group(order)
        if order.is_near_real_time():
            if not trs:
                return  # keep promise when no data received at real time
            else:
                # Only consume the promise up to the last received
                # sample, snapped to the order's sampling raster.
                tmax = max(tr.tmax+tr.deltat for tr in trs)
                tmax = order.tmin \
                    + round((tmax - order.tmin) / order.deltat) \
                    * order.deltat
                split_promise(order, tmax)
        else:
            split_promise(order)

    def batch_add(paths):
        self.add(paths)

    # Callbacks from downloader threads are marshalled through this
    # queue and executed in the main thread (sqlite connection is not
    # shared across threads).
    calls = queue.Queue()

    def enqueue(f):
        def wrapper(*args):
            calls.put((f, args))

        return wrapper

    while order_groups:

        # Take the highest-priority remaining order of every group.
        orders_now = []
        empty = []
        for k, order_group in order_groups.items():
            try:
                orders_now.append(order_group.pop(0))
            except IndexError:
                empty.append(k)

        for k in empty:
            del order_groups[k]

        by_source_id = defaultdict(list)
        for order in orders_now:
            by_source_id[order.source_id].append(order)

        threads = []
        for source_id in by_source_id:
            # NOTE: source_id is bound as a default argument to avoid
            # the late-binding closure pitfall — otherwise all threads
            # started in this loop would read the *current* value of
            # the loop variable when they eventually run.
            def download(source_id=source_id):
                try:
                    sources[source_id].download_waveforms(
                        by_source_id[source_id],
                        success=enqueue(success),
                        error_permanent=enqueue(split_promise),
                        error_temporary=noop,
                        batch_add=enqueue(batch_add))

                finally:
                    # Sentinel: tells the main thread this source is
                    # done producing callbacks.
                    calls.put(None)

            if len(by_source_id) > 1:
                thread = threading.Thread(target=download)
                thread.start()
                threads.append(thread)
            else:
                # Single source: no need for a worker thread.
                download()

        # Drain the callback queue until every source has signalled
        # completion with its None sentinel.
        ndone = 0
        while ndone < len(by_source_id):
            ret = calls.get()
            if ret is None:
                ndone += 1
            else:
                ret[0](*ret[1])

        for thread in threads:
            thread.join()

        if task:
            task.update(n_order_groups - len(order_groups))

    if task:
        task.done()

2212 

@filldocs
def get_waveform_nuts(
        self, obj=None, tmin=None, tmax=None, time=None, codes=None,
        codes_exclude=None, sample_rate_min=None, sample_rate_max=None,
        order_only=False):

    '''
    Get waveform content entities matching given constraints.

    %(query_args)s

    Like :py:meth:`get_nuts` with ``kind='waveform'`` but additionally
    resolves matching waveform promises (downloads waveforms from remote
    sources).

    See :py:meth:`iter_nuts` for details on time span matching.
    '''

    selection = self._get_selection_args(
        WAVEFORM, obj, tmin, tmax, time, codes)

    # Resolve any promises overlapping the queried span first, so the
    # nut query below sees the freshly downloaded data.
    if self.downloads_enabled:
        self._redeem_promises(
            *selection,
            codes_exclude,
            sample_rate_min,
            sample_rate_max,
            order_only=order_only)

    # Stable ordering by digest key.
    waveform_nuts = list(self.iter_nuts('waveform', *selection))
    waveform_nuts.sort(key=lambda nut: nut.dkey)

    return waveform_nuts

2245 

@filldocs
def have_waveforms(
        self, obj=None, tmin=None, tmax=None, time=None, codes=None):

    '''
    Check if any waveforms or waveform promises are available for given
    constraints.

    %(query_args)s
    '''

    selection = self._get_selection_args(
        WAVEFORM, obj, tmin, tmax, time, codes)

    # A single matching nut is enough — query with limit=1.
    if list(self.iter_nuts('waveform', *selection, limit=1)):
        return True

    return self.downloads_enabled and bool(list(
        self.iter_nuts('waveform_promise', *selection, limit=1)))

2262 

@filldocs
def get_waveforms(
        self, obj=None, tmin=None, tmax=None, time=None, codes=None,
        codes_exclude=None, sample_rate_min=None, sample_rate_max=None,
        uncut=False, want_incomplete=True, degap=True,
        maxgap=5, maxlap=None, snap=None, include_last=False,
        load_data=True, accessor_id='default', operator_params=None,
        order_only=False, channel_priorities=None):

    '''
    Get waveforms matching given constraints.

    %(query_args)s

    :param sample_rate_min:
        Consider only waveforms with a sampling rate equal to or greater
        than the given value [Hz].
    :type sample_rate_min:
        float

    :param sample_rate_max:
        Consider only waveforms with a sampling rate equal to or less than
        the given value [Hz].
    :type sample_rate_max:
        float

    :param uncut:
        Set to ``True``, to disable cutting traces to [``tmin``, ``tmax``]
        and to disable degapping/deoverlapping. Returns untouched traces as
        they are read from file segment. File segments are always read in
        their entirety.
    :type uncut:
        bool

    :param want_incomplete:
        If ``True``, gappy/incomplete traces are included in the result.
    :type want_incomplete:
        bool

    :param degap:
        If ``True``, connect traces and remove gaps and overlaps.
    :type degap:
        bool

    :param maxgap:
        Maximum gap size in samples which is filled with interpolated
        samples when ``degap`` is ``True``.
    :type maxgap:
        int

    :param maxlap:
        Maximum overlap size in samples which is removed when ``degap`` is
        ``True``.
    :type maxlap:
        int

    :param snap:
        Rounding functions used when computing sample index from time
        instance, for trace start and trace end, respectively. By default,
        ``(round, round)`` is used.
    :type snap:
        :py:class:`tuple` of 2 callables

    :param include_last:
        If ``True``, add one more sample to the returned traces (the sample
        which would be the first sample of a query with ``tmin`` set to the
        current value of ``tmax``).
    :type include_last:
        bool

    :param load_data:
        If ``True``, waveform data samples are read from files (or cache).
        If ``False``, meta-information-only traces are returned (dummy
        traces with no data samples).
    :type load_data:
        bool

    :param accessor_id:
        Name of consumer on who's behalf data is accessed. Used in cache
        management (see :py:mod:`~pyrocko.squirrel.cache`). Used as a key
        to distinguish different points of extraction for the decision of
        when to release cached waveform data. Should be used when data is
        alternately extracted from more than one region / selection.
    :type accessor_id:
        str

    :param channel_priorities:
        List of band/instrument code combinations to try. For example,
        giving ``['HH', 'BH']`` would first try to get ``HH?`` channels and
        then fallback to ``BH?`` if these are not available. The first
        matching waveforms are returned. Use in combination with
        ``sample_rate_min`` and ``sample_rate_max`` to constrain the sample
        rate.
    :type channel_priorities:
        :py:class:`list` of :py:class:`str`

    See :py:meth:`iter_nuts` for details on time span matching.

    Loaded data is kept in memory (at least) until
    :py:meth:`clear_accessor` has been called or
    :py:meth:`advance_accessor` has been called two consecutive times
    without data being accessed between the two calls (by this accessor).
    Data may still be further kept in the memory cache if held alive by
    consumers with a different ``accessor_id``.
    '''

    tmin, tmax, codes = self._get_selection_args(
        WAVEFORM, obj, tmin, tmax, time, codes)

    # Channel priority lists are handled by a dedicated helper which
    # calls back into this method once per priority entry.
    if channel_priorities is not None:
        return self._get_waveforms_prioritized(
            tmin=tmin, tmax=tmax, codes=codes, codes_exclude=codes_exclude,
            sample_rate_min=sample_rate_min,
            sample_rate_max=sample_rate_max,
            uncut=uncut, want_incomplete=want_incomplete, degap=degap,
            maxgap=maxgap, maxlap=maxlap, snap=snap,
            include_last=include_last, load_data=load_data,
            accessor_id=accessor_id, operator_params=operator_params,
            order_only=order_only, channel_priorities=channel_priorities)

    kinds = ['waveform']
    if self.downloads_enabled:
        kinds.append('waveform_promise')

    # Fall back to the overall available time span when tmin/tmax are
    # not constrained by the caller.
    self_tmin, self_tmax = self.get_time_span(kinds)

    if None in (self_tmin, self_tmax):
        logger.warning(
            'No waveforms available.')
        return []

    tmin = tmin if tmin is not None else self_tmin
    tmax = tmax if tmax is not None else self_tmax

    # If a registered operator produces the requested codes, delegate
    # waveform extraction to it.
    if codes is not None and len(codes) == 1:
        # TODO: fix for multiple / mixed codes
        operator = self.get_operator(codes[0])
        if operator is not None:
            return operator.get_waveforms(
                self, codes[0],
                tmin=tmin, tmax=tmax,
                uncut=uncut, want_incomplete=want_incomplete, degap=degap,
                maxgap=maxgap, maxlap=maxlap, snap=snap,
                include_last=include_last, load_data=load_data,
                accessor_id=accessor_id, params=operator_params)

    # This resolves waveform promises (may download data).
    nuts = self.get_waveform_nuts(
        obj, tmin, tmax, time, codes, codes_exclude, sample_rate_min,
        sample_rate_max, order_only=order_only)

    if order_only:
        return []

    if load_data:
        traces = [
            self.get_content(nut, 'waveform', accessor_id) for nut in nuts]

    else:
        # Meta-only dummy traces; no sample data is read.
        traces = [
            trace.Trace(**nut.trace_kwargs) for nut in nuts]

    if uncut:
        return traces

    if snap is None:
        snap = (round, round)

    chopped = []
    for tr in traces:
        # The cache may still hold data for a trace requested
        # meta-only; strip it on a copy so cached data stays intact.
        if not load_data and tr.ydata is not None:
            tr = tr.copy(data=False)
            tr.ydata = None

        try:
            chopped.append(tr.chop(
                tmin, tmax,
                inplace=False,
                snap=snap,
                include_last=include_last))

        except trace.NoData:
            pass

    processed = self._process_chopped(
        chopped, degap, maxgap, maxlap, want_incomplete, tmin, tmax)

    return processed

2450 

def _get_waveforms_prioritized(
        self, tmin=None, tmax=None, codes=None, codes_exclude=None,
        channel_priorities=None, **kwargs):
    '''
    Query waveforms once per channel-priority entry, excluding channels
    already delivered by a higher-priority entry.

    ``channel_priorities`` is a list of two-character band/instrument
    code combinations (e.g. ``['HH', 'BH']``); remaining keyword
    arguments are forwarded to :py:meth:`get_waveforms`.
    '''

    trs_all = []
    codes_have = set()
    for channel in channel_priorities:
        assert len(channel) == 2
        if codes is not None:
            codes_now = [
                codes_.replace(channel=channel+'?') for codes_ in codes]
        else:
            # NOTE(review): a bare Codes object is passed here while the
            # branch above produces a list — presumably get_waveforms
            # accepts both forms; confirm against its codes handling.
            codes_now = model.CodesNSLCE('*', '*', '*', channel+'?')

        # Exclude codes for which a higher-priority band/instrument
        # combination already produced data (same component letter).
        codes_exclude_now = list(set(
            codes_.replace(channel=channel+codes_.channel[-1])
            for codes_ in codes_have))

        if codes_exclude:
            codes_exclude_now.extend(codes_exclude)

        trs = self.get_waveforms(
            tmin=tmin,
            tmax=tmax,
            codes=codes_now,
            codes_exclude=codes_exclude_now,
            **kwargs)

        codes_have.update(set(tr.codes for tr in trs))
        trs_all.extend(trs)

    return trs_all

2483 

@filldocs
def chopper_waveforms(
        self, obj=None, tmin=None, tmax=None, time=None, codes=None,
        codes_exclude=None, sample_rate_min=None, sample_rate_max=None,
        tinc=None, tpad=0.,
        want_incomplete=True, snap_window=False,
        degap=True, maxgap=5, maxlap=None,
        snap=None, include_last=False, load_data=True,
        accessor_id=None, clear_accessor=True, operator_params=None,
        grouping=None, channel_priorities=None):

    '''
    Iterate window-wise over waveform archive.

    %(query_args)s

    :param tinc:
        Time increment (window shift time) (default uses ``tmax-tmin``).
    :type tinc:
        :py:func:`~pyrocko.util.get_time_float`

    :param tpad:
        Padding time appended on either side of the data window (window
        overlap is ``2*tpad``).
    :type tpad:
        :py:func:`~pyrocko.util.get_time_float`

    :param want_incomplete:
        If ``True``, gappy/incomplete traces are included in the result.
    :type want_incomplete:
        bool

    :param snap_window:
        If ``True``, start time windows at multiples of tinc with respect
        to system time zero.
    :type snap_window:
        bool

    :param degap:
        If ``True``, connect traces and remove gaps and overlaps.
    :type degap:
        bool

    :param maxgap:
        Maximum gap size in samples which is filled with interpolated
        samples when ``degap`` is ``True``.
    :type maxgap:
        int

    :param maxlap:
        Maximum overlap size in samples which is removed when ``degap`` is
        ``True``.
    :type maxlap:
        int

    :param snap:
        Rounding functions used when computing sample index from time
        instance, for trace start and trace end, respectively. By default,
        ``(round, round)`` is used.
    :type snap:
        :py:class:`tuple` of 2 callables

    :param include_last:
        If ``True``, add one more sample to the returned traces (the sample
        which would be the first sample of a query with ``tmin`` set to the
        current value of ``tmax``).
    :type include_last:
        bool

    :param load_data:
        If ``True``, waveform data samples are read from files (or cache).
        If ``False``, meta-information-only traces are returned (dummy
        traces with no data samples).
    :type load_data:
        bool

    :param accessor_id:
        Name of consumer on who's behalf data is accessed. Used in cache
        management (see :py:mod:`~pyrocko.squirrel.cache`). Used as a key
        to distinguish different points of extraction for the decision of
        when to release cached waveform data. Should be used when data is
        alternately extracted from more than one region / selection.
    :type accessor_id:
        str

    :param clear_accessor:
        If ``True`` (default), :py:meth:`clear_accessor` is called when the
        chopper finishes. Set to ``False`` to keep loaded waveforms in
        memory when the generator returns.
    :type clear_accessor:
        bool

    :param grouping:
        By default, traversal over the data is over time and all matching
        traces of a time window are yielded. Using this option, it is
        possible to traverse the data first by group (e.g. station or
        network) and second by time. This can reduce the number of traces
        in each batch and thus reduce the memory footprint of the process.
    :type grouping:
        :py:class:`~pyrocko.squirrel.operators.base.Grouping`

    :yields:
        For each extracted time window or waveform group a
        :py:class:`Batch` object is yielded.

    See :py:meth:`iter_nuts` for details on time span matching.
    '''

    tmin, tmax, codes = self._get_selection_args(
        WAVEFORM, obj, tmin, tmax, time, codes)

    kinds = ['waveform']
    if self.downloads_enabled:
        kinds.append('waveform_promise')

    self_tmin, self_tmax = self.get_time_span(kinds)

    if None in (self_tmin, self_tmax):
        logger.warning(
            'Content has undefined time span. No waveforms and no '
            'waveform promises?')
        return

    if snap_window and tinc is not None:
        # Align windows to multiples of tinc w.r.t. system time zero.
        tmin = tmin if tmin is not None else self_tmin
        tmax = tmax if tmax is not None else self_tmax
        tmin = math.floor(tmin / tinc) * tinc
        tmax = math.ceil(tmax / tinc) * tinc
    else:
        # Shrink the default span so that padded windows stay inside
        # the available data.
        tmin = tmin if tmin is not None else self_tmin + tpad
        tmax = tmax if tmax is not None else self_tmax - tpad

    if tinc is None:
        tinc = tmax - tmin
        nwin = 1
    elif tinc == 0.0:
        nwin = 1
    else:
        # eps guards against an extra window due to float rounding.
        eps = 1e-6
        nwin = max(1, int((tmax - tmin) / tinc - eps) + 1)

    try:
        if accessor_id is None:
            # Auto-generated accessor name, unique among concurrently
            # active choppers.
            accessor_id = 'chopper%i' % self._n_choppers_active

        self._n_choppers_active += 1

        if grouping is None:
            codes_list = [codes]
        else:
            # Build one codes group per operator input mapping.
            operator = Operator(
                filtering=CodesPatternFiltering(codes=codes),
                grouping=grouping)

            available = set(self.get_codes(kind='waveform'))
            if self.downloads_enabled:
                available.update(self.get_codes(kind='waveform_promise'))
            operator.update_mappings(sorted(available))

            codes_list = [
                codes_patterns_list(scl)
                for scl in operator.iter_in_codes()]

        ngroups = len(codes_list)
        for igroup, scl in enumerate(codes_list):
            for iwin in range(nwin):
                # Last window is clipped to tmax.
                wmin, wmax = tmin+iwin*tinc, min(tmin+(iwin+1)*tinc, tmax)

                chopped = self.get_waveforms(
                    tmin=wmin-tpad,
                    tmax=wmax+tpad,
                    codes=scl,
                    codes_exclude=codes_exclude,
                    sample_rate_min=sample_rate_min,
                    sample_rate_max=sample_rate_max,
                    snap=snap,
                    include_last=include_last,
                    load_data=load_data,
                    want_incomplete=want_incomplete,
                    degap=degap,
                    maxgap=maxgap,
                    maxlap=maxlap,
                    accessor_id=accessor_id,
                    operator_params=operator_params,
                    channel_priorities=channel_priorities)

                # Let the cache know this accessor advanced, so data
                # of previous windows may be released.
                self.advance_accessor(accessor_id)

                yield Batch(
                    tmin=wmin,
                    tmax=wmax,
                    i=iwin,
                    n=nwin,
                    igroup=igroup,
                    ngroups=ngroups,
                    traces=chopped)

    finally:
        # Runs also when the consumer abandons the generator.
        self._n_choppers_active -= 1
        if clear_accessor:
            self.clear_accessor(accessor_id, 'waveform')

2685 

def _process_chopped(
        self, chopped, degap, maxgap, maxlap, want_incomplete, tmin, tmax):
    '''
    Post-process chopped traces: sort, optionally degap, and optionally
    weed out traces not fully covering [``tmin``, ``tmax``].
    '''

    chopped.sort(key=lambda tr: tr.full_id)

    if degap:
        chopped = trace.degapper(chopped, maxgap=maxgap, maxlap=maxlap)

    if want_incomplete:
        return chopped

    # Keep only traces matching the requested span to within half a
    # sample; with degapping enabled, traces short by up to five
    # samples at either end are padded by repetition.
    complete = []
    for tr in chopped:
        err_start = tr.tmin - tmin
        err_end = tr.tmax + tr.deltat - tmax
        half_sample = 0.5 * tr.deltat

        if abs(err_start) <= half_sample and abs(err_end) <= half_sample:
            complete.append(tr)

        elif degap \
                and 0. < err_start <= 5. * tr.deltat \
                and -5. * tr.deltat <= err_end < 0.:

            tr.extend(tmin, tmax-tr.deltat, fillmethod='repeat')
            complete.append(tr)

    return complete

2711 

def _get_pyrocko_stations(
        self, obj=None, tmin=None, tmax=None, time=None, codes=None,
        on_error='raise'):
    '''
    Collect matching stations and channels as legacy
    :py:class:`pyrocko.model.station.Station` objects.

    Conflicting attribute values among epochs of the same station or
    channel are resolved with :py:func:`pyrocko.util.consistency_merge`;
    ``on_error`` controls how merge conflicts are handled.
    '''

    from pyrocko import model as pmodel

    if codes is not None:
        codes = codes_patterns_for_kind(STATION, codes)

    # nsl -> (list of station args, list of channel objects)
    by_nsl = defaultdict(lambda: (list(), list()))
    for station in self.get_stations(obj, tmin, tmax, time, codes):
        sargs = station._get_pyrocko_station_args()
        by_nsl[station.codes.nsl][0].append(sargs)

    # Channel queries expect NSLCE-style codes patterns.
    if codes is not None:
        codes = [model.CodesNSLCE(c) for c in codes]

    for channel in self.get_channels(obj, tmin, tmax, time, codes):
        sargs = channel._get_pyrocko_station_args()
        sargs_list, channels_list = by_nsl[channel.codes.nsl]
        sargs_list.append(sargs)
        channels_list.append(channel)

    pstations = []
    nsls = list(by_nsl.keys())
    nsls.sort()
    for nsl in nsls:
        sargs_list, channels_list = by_nsl[nsl]
        # The ('',) prefix serves as a merge key placeholder expected
        # by consistency_merge.
        sargs = util.consistency_merge(
            [('',) + x for x in sargs_list],
            error=on_error)

        by_c = defaultdict(list)
        for ch in channels_list:
            by_c[ch.codes.channel].append(ch._get_pyrocko_channel_args())

        chas = list(by_c.keys())
        chas.sort()
        pchannels = []
        for cha in chas:
            list_of_cargs = by_c[cha]
            cargs = util.consistency_merge(
                [('',) + x for x in list_of_cargs],
                error=on_error)
            pchannels.append(pmodel.Channel(*cargs))

        pstations.append(
            pmodel.Station(*sargs, channels=pchannels))

    return pstations

2762 

@property
def pile(self):

    '''
    Emulates the older :py:class:`pyrocko.pile.Pile` interface.

    This property exposes a :py:class:`pyrocko.squirrel.pile.Pile` object,
    which emulates most of the older :py:class:`pyrocko.pile.Pile` methods
    but uses the fluffy power of the Squirrel under the hood.

    This interface can be used as a drop-in replacement for piles which are
    used in existing scripts and programs for efficient waveform data
    access. The Squirrel-based pile scales better for large datasets. Newer
    scripts should use Squirrel's native methods to avoid the emulation
    overhead.
    '''
    from . import pile as pile_module

    # Create lazily on first access; cached for subsequent use.
    if self._pile is None:
        self._pile = pile_module.Pile(self)

    return self._pile

2785 

def snuffle(self, **kwargs):
    '''
    Look at dataset in Snuffler.

    All keyword arguments are forwarded to the pile emulation's
    ``snuffle`` method.
    '''
    emulated_pile = self.pile
    emulated_pile.snuffle(**kwargs)

2791 

def _gather_codes_keys(self, kind, gather, selector):
    '''
    Collect the set of values of ``gather(codes)`` over all codes of the
    given kind, restricted to those accepted by ``selector`` (if given).
    '''
    keys = set()
    for codes in self.iter_codes(kind):
        if selector is None or selector(codes):
            keys.add(gather(codes))

    return keys

2797 

def __str__(self):
    '''
    Textual summary of the contents (delegates to ``get_stats``).
    '''
    stats = self.get_stats()
    return str(stats)

2800 

def get_coverage(
        self, kind, tmin=None, tmax=None, codes=None, limit=None):

    '''
    Get coverage information.

    Get information about strips of gapless data coverage.

    :param kind:
        Content kind to be queried.
    :type kind:
        str

    :param tmin:
        Start time of query interval.
    :type tmin:
        :py:func:`~pyrocko.util.get_time_float`

    :param tmax:
        End time of query interval.
    :type tmax:
        :py:func:`~pyrocko.util.get_time_float`

    :param codes:
        If given, restrict query to given content codes patterns.
    :type codes:
        :py:class:`list` of :py:class:`~pyrocko.squirrel.model.Codes`
        objects appropriate for the queried content type, or anything which
        can be converted to such objects.

    :param limit:
        Limit query to return only up to a given maximum number of entries
        per matching time series (without setting this option, very gappy
        data could cause the query to execute for a very long time).
    :type limit:
        int

    :returns:
        Information about time spans covered by the requested time series
        data.
    :rtype:
        :py:class:`list` of :py:class:`~pyrocko.squirrel.model.Coverage`
    '''

    # Times are stored split into integer seconds plus float offset.
    tmin_seconds, tmin_offset = model.tsplit(tmin)
    tmax_seconds, tmax_offset = model.tsplit(tmax)
    kind_id = to_kind_id(kind)

    codes_info = list(self._iter_codes_info(kind=kind))

    # kdata_all entries: (pattern, kind_codes_id, codes, deltat)
    kdata_all = []
    if codes is None:
        for _, codes_entry, deltat, kind_codes_id, _ in codes_info:
            kdata_all.append(
                (codes_entry, kind_codes_id, codes_entry, deltat))

    else:
        for codes_entry in codes:
            pattern = to_codes(kind_id, codes_entry)
            for _, codes_entry, deltat, kind_codes_id, _ in codes_info:
                if model.match_codes(pattern, codes_entry):
                    kdata_all.append(
                        (pattern, kind_codes_id, codes_entry, deltat))

    kind_codes_ids = [x[1] for x in kdata_all]

    # Number of data strips already open at tmin, per (codes, deltat);
    # used as the starting count for the running coverage sum.
    counts_at_tmin = {}
    if tmin is not None:
        for nut in self.iter_nuts(
                kind, tmin, tmin, kind_codes_ids=kind_codes_ids):

            k = nut.codes, nut.deltat
            if k not in counts_at_tmin:
                counts_at_tmin[k] = 0

            counts_at_tmin[k] += 1

    coverages = []
    for pattern, kind_codes_id, codes_entry, deltat in kdata_all:
        # entry layout: [pattern, codes, deltat, tmin, tmax, changes]
        entry = [pattern, codes_entry, deltat, None, None, []]
        # Query overall first (ASC) and last (DESC) coverage change
        # times for this time series.
        for i, order in [(0, 'ASC'), (1, 'DESC')]:
            sql = self._sql('''
                SELECT
                    time_seconds,
                    time_offset
                FROM %(db)s.%(coverage)s
                WHERE
                    kind_codes_id == ?
                ORDER BY
                    kind_codes_id ''' + order + ''',
                    time_seconds ''' + order + ''',
                    time_offset ''' + order + '''
                LIMIT 1
            ''')

            for row in self._conn.execute(sql, [kind_codes_id]):
                entry[3+i] = model.tjoin(row[0], row[1])

        # No coverage rows at all for this series.
        if None in entry[3:5]:
            continue

        args = [kind_codes_id]

        sql_time = ''
        if tmin is not None:
            # intentionally < because (== tmin) is queried from nuts
            sql_time += ' AND ( ? < time_seconds ' \
                'OR ( ? == time_seconds AND ? < time_offset ) ) '
            args.extend([tmin_seconds, tmin_seconds, tmin_offset])

        if tmax is not None:
            sql_time += ' AND ( time_seconds < ? ' \
                'OR ( ? == time_seconds AND time_offset <= ? ) ) '
            args.extend([tmax_seconds, tmax_seconds, tmax_offset])

        sql_limit = ''
        if limit is not None:
            sql_limit = ' LIMIT ?'
            args.append(limit)

        sql = self._sql('''
            SELECT
                time_seconds,
                time_offset,
                step
            FROM %(db)s.%(coverage)s
            WHERE
                kind_codes_id == ?
                ''' + sql_time + '''
            ORDER BY
                kind_codes_id,
                time_seconds,
                time_offset
        ''' + sql_limit)

        rows = list(self._conn.execute(sql, args))

        if limit is not None and len(rows) == limit:
            # Result truncated: signal "unknown" with changes = None.
            entry[-1] = None
        else:
            # Accumulate step changes into a running count, starting
            # from the number of strips open at tmin.
            counts = counts_at_tmin.get((codes_entry, deltat), 0)
            tlast = None
            if tmin is not None:
                entry[-1].append((tmin, counts))
                tlast = tmin

            for row in rows:
                t = model.tjoin(row[0], row[1])
                counts += row[2]
                entry[-1].append((t, counts))
                tlast = t

            # Close the series at tmax if not already closed there.
            if tmax is not None and (tlast is None or tlast != tmax):
                entry[-1].append((tmax, counts))

        coverages.append(model.Coverage.from_values(entry + [kind_id]))

    return coverages

2959 

@filldocs
def get_stationxml(
        self, obj=None, tmin=None, tmax=None, time=None, codes=None,
        level='response', on_error='raise'):

    '''
    Get station/channel/response metadata in StationXML representation.

    %(query_args)s

    :returns:
        :py:class:`~pyrocko.io.stationxml.FDSNStationXML` object.
    '''
    # NOTE: @filldocs added for consistency with the other query
    # methods — without it, the %(query_args)s template in the
    # docstring is never substituted.

    if level not in ('network', 'station', 'channel', 'response'):
        raise ValueError('Invalid level: %s' % level)

    tmin, tmax, codes = self._get_selection_args(
        CHANNEL, obj, tmin, tmax, time, codes)

    def tts(t):
        # Human-readable time for log messages.
        if t is None:
            return '<none>'
        else:
            return util.tts(t, format='%Y-%m-%d %H:%M:%S')

    # Error handling strategy selected once, up front.
    if on_error == 'ignore':
        def handle_error(exc):
            pass

    elif on_error == 'warn':
        def handle_error(exc):
            logger.warning(str(exc))

    elif on_error == 'raise':
        def handle_error(exc):
            raise exc

    def use_first(node_type_name, codes, k, group):
        # Resolve duplicate epochs by picking the first entry.
        if on_error == 'warn':
            logger.warning(
                'Duplicates for %s %s, %s - %s -> using first' % (
                    node_type_name,
                    '.'.join(codes),
                    tts(k[0]), tts(k[1])))

        return group[0]

    def deduplicate(node_type_name, codes, nodes):
        # Group nodes by epoch (start_date, end_date) and keep one per
        # group.
        groups = defaultdict(list)
        for node in nodes:
            k = (node.start_date, node.end_date)
            groups[k].append(node)

        return [
            use_first(node_type_name, codes, k, group)
            for (k, group) in groups.items()]

    filtering = CodesPatternFiltering(codes=codes)

    nslcs = list(set(
        codes.nslc for codes in
        filtering.filter(self.get_codes(kind='channel'))))

    from pyrocko.io import stationxml as sx

    networks = []
    for net, stas in prefix_tree(nslcs):
        network = sx.Network(code=net)
        networks.append(network)

        if level not in ('station', 'channel', 'response'):
            continue

        for sta, locs in stas:
            stations = self.get_stations(
                tmin=tmin,
                tmax=tmax,
                codes=(net, sta, '*'),
                model='stationxml')

            if on_error != 'raise':
                stations = deduplicate(
                    'Station', (net, sta), stations)

            errors = sx.check_overlaps(
                'Station', (net, sta), stations)

            if errors:
                handle_error(error.Duplicate(
                    'Overlapping/duplicate station info:\n  %s'
                    % '\n  '.join(errors)))

            network.station_list.extend(stations)

            if level not in ('channel', 'response'):
                continue

            for loc, chas in locs:
                for cha, _ in chas:
                    channels = self.get_channels(
                        tmin=tmin,
                        tmax=tmax,
                        codes=(net, sta, loc, cha),
                        model='stationxml')

                    if on_error != 'raise':
                        channels = deduplicate(
                            'Channel', (net, sta, loc, cha), channels)

                    errors = sx.check_overlaps(
                        'Channel', (net, sta, loc, cha), channels)

                    if errors:
                        handle_error(error.Duplicate(
                            'Overlapping/duplicate channel info:\n  %s'
                            % '\n  '.join(errors)))

                    for channel in channels:
                        # Attach each channel epoch to the station
                        # epoch containing it.
                        station = sx.find_containing(stations, channel)
                        if station is not None:
                            station.channel_list.append(channel)
                        else:
                            handle_error(error.NotAvailable(
                                'No station or station epoch found '
                                'for channel: %s' % '.'.join(
                                    (net, sta, loc, cha))))

                            continue

                        if level != 'response':
                            continue

                        try:
                            response_sq, response_sx = self.get_response(
                                codes=(net, sta, loc, cha),
                                tmin=channel.start_date,
                                tmax=channel.end_date,
                                model='stationxml+',
                                on_duplicate=on_error)

                        except error.NotAvailable as e:
                            handle_error(e)
                            continue

                        if not (
                                sx.eq_open(
                                    channel.start_date, response_sq.tmin)
                                and sx.eq_open(
                                    channel.end_date, response_sq.tmax)):

                            handle_error(error.Inconsistencies(
                                'Response time span does not match '
                                'channel time span: %s' % '.'.join(
                                    (net, sta, loc, cha))))

                        channel.response = response_sx

    return sx.FDSNStationXML(
        source='Generated by Pyrocko Squirrel.',
        network_list=networks)

3120 

3121 def add_operator(self, op): 

3122 self._operators.append(op) 

3123 

3124 def update_operator_mappings(self): 

3125 available = self.get_codes(kind=('channel')) 

3126 

3127 for operator in self._operators: 

3128 operator.update_mappings(available, self._operator_registry) 

3129 

3130 def iter_operator_mappings(self): 

3131 for operator in self._operators: 

3132 for in_codes, out_codes in operator.iter_mappings(): 

3133 yield operator, in_codes, out_codes 

3134 

3135 def get_operator_mappings(self): 

3136 return list(self.iter_operator_mappings()) 

3137 

3138 def get_operator(self, codes): 

3139 try: 

3140 return self._operator_registry[codes][0] 

3141 except KeyError: 

3142 return None 

3143 

3144 def get_operator_group(self, codes): 

3145 try: 

3146 return self._operator_registry[codes] 

3147 except KeyError: 

3148 return None, (None, None, None) 

3149 

3150 def iter_operator_codes(self): 

3151 for _, _, out_codes in self.iter_operator_mappings(): 

3152 for codes in out_codes: 

3153 yield codes 

3154 

3155 def get_operator_codes(self): 

3156 return list(self.iter_operator_codes()) 

3157 

3158 def print_tables(self, table_names=None, stream=None): 

3159 ''' 

3160 Dump raw database tables in textual form (for debugging purposes). 

3161 

3162 :param table_names: 

3163 Names of tables to be dumped or ``None`` to dump all. 

3164 :type table_names: 

3165 :py:class:`list` of :py:class:`str` 

3166 

3167 :param stream: 

3168 Open file or ``None`` to dump to standard output. 

3169 ''' 

3170 

3171 if stream is None: 

3172 stream = sys.stdout 

3173 

3174 if isinstance(table_names, str): 

3175 table_names = [table_names] 

3176 

3177 if table_names is None: 

3178 table_names = [ 

3179 'selection_file_states', 

3180 'selection_nuts', 

3181 'selection_kind_codes_count', 

3182 'files', 'nuts', 'kind_codes', 'kind_codes_count'] 

3183 

3184 m = { 

3185 'selection_file_states': '%(db)s.%(file_states)s', 

3186 'selection_nuts': '%(db)s.%(nuts)s', 

3187 'selection_kind_codes_count': '%(db)s.%(kind_codes_count)s', 

3188 'files': 'files', 

3189 'nuts': 'nuts', 

3190 'kind_codes': 'kind_codes', 

3191 'kind_codes_count': 'kind_codes_count'} 

3192 

3193 for table_name in table_names: 

3194 self._database.print_table( 

3195 m[table_name] % self._names, stream=stream) 

3196 

3197 

class SquirrelStats(Object):
    '''
    Container to hold statistics about contents available from a Squirrel.

    See also :py:meth:`Squirrel.get_stats`.
    '''

    nfiles = Int.T(
        help='Number of files in selection.')
    nnuts = Int.T(
        help='Number of index nuts in selection.')
    codes = List.T(
        Tuple.T(content_t=String.T()),
        help='Available code sequences in selection, e.g. '
             '(agency, network, station, location) for stations nuts.')
    kinds = List.T(
        String.T(),
        help='Available content types in selection.')
    total_size = Int.T(
        help='Aggregated file size of files in selection.')
    counts = Dict.T(
        String.T(), Dict.T(Tuple.T(content_t=String.T()), Int.T()),
        help='Breakdown of how many nuts of any content type and code '
             'sequence are available in selection, ``counts[kind][codes]``.')
    time_spans = Dict.T(
        String.T(), Tuple.T(content_t=Timestamp.T()),
        help='Time spans by content type.')
    sources = List.T(
        String.T(),
        help='Descriptions of attached sources.')
    operators = List.T(
        String.T(),
        help='Descriptions of attached operators.')

    def __str__(self):
        # Total number of nuts per content kind, summed over all code
        # sequences.
        kind_counts = dict(
            (kind, sum(self.counts[kind].values())) for kind in self.kinds)

        scodes = model.codes_to_str_abbreviated(self.codes)

        ssources = '<none>' if not self.sources else '\n' + '\n'.join(
            ' ' + s for s in self.sources)

        soperators = '<none>' if not self.operators else '\n' + '\n'.join(
            ' ' + s for s in self.operators)

        def stime(t):
            # Render a timestamp; the global sentinel values g_tmin/g_tmax
            # stand for an unbounded span and are shown as '<none>'.
            return util.tts(t) if t is not None and t not in (
                model.g_tmin, model.g_tmax) else '<none>'

        def stable(rows):
            # Column-align rows of strings: pad each cell to the width of
            # the widest cell in its column.
            ns = [max(len(w) for w in col) for col in zip(*rows)]
            return '\n'.join(
                ' '.join(w.ljust(n) for n, w in zip(ns, row))
                for row in rows)

        def indent(s):
            return '\n'.join(' '+line for line in s.splitlines())

        stspans = '<none>' if not self.kinds else '\n' + indent(stable([(
            kind + ':',
            str(kind_counts[kind]),
            stime(self.time_spans[kind][0]),
            '-',
            stime(self.time_spans[kind][1])) for kind in sorted(self.kinds)]))

        s = '''
Number of files: %i
Total size of known files: %s
Number of index nuts: %i
Available content kinds: %s
Available codes: %s
Sources: %s
Operators: %s''' % (
            self.nfiles,
            util.human_bytesize(self.total_size),
            self.nnuts,
            stspans, scodes, ssources, soperators)

        # Strip the leading newline of the template.
        return s.lstrip()

3278 

3279 

# Public API of this module, as seen by ``from ... import *``.
__all__ = [
    'Squirrel',
    'SquirrelStats',
]