1# http://pyrocko.org - GPLv3 

2# 

3# The Pyrocko Developers, 21st Century 

4# ---|P------/S----------~Lg---------- 

5 

6import sys 

7import os 

8import time 

9import math 

10import logging 

11import threading 

12import queue 

13from collections import defaultdict 

14 

15from pyrocko.guts import Object, Int, List, Tuple, String, Timestamp, Dict 

16from pyrocko import util, trace 

17from pyrocko.progress import progress 

18from pyrocko.plot import nice_time_tick_inc_approx_secs 

19 

20from . import model, io, cache, dataset 

21 

22from .model import to_kind_id, WaveformOrder, to_kind, to_codes, \ 

23 STATION, CHANNEL, RESPONSE, EVENT, WAVEFORM, codes_patterns_list, \ 

24 codes_patterns_for_kind 

25from .client import fdsn, catalog 

26from .selection import Selection, filldocs 

27from .database import abspath 

28from .operators.base import Operator, CodesPatternFiltering 

29from . import client, environment, error 

30 

# Module-level logger; 'psq' is the squirrel logging namespace.
logger = logging.getLogger('psq.base')

# Prefix for Guts type names defined in this module.
guts_prefix = 'squirrel'

34 

35 

def nonef(f, xs):
    '''
    Apply ``f`` to the non-``None`` entries of ``xs``.

    Returns ``None`` when no entry remains after filtering.
    '''
    remaining = [entry for entry in xs if entry is not None]
    return f(remaining) if remaining else None

42 

43 

def make_task(*args):
    '''
    Create a progress task reporting through this module's logger.
    '''
    task = progress.task(*args, logger=logger)
    return task

46 

47 

def lpick(condition, seq):
    '''
    Partition ``seq`` into ``(non_matching, matching)`` lists.

    Elements for which ``condition`` evaluates truthy go into the second
    list, all others into the first. Input order is preserved.
    '''
    rejected = []
    accepted = []
    for item in seq:
        if condition(item):
            accepted.append(item)
        else:
            rejected.append(item)

    return rejected, accepted

54 

55 

def len_plural(obj):
    '''
    Return ``(len(obj), suffix)`` with an ``'s'`` suffix unless length is 1.

    Convenience for building messages like ``'%i file%s' % len_plural(xs)``.
    '''
    n = len(obj)
    return n, 's' if n != 1 else ''

58 

59 

def blocks(tmin, tmax, deltat, nsamples_block=100000):
    '''
    Yield consecutive time block boundaries covering ``[tmin, tmax]``.

    The block length is a "nice" number close to
    ``deltat * nsamples_block``; block edges are aligned to multiples of
    that length.
    '''
    tblock = nice_time_tick_inc_approx_secs(
        util.to_time_float(deltat * nsamples_block))

    kmin = int(math.floor(tmin / tblock))
    kmax = int(math.ceil(tmax / tblock))
    for k in range(kmin, kmax):
        yield k * tblock, (k+1) * tblock

67 

68 

def gaps(avail, tmin, tmax):
    '''
    Compute the intervals within ``[tmin, tmax]`` not covered by ``avail``.

    :param avail: list of ``(tmin_a, tmax_a)`` covered spans.
    :returns: list of ``(tmin_g, tmax_g)`` gap spans.
    '''
    assert tmin < tmax

    # Sweep line over open (+1) / close (-1) events. The query span itself
    # is inverted (closes at tmin, opens at tmax) so that coverage level 0
    # inside [tmin, tmax] means "gap". Tuple sort breaks time ties by
    # putting -1 events before +1 events, as in the original formulation.
    events = [(tmax, 1), (tmin, -1)]
    for tmin_a, tmax_a in avail:
        assert tmin_a < tmax_a
        events.append((tmin_a, 1))
        events.append((tmax_a, -1))

    events.sort()

    level = 1
    result = []
    tmin_g = None
    for t, step in events:
        if level == 1 and step == -1:
            tmin_g = t
        elif level == 0 and step == 1 and tmin_g is not None:
            # Skip degenerate zero-length gaps.
            if tmin_g != t:
                result.append((tmin_g, t))

        level += step

    return result

93 

94 

def order_key(order):
    '''
    Sort/group key for waveform orders: ``(codes, tmin, tmax)``.
    '''
    return order.codes, order.tmin, order.tmax

97 

98 

99def _is_exact(pat): 

100 return not ('*' in pat or '?' in pat or ']' in pat or '[' in pat) 

101 

102 

def prefix_tree(tups):
    '''
    Build a sorted prefix tree from a list of equal-length tuples.

    Each node is a pair ``(value, children)`` where ``children`` is again a
    list of such pairs. Leaves have an empty children list.
    '''
    if not tups:
        return []

    # Base case: single-element tuples become sorted leaves.
    if len(tups[0]) == 1:
        return sorted((tup[0], []) for tup in tups)

    # Group by first element, recurse on the remainders.
    groups = defaultdict(list)
    for tup in tups:
        groups[tup[0]].append(tup[1:])

    return [
        (head, prefix_tree(tails))
        for head, tails in sorted(groups.items())]

119 

120 

121def match_time_span(tmin, tmax, obj): 

122 return (obj.tmin is None or tmax is None or obj.tmin <= tmax) \ 

123 and (tmin is None or obj.tmax is None or tmin < obj.tmax) 

124 

125 

class Batch(object):
    '''
    Batch of waveforms from window-wise data extraction.

    Encapsulates state and results yielded for each window in window-wise
    waveform extraction with the :py:meth:`Squirrel.chopper_waveforms` method.

    *Attributes:*

    .. py:attribute:: tmin

        Start of this time window.

    .. py:attribute:: tmax

        End of this time window.

    .. py:attribute:: i

        Index of this time window in sequence.

    .. py:attribute:: n

        Total number of time windows in sequence.

    .. py:attribute:: igroup

        Index of this time window's sequence group.

    .. py:attribute:: ngroups

        Total number of sequence groups.

    .. py:attribute:: traces

        Extracted waveforms for this time window.
    '''

    def __init__(self, tmin, tmax, i, n, igroup, ngroups, traces):
        # Passive record object: plain attribute copies, no validation.
        # The attributes are documented in the class docstring above.
        self.tmin = tmin
        self.tmax = tmax
        self.i = i
        self.n = n
        self.igroup = igroup
        self.ngroups = ngroups
        self.traces = traces

172 

173 

174class Squirrel(Selection): 

175 ''' 

176 Prompt, lazy, indexing, caching, dynamic seismological dataset access. 

177 

178 :param env: 

179 Squirrel environment instance or directory path to use as starting 

180 point for its detection. By default, the current directory is used as 

181 starting point. When searching for a usable environment the directory 

182 ``'.squirrel'`` or ``'squirrel'`` in the current (or starting point) 

183 directory is used if it exists, otherwise the parent directories are 

184 search upwards for the existence of such a directory. If no such 

185 directory is found, the user's global Squirrel environment 

186 ``'$HOME/.pyrocko/squirrel'`` is used. 

187 :type env: 

188 :py:class:`~pyrocko.squirrel.environment.Environment` or 

189 :py:class:`str` 

190 

191 :param database: 

192 Database instance or path to database. By default the 

193 database found in the detected Squirrel environment is used. 

194 :type database: 

195 :py:class:`~pyrocko.squirrel.database.Database` or :py:class:`str` 

196 

197 :param cache_path: 

198 Directory path to use for data caching. By default, the ``'cache'`` 

199 directory in the detected Squirrel environment is used. 

200 :type cache_path: 

201 :py:class:`str` 

202 

203 :param persistent: 

204 If given a name, create a persistent selection. 

205 :type persistent: 

206 :py:class:`str` 

207 

208 This is the central class of the Squirrel framework. It provides a unified 

209 interface to query and access seismic waveforms, station meta-data and 

210 event information from local file collections and remote data sources. For 

211 prompt responses, a profound database setup is used under the hood. To 

212 speed up assemblage of ad-hoc data selections, files are indexed on first 

213 use and the extracted meta-data is remembered in the database for 

214 subsequent accesses. Bulk data is lazily loaded from disk and remote 

215 sources, just when requested. Once loaded, data is cached in memory to 

216 expedite typical access patterns. Files and data sources can be dynamically 

217 added to and removed from the Squirrel selection at runtime. 

218 

219 Queries are restricted to the contents of the files currently added to the 

220 Squirrel selection (usually a subset of the file meta-information 

221 collection in the database). This list of files is referred to here as the 

222 "selection". By default, temporary tables are created in the attached 

223 database to hold the names of the files in the selection as well as various 

224 indices and counters. These tables are only visible inside the application 

225 which created them and are deleted when the database connection is closed 

226 or the application exits. To create a selection which is not deleted at 

227 exit, supply a name to the ``persistent`` argument of the Squirrel 

228 constructor. Persistent selections are shared among applications using the 

229 same database. 

230 

231 **Method summary** 

232 

233 Some of the methods are implemented in :py:class:`Squirrel`'s base class 

234 :py:class:`~pyrocko.squirrel.selection.Selection`. 

235 

236 .. autosummary:: 

237 

238 ~Squirrel.add 

239 ~Squirrel.add_source 

240 ~Squirrel.add_fdsn 

241 ~Squirrel.add_catalog 

242 ~Squirrel.add_dataset 

243 ~Squirrel.add_virtual 

244 ~Squirrel.update 

245 ~Squirrel.update_waveform_promises 

246 ~Squirrel.advance_accessor 

247 ~Squirrel.clear_accessor 

248 ~Squirrel.reload 

249 ~pyrocko.squirrel.selection.Selection.iter_paths 

250 ~Squirrel.iter_nuts 

251 ~Squirrel.iter_kinds 

252 ~Squirrel.iter_deltats 

253 ~Squirrel.iter_codes 

254 ~pyrocko.squirrel.selection.Selection.get_paths 

255 ~Squirrel.get_nuts 

256 ~Squirrel.get_kinds 

257 ~Squirrel.get_deltats 

258 ~Squirrel.get_codes 

259 ~Squirrel.get_counts 

260 ~Squirrel.get_time_span 

261 ~Squirrel.get_deltat_span 

262 ~Squirrel.get_nfiles 

263 ~Squirrel.get_nnuts 

264 ~Squirrel.get_total_size 

265 ~Squirrel.get_stats 

266 ~Squirrel.get_content 

267 ~Squirrel.get_stations 

268 ~Squirrel.get_channels 

269 ~Squirrel.get_responses 

270 ~Squirrel.get_events 

271 ~Squirrel.get_waveform_nuts 

272 ~Squirrel.get_waveforms 

273 ~Squirrel.chopper_waveforms 

274 ~Squirrel.get_coverage 

275 ~Squirrel.pile 

276 ~Squirrel.snuffle 

277 ~Squirrel.glob_codes 

278 ~pyrocko.squirrel.selection.Selection.get_database 

279 ~Squirrel.print_tables 

280 ''' 

281 

    def __init__(
            self, env=None, database=None, cache_path=None, persistent=None):

        # Resolve the environment first; database and cache locations
        # default to the paths configured therein.
        if not isinstance(env, environment.Environment):
            env = environment.get_environment(env)

        if database is None:
            database = env.expand_path(env.database_path)

        if cache_path is None:
            cache_path = env.expand_path(env.cache_path)

        if persistent is None:
            persistent = env.persistent

        # Selection base class opens the database connection and sets up
        # the (possibly persistent) file selection tables.
        Selection.__init__(
            self, database=database, persistent=persistent)

        self.get_database().set_basepath(os.path.dirname(env.get_basepath()))

        # Separate in-memory content caches for waveforms and for all
        # other content kinds.
        self._content_caches = {
            'waveform': cache.ContentCache(),
            'default': cache.ContentCache()}

        self._cache_path = cache_path

        # Remote data access clients and derived-content operators.
        self._sources = []
        self._operators = []
        self._operator_registry = {}

        self._pending_orders = []

        self._pile = None
        self._n_choppers_active = 0

        self.downloads_enabled = True

        # Names of the selection-specific tables, derived from the
        # selection name so that multiple selections can coexist in one
        # database.
        self._names.update({
            'nuts': self.name + '_nuts',
            'kind_codes_count': self.name + '_kind_codes_count',
            'coverage': self.name + '_coverage'})

        with self.transaction('create tables') as cursor:
            self._create_tables_squirrel(cursor)

326 

    def _create_tables_squirrel(self, cursor):
        '''
        Create selection-specific tables, indexes and triggers.

        All statements use ``IF NOT EXISTS``, so this is safe to run again
        on an already initialized (persistent) selection.
        '''

        # Index of content snippets ('nuts'): one row per piece of content
        # within a file element, with split integer time stamps and a time
        # scale bucket (kscale) used for fast time span lookups.
        cursor.execute(self._register_table(self._sql(
            '''
                CREATE TABLE IF NOT EXISTS %(db)s.%(nuts)s (
                    nut_id integer PRIMARY KEY,
                    file_id integer,
                    file_segment integer,
                    file_element integer,
                    kind_id integer,
                    kind_codes_id integer,
                    tmin_seconds integer,
                    tmin_offset integer,
                    tmax_seconds integer,
                    tmax_offset integer,
                    kscale integer)
            ''')))

        # Per (kind, codes) nut counters, maintained by the inc/dec
        # triggers below.
        cursor.execute(self._register_table(self._sql(
            '''
                CREATE TABLE IF NOT EXISTS %(db)s.%(kind_codes_count)s (
                    kind_codes_id integer PRIMARY KEY,
                    count integer)
            ''')))

        cursor.execute(self._sql(
            '''
                CREATE UNIQUE INDEX IF NOT EXISTS %(db)s.%(nuts)s_file_element
                    ON %(nuts)s (file_id, file_segment, file_element)
            '''))

        cursor.execute(self._sql(
            '''
                CREATE INDEX IF NOT EXISTS %(db)s.%(nuts)s_index_file_id
                    ON %(nuts)s (file_id)
            '''))

        cursor.execute(self._sql(
            '''
                CREATE INDEX IF NOT EXISTS %(db)s.%(nuts)s_index_tmin_seconds
                    ON %(nuts)s (kind_id, tmin_seconds)
            '''))

        cursor.execute(self._sql(
            '''
                CREATE INDEX IF NOT EXISTS %(db)s.%(nuts)s_index_tmax_seconds
                    ON %(nuts)s (kind_id, tmax_seconds)
            '''))

        cursor.execute(self._sql(
            '''
                CREATE INDEX IF NOT EXISTS %(db)s.%(nuts)s_index_kscale
                    ON %(nuts)s (kind_id, kscale, tmin_seconds)
            '''))

        # Drop nuts when their file is deleted from the main files table.
        cursor.execute(self._sql(
            '''
                CREATE TRIGGER IF NOT EXISTS %(db)s.%(nuts)s_delete_nuts
                    BEFORE DELETE ON main.files FOR EACH ROW
                    BEGIN
                        DELETE FROM %(nuts)s WHERE file_id == old.file_id;
                    END
            '''))

        # trigger only on size to make silent update of mtime possible
        cursor.execute(self._sql(
            '''
                CREATE TRIGGER IF NOT EXISTS %(db)s.%(nuts)s_delete_nuts2
                    BEFORE UPDATE OF size ON main.files FOR EACH ROW
                    BEGIN
                        DELETE FROM %(nuts)s WHERE file_id == old.file_id;
                    END
            '''))

        # Drop nuts when a file is removed from this selection.
        cursor.execute(self._sql(
            '''
                CREATE TRIGGER IF NOT EXISTS
                    %(db)s.%(file_states)s_delete_files
                    BEFORE DELETE ON %(db)s.%(file_states)s FOR EACH ROW
                    BEGIN
                        DELETE FROM %(nuts)s WHERE file_id == old.file_id;
                    END
            '''))

        # Keep kind_codes_count in sync with nut inserts and deletes.
        cursor.execute(self._sql(
            '''
                CREATE TRIGGER IF NOT EXISTS %(db)s.%(nuts)s_inc_kind_codes
                    BEFORE INSERT ON %(nuts)s FOR EACH ROW
                    BEGIN
                        INSERT OR IGNORE INTO %(kind_codes_count)s VALUES
                            (new.kind_codes_id, 0);
                        UPDATE %(kind_codes_count)s
                            SET count = count + 1
                            WHERE new.kind_codes_id
                                == %(kind_codes_count)s.kind_codes_id;
                    END
            '''))

        cursor.execute(self._sql(
            '''
                CREATE TRIGGER IF NOT EXISTS %(db)s.%(nuts)s_dec_kind_codes
                    BEFORE DELETE ON %(nuts)s FOR EACH ROW
                    BEGIN
                        UPDATE %(kind_codes_count)s
                            SET count = count - 1
                            WHERE old.kind_codes_id
                                == %(kind_codes_count)s.kind_codes_id;
                    END
            '''))

        # Coverage change points: per (kind, codes), 'step' accumulates the
        # change in the number of overlapping nuts at a given time.
        cursor.execute(self._register_table(self._sql(
            '''
                CREATE TABLE IF NOT EXISTS %(db)s.%(coverage)s (
                    kind_codes_id integer,
                    time_seconds integer,
                    time_offset integer,
                    step integer)
            ''')))

        cursor.execute(self._sql(
            '''
                CREATE UNIQUE INDEX IF NOT EXISTS %(db)s.%(coverage)s_time
                    ON %(coverage)s (kind_codes_id, time_seconds, time_offset)
            '''))

        # On nut insert: +1 step at tmin, -1 step at tmax; entries whose
        # step becomes 0 are pruned.
        cursor.execute(self._sql(
            '''
                CREATE TRIGGER IF NOT EXISTS %(db)s.%(nuts)s_add_coverage
                    AFTER INSERT ON %(nuts)s FOR EACH ROW
                    BEGIN
                        INSERT OR IGNORE INTO %(coverage)s VALUES
                        (new.kind_codes_id, new.tmin_seconds, new.tmin_offset, 0)
                        ;
                        UPDATE %(coverage)s
                            SET step = step + 1
                            WHERE new.kind_codes_id == %(coverage)s.kind_codes_id
                                AND new.tmin_seconds == %(coverage)s.time_seconds
                                AND new.tmin_offset == %(coverage)s.time_offset
                        ;
                        INSERT OR IGNORE INTO %(coverage)s VALUES
                        (new.kind_codes_id, new.tmax_seconds, new.tmax_offset, 0)
                        ;
                        UPDATE %(coverage)s
                            SET step = step - 1
                            WHERE new.kind_codes_id == %(coverage)s.kind_codes_id
                                AND new.tmax_seconds == %(coverage)s.time_seconds
                                AND new.tmax_offset == %(coverage)s.time_offset
                        ;
                        DELETE FROM %(coverage)s
                            WHERE new.kind_codes_id == %(coverage)s.kind_codes_id
                                AND new.tmin_seconds == %(coverage)s.time_seconds
                                AND new.tmin_offset == %(coverage)s.time_offset
                                AND step == 0
                        ;
                        DELETE FROM %(coverage)s
                            WHERE new.kind_codes_id == %(coverage)s.kind_codes_id
                                AND new.tmax_seconds == %(coverage)s.time_seconds
                                AND new.tmax_offset == %(coverage)s.time_offset
                                AND step == 0
                        ;
                    END
            '''))

        # On nut delete: inverse of the add_coverage trigger.
        cursor.execute(self._sql(
            '''
                CREATE TRIGGER IF NOT EXISTS %(db)s.%(nuts)s_remove_coverage
                    BEFORE DELETE ON %(nuts)s FOR EACH ROW
                    BEGIN
                        INSERT OR IGNORE INTO %(coverage)s VALUES
                        (old.kind_codes_id, old.tmin_seconds, old.tmin_offset, 0)
                        ;
                        UPDATE %(coverage)s
                            SET step = step - 1
                            WHERE old.kind_codes_id == %(coverage)s.kind_codes_id
                                AND old.tmin_seconds == %(coverage)s.time_seconds
                                AND old.tmin_offset == %(coverage)s.time_offset
                        ;
                        INSERT OR IGNORE INTO %(coverage)s VALUES
                        (old.kind_codes_id, old.tmax_seconds, old.tmax_offset, 0)
                        ;
                        UPDATE %(coverage)s
                            SET step = step + 1
                            WHERE old.kind_codes_id == %(coverage)s.kind_codes_id
                                AND old.tmax_seconds == %(coverage)s.time_seconds
                                AND old.tmax_offset == %(coverage)s.time_offset
                        ;
                        DELETE FROM %(coverage)s
                            WHERE old.kind_codes_id == %(coverage)s.kind_codes_id
                                AND old.tmin_seconds == %(coverage)s.time_seconds
                                AND old.tmin_offset == %(coverage)s.time_offset
                                AND step == 0
                        ;
                        DELETE FROM %(coverage)s
                            WHERE old.kind_codes_id == %(coverage)s.kind_codes_id
                                AND old.tmax_seconds == %(coverage)s.time_seconds
                                AND old.tmax_offset == %(coverage)s.time_offset
                                AND step == 0
                        ;
                    END
            '''))

527 

528 def _delete(self): 

529 '''Delete database tables associated with this Squirrel.''' 

530 

531 with self.transaction('delete tables') as cursor: 

532 for s in ''' 

533 DROP TRIGGER %(db)s.%(nuts)s_delete_nuts; 

534 DROP TRIGGER %(db)s.%(nuts)s_delete_nuts2; 

535 DROP TRIGGER %(db)s.%(file_states)s_delete_files; 

536 DROP TRIGGER %(db)s.%(nuts)s_inc_kind_codes; 

537 DROP TRIGGER %(db)s.%(nuts)s_dec_kind_codes; 

538 DROP TABLE %(db)s.%(nuts)s; 

539 DROP TABLE %(db)s.%(kind_codes_count)s; 

540 DROP TRIGGER IF EXISTS %(db)s.%(nuts)s_add_coverage; 

541 DROP TRIGGER IF EXISTS %(db)s.%(nuts)s_remove_coverage; 

542 DROP TABLE IF EXISTS %(db)s.%(coverage)s; 

543 '''.strip().splitlines(): 

544 

545 cursor.execute(self._sql(s)) 

546 

547 Selection._delete(self) 

548 

549 @filldocs 

550 def add(self, 

551 paths, 

552 kinds=None, 

553 format='detect', 

554 include=None, 

555 exclude=None, 

556 check=True): 

557 

558 ''' 

559 Add files to the selection. 

560 

561 :param paths: 

562 Iterator yielding paths to files or directories to be added to the 

563 selection. Recurses into directories. If given a ``str``, it 

564 is treated as a single path to be added. 

565 :type paths: 

566 :py:class:`list` of :py:class:`str` 

567 

568 :param kinds: 

569 Content types to be made available through the Squirrel selection. 

570 By default, all known content types are accepted. 

571 :type kinds: 

572 :py:class:`list` of :py:class:`str` 

573 

574 :param format: 

575 File format identifier or ``'detect'`` to enable auto-detection 

576 (available: %(file_formats)s). 

577 :type format: 

578 str 

579 

580 :param include: 

581 If not ``None``, files are only included if their paths match the 

582 given regular expression pattern. 

583 :type format: 

584 str 

585 

586 :param exclude: 

587 If not ``None``, files are only included if their paths do not 

588 match the given regular expression pattern. 

589 :type format: 

590 str 

591 

592 :param check: 

593 If ``True``, all file modification times are checked to see if 

594 cached information has to be updated (slow). If ``False``, only 

595 previously unknown files are indexed and cached information is used 

596 for known files, regardless of file state (fast, corrresponds to 

597 Squirrel's ``--optimistic`` mode). File deletions will go 

598 undetected in the latter case. 

599 :type check: 

600 bool 

601 

602 :Complexity: 

603 O(log N) 

604 ''' 

605 

606 if isinstance(kinds, str): 

607 kinds = (kinds,) 

608 

609 if isinstance(paths, str): 

610 paths = [paths] 

611 

612 kind_mask = model.to_kind_mask(kinds) 

613 

614 with progress.view(): 

615 Selection.add( 

616 self, util.iter_select_files( 

617 paths, 

618 show_progress=False, 

619 include=include, 

620 exclude=exclude, 

621 pass_through=lambda path: path.startswith('virtual:') 

622 ), kind_mask, format) 

623 

624 self._load(check) 

625 self._update_nuts() 

626 

    def reload(self):
        '''
        Check for modifications and reindex modified files.

        Based on file modification times.
        '''

        # Force re-checking of all files, then reindex and refresh the
        # selection's nut tables.
        self._set_file_states_force_check()
        self._load(check=True)
        self._update_nuts()

637 

638 def add_virtual(self, nuts, virtual_paths=None): 

639 ''' 

640 Add content which is not backed by files. 

641 

642 :param nuts: 

643 Content pieces to be added. 

644 :type nuts: 

645 iterator yielding :py:class:`~pyrocko.squirrel.model.Nut` objects 

646 

647 :param virtual_paths: 

648 List of virtual paths to prevent creating a temporary list of the 

649 nuts while aggregating the file paths for the selection. 

650 :type virtual_paths: 

651 :py:class:`list` of :py:class:`str` 

652 

653 Stores to the main database and the selection. 

654 ''' 

655 

656 if isinstance(virtual_paths, str): 

657 virtual_paths = [virtual_paths] 

658 

659 if virtual_paths is None: 

660 if not isinstance(nuts, list): 

661 nuts = list(nuts) 

662 virtual_paths = set(nut.file_path for nut in nuts) 

663 

664 Selection.add(self, virtual_paths) 

665 self.get_database().dig(nuts) 

666 self._update_nuts() 

667 

668 def add_volatile(self, nuts): 

669 if not isinstance(nuts, list): 

670 nuts = list(nuts) 

671 

672 paths = list(set(nut.file_path for nut in nuts)) 

673 io.backends.virtual.add_nuts(nuts) 

674 self.add_virtual(nuts, paths) 

675 self._volatile_paths.extend(paths) 

676 

677 def add_volatile_waveforms(self, traces): 

678 ''' 

679 Add in-memory waveforms which will be removed when the app closes. 

680 ''' 

681 

682 name = model.random_name() 

683 

684 path = 'virtual:volatile:%s' % name 

685 

686 nuts = [] 

687 for itr, tr in enumerate(traces): 

688 assert tr.tmin <= tr.tmax 

689 tmin_seconds, tmin_offset = model.tsplit(tr.tmin) 

690 tmax_seconds, tmax_offset = model.tsplit( 

691 tr.tmin + tr.data_len()*tr.deltat) 

692 

693 nuts.append(model.Nut( 

694 file_path=path, 

695 file_format='virtual', 

696 file_segment=itr, 

697 file_element=0, 

698 file_mtime=0, 

699 codes=tr.codes, 

700 tmin_seconds=tmin_seconds, 

701 tmin_offset=tmin_offset, 

702 tmax_seconds=tmax_seconds, 

703 tmax_offset=tmax_offset, 

704 deltat=tr.deltat, 

705 kind_id=to_kind_id('waveform'), 

706 content=tr)) 

707 

708 self.add_volatile(nuts) 

709 return path 

710 

711 def _load(self, check): 

712 for _ in io.iload( 

713 self, 

714 content=[], 

715 skip_unchanged=True, 

716 check=check): 

717 pass 

718 

    def _update_nuts(self, transaction=None):
        '''
        Copy newly indexed nuts from the database's global ``nuts`` table
        into this selection's nut table.
        '''
        transaction = transaction or self.transaction('update nuts')
        with make_task('Aggregating selection') as task, \
                transaction as cursor:

            # Let the task UI update during the potentially long-running
            # INSERT below.
            self._conn.set_progress_handler(task.update, 100000)
            nrows = cursor.execute(self._sql(
                '''
                    INSERT INTO %(db)s.%(nuts)s
                    SELECT NULL,
                        nuts.file_id, nuts.file_segment, nuts.file_element,
                        nuts.kind_id, nuts.kind_codes_id,
                        nuts.tmin_seconds, nuts.tmin_offset,
                        nuts.tmax_seconds, nuts.tmax_offset,
                        nuts.kscale
                    FROM %(db)s.%(file_states)s
                    INNER JOIN nuts
                        ON %(db)s.%(file_states)s.file_id == nuts.file_id
                    INNER JOIN kind_codes
                        ON nuts.kind_codes_id ==
                           kind_codes.kind_codes_id
                    WHERE %(db)s.%(file_states)s.file_state != 2
                        AND (((1 << kind_codes.kind_id)
                            & %(db)s.%(file_states)s.kind_mask) != 0)
                ''')).rowcount

            task.update(nrows)
            self._set_file_states_known(transaction)
            self._conn.set_progress_handler(None, 0)

748 

    def add_source(self, source, check=True):
        '''
        Add remote resource.

        :param source:
            Remote data access client instance.
        :type source:
            subclass of :py:class:`~pyrocko.squirrel.client.base.Source`
        '''

        # Register first, then let the source attach itself to this
        # Squirrel instance.
        self._sources.append(source)
        source.setup(self, check=check)

761 

762 def add_fdsn(self, *args, **kwargs): 

763 ''' 

764 Add FDSN site for transparent remote data access. 

765 

766 Arguments are passed to 

767 :py:class:`~pyrocko.squirrel.client.fdsn.FDSNSource`. 

768 ''' 

769 

770 self.add_source(fdsn.FDSNSource(*args, **kwargs)) 

771 

772 def add_catalog(self, *args, **kwargs): 

773 ''' 

774 Add online catalog for transparent event data access. 

775 

776 Arguments are passed to 

777 :py:class:`~pyrocko.squirrel.client.catalog.CatalogSource`. 

778 ''' 

779 

780 self.add_source(catalog.CatalogSource(*args, **kwargs)) 

781 

782 def add_dataset(self, ds, check=True): 

783 ''' 

784 Read dataset description from file and add its contents. 

785 

786 :param ds: 

787 Path to dataset description file or dataset description object 

788 . See :py:mod:`~pyrocko.squirrel.dataset`. 

789 :type ds: 

790 :py:class:`str` or :py:class:`~pyrocko.squirrel.dataset.Dataset` 

791 

792 :param check: 

793 If ``True``, all file modification times are checked to see if 

794 cached information has to be updated (slow). If ``False``, only 

795 previously unknown files are indexed and cached information is used 

796 for known files, regardless of file state (fast, corrresponds to 

797 Squirrel's ``--optimistic`` mode). File deletions will go 

798 undetected in the latter case. 

799 :type check: 

800 bool 

801 ''' 

802 if isinstance(ds, str): 

803 ds = dataset.read_dataset(ds) 

804 

805 ds.setup(self, check=check) 

806 

807 def _get_selection_args( 

808 self, kind_id, 

809 obj=None, tmin=None, tmax=None, time=None, codes=None): 

810 

811 if codes is not None: 

812 codes = codes_patterns_for_kind(kind_id, codes) 

813 

814 if time is not None: 

815 tmin = time 

816 tmax = time 

817 

818 if obj is not None: 

819 tmin = tmin if tmin is not None else obj.tmin 

820 tmax = tmax if tmax is not None else obj.tmax 

821 codes = codes if codes is not None else codes_patterns_for_kind( 

822 kind_id, obj.codes) 

823 

824 return tmin, tmax, codes 

825 

826 def _get_selection_args_str(self, *args, **kwargs): 

827 

828 tmin, tmax, codes = self._get_selection_args(*args, **kwargs) 

829 return 'tmin: %s, tmax: %s, codes: %s' % ( 

830 util.time_to_str(tmin) if tmin is not None else 'none', 

831 util.time_to_str(tmax) if tmax is not None else 'none', 

832 ','.join(str(entry) for entry in codes)) 

833 

834 def _selection_args_to_kwargs( 

835 self, obj=None, tmin=None, tmax=None, time=None, codes=None): 

836 

837 return dict(obj=obj, tmin=tmin, tmax=tmax, time=time, codes=codes) 

838 

    def _timerange_sql(self, tmin, tmax, kind, cond, args, naiv):
        '''
        Append SQL conditions restricting nuts to a time range (in-place).

        In the default (non-naiv) mode, the tmin condition is split over
        the kscale duration buckets so that the (kind_id, kscale,
        tmin_seconds) index can be used.
        '''

        tmin_seconds, tmin_offset = model.tsplit(tmin)
        tmax_seconds, tmax_offset = model.tsplit(tmax)
        if naiv:
            # Simple variant (slow, for testing): no kscale bucketing.
            cond.append('%(db)s.%(nuts)s.tmin_seconds <= ?')
            args.append(tmax_seconds)
        else:
            tscale_edges = model.tscale_edges
            tmin_cond = []
            for kscale in range(tscale_edges.size + 1):
                if kscale != tscale_edges.size:
                    # Nuts in this bucket are at most 'tscale' seconds
                    # long, so their tmin must lie within that margin
                    # before the query span.
                    tscale = int(tscale_edges[kscale])
                    tmin_cond.append('''
                        (%(db)s.%(nuts)s.kind_id = ?
                         AND %(db)s.%(nuts)s.kscale == ?
                         AND %(db)s.%(nuts)s.tmin_seconds BETWEEN ? AND ?)
                    ''')
                    args.extend(
                        (to_kind_id(kind), kscale,
                         tmin_seconds - tscale - 1, tmax_seconds + 1))

                else:
                    # Last bucket has no upper duration bound.
                    tmin_cond.append('''
                        (%(db)s.%(nuts)s.kind_id == ?
                         AND %(db)s.%(nuts)s.kscale == ?
                         AND %(db)s.%(nuts)s.tmin_seconds <= ?)
                    ''')

                    args.extend(
                        (to_kind_id(kind), kscale, tmax_seconds + 1))
            if tmin_cond:
                cond.append(' ( ' + ' OR '.join(tmin_cond) + ' ) ')

        cond.append('%(db)s.%(nuts)s.tmax_seconds >= ?')
        args.append(tmin_seconds)

875 

876 def _codes_match_sql(self, positive, kind_id, codes, cond, args): 

877 pats = codes_patterns_for_kind(kind_id, codes) 

878 if pats is None: 

879 return 

880 

881 pats_exact = [] 

882 pats_nonexact = [] 

883 for pat in pats: 

884 spat = pat.safe_str 

885 (pats_exact if _is_exact(spat) else pats_nonexact).append(spat) 

886 

887 codes_cond = [] 

888 if pats_exact: 

889 codes_cond.append(' ( kind_codes.codes IN ( %s ) ) ' % ', '.join( 

890 '?'*len(pats_exact))) 

891 

892 args.extend(pats_exact) 

893 

894 if pats_nonexact: 

895 codes_cond.append(' ( %s ) ' % ' OR '.join( 

896 ('kind_codes.codes GLOB ?',) * len(pats_nonexact))) 

897 

898 args.extend(pats_nonexact) 

899 

900 if codes_cond: 

901 cond.append('%s ( %s )' % ( 

902 'NOT' if not positive else '', 

903 ' OR '.join(codes_cond))) 

904 

905 def iter_nuts( 

906 self, kind=None, tmin=None, tmax=None, codes=None, 

907 codes_exclude=None, sample_rate_min=None, sample_rate_max=None, 

908 naiv=False, kind_codes_ids=None, path=None, limit=None): 

909 

910 ''' 

911 Iterate over content entities matching given constraints. 

912 

913 :param kind: 

914 Content kind (or kinds) to extract. 

915 :type kind: 

916 :py:class:`str`, :py:class:`list` of :py:class:`str` 

917 

918 :param tmin: 

919 Start time of query interval. 

920 :type tmin: 

921 timestamp 

922 

923 :param tmax: 

924 End time of query interval. 

925 :type tmax: 

926 timestamp 

927 

928 :param codes: 

929 List of code patterns to query. 

930 :type codes: 

931 :py:class:`list` of :py:class:`~pyrocko.squirrel.model.Codes` 

932 objects appropriate for the queried content type, or anything which 

933 can be converted to such objects. 

934 

935 :param naiv: 

936 Bypass time span lookup through indices (slow, for testing). 

937 :type naiv: 

938 :py:class:`bool` 

939 

940 :param kind_codes_ids: 

941 Kind-codes IDs of contents to be retrieved (internal use). 

942 :type kind_codes_ids: 

943 :py:class:`list` of :py:class:`int` 

944 

945 :yields: 

946 :py:class:`~pyrocko.squirrel.model.Nut` objects representing the 

947 intersecting content. 

948 

949 :complexity: 

950 O(log N) for the time selection part due to heavy use of database 

951 indices. 

952 

953 Query time span is treated as a half-open interval ``[tmin, tmax)``. 

954 However, if ``tmin`` equals ``tmax``, the edge logics are modified to 

955 closed-interval so that content intersecting with the time instant ``t 

956 = tmin = tmax`` is returned (otherwise nothing would be returned as 

957 ``[t, t)`` never matches anything). 

958 

959 Time spans of content entities to be matched are also treated as half 

960 open intervals, e.g. content span ``[0, 1)`` is matched by query span 

961 ``[0, 1)`` but not by ``[-1, 0)`` or ``[1, 2)``. Also here, logics are 

962 modified to closed-interval when the content time span is an empty 

963 interval, i.e. to indicate a time instant. E.g. time instant 0 is 

964 matched by ``[0, 1)`` but not by ``[-1, 0)`` or ``[1, 2)``. 

965 ''' 

966 

967 if not isinstance(kind, str): 

968 if kind is None: 

969 kind = model.g_content_kinds 

970 for kind_ in kind: 

971 for nut in self.iter_nuts(kind_, tmin, tmax, codes): 

972 yield nut 

973 

974 return 

975 

976 kind_id = to_kind_id(kind) 

977 

978 cond = [] 

979 args = [] 

980 if tmin is not None or tmax is not None: 

981 assert kind is not None 

982 if tmin is None: 

983 tmin = self.get_time_span()[0] 

984 if tmax is None: 

985 tmax = self.get_time_span()[1] + 1.0 

986 

987 self._timerange_sql(tmin, tmax, kind, cond, args, naiv) 

988 

989 cond.append('kind_codes.kind_id == ?') 

990 args.append(kind_id) 

991 

992 if codes is not None: 

993 self._codes_match_sql(True, kind_id, codes, cond, args) 

994 

995 if codes_exclude is not None: 

996 self._codes_match_sql(False, kind_id, codes_exclude, cond, args) 

997 

998 if sample_rate_min is not None: 

999 cond.append('kind_codes.deltat <= ?') 

1000 args.append(1.0/sample_rate_min) 

1001 

1002 if sample_rate_max is not None: 

1003 cond.append('? <= kind_codes.deltat') 

1004 args.append(1.0/sample_rate_max) 

1005 

1006 if kind_codes_ids is not None: 

1007 cond.append( 

1008 ' ( kind_codes.kind_codes_id IN ( %s ) ) ' % ', '.join( 

1009 '?'*len(kind_codes_ids))) 

1010 

1011 args.extend(kind_codes_ids) 

1012 

1013 db = self.get_database() 

1014 if path is not None: 

1015 cond.append('files.path == ?') 

1016 args.append(db.relpath(abspath(path))) 

1017 

1018 sql = (''' 

1019 SELECT 

1020 files.path, 

1021 files.format, 

1022 files.mtime, 

1023 files.size, 

1024 %(db)s.%(nuts)s.file_segment, 

1025 %(db)s.%(nuts)s.file_element, 

1026 kind_codes.kind_id, 

1027 kind_codes.codes, 

1028 %(db)s.%(nuts)s.tmin_seconds, 

1029 %(db)s.%(nuts)s.tmin_offset, 

1030 %(db)s.%(nuts)s.tmax_seconds, 

1031 %(db)s.%(nuts)s.tmax_offset, 

1032 kind_codes.deltat 

1033 FROM files 

1034 INNER JOIN %(db)s.%(nuts)s 

1035 ON files.file_id == %(db)s.%(nuts)s.file_id 

1036 INNER JOIN kind_codes 

1037 ON %(db)s.%(nuts)s.kind_codes_id == kind_codes.kind_codes_id 

1038 ''') 

1039 

1040 if cond: 

1041 sql += ''' WHERE ''' + ' AND '.join(cond) 

1042 

1043 if limit is not None: 

1044 sql += ''' LIMIT %i''' % limit 

1045 

1046 sql = self._sql(sql) 

1047 if tmin is None and tmax is None: 

1048 for row in self._conn.execute(sql, args): 

1049 row = (db.abspath(row[0]),) + row[1:] 

1050 nut = model.Nut(values_nocheck=row) 

1051 yield nut 

1052 else: 

1053 assert tmin is not None and tmax is not None 

1054 if tmin == tmax: 

1055 for row in self._conn.execute(sql, args): 

1056 row = (db.abspath(row[0]),) + row[1:] 

1057 nut = model.Nut(values_nocheck=row) 

1058 if (nut.tmin <= tmin < nut.tmax) \ 

1059 or (nut.tmin == nut.tmax and tmin == nut.tmin): 

1060 

1061 yield nut 

1062 else: 

1063 for row in self._conn.execute(sql, args): 

1064 row = (db.abspath(row[0]),) + row[1:] 

1065 nut = model.Nut(values_nocheck=row) 

1066 if (tmin < nut.tmax and nut.tmin < tmax) \ 

1067 or (nut.tmin == nut.tmax 

1068 and tmin <= nut.tmin < tmax): 

1069 

1070 yield nut 

1071 

1072 def get_nuts(self, *args, **kwargs): 

1073 ''' 

1074 Get content entities matching given constraints. 

1075 

1076 Like :py:meth:`iter_nuts` but returns results as a list. 

1077 ''' 

1078 

1079 return list(self.iter_nuts(*args, **kwargs)) 

1080 

    def _split_nuts(
            self, kind, tmin=None, tmax=None, codes=None, path=None):
        '''
        Cut the interval ``[tmin, tmax)`` out of matching index entries.

        Matching nuts overlapping the interval are deleted; where a nut
        extends beyond the interval, shortened left/right remainder entries
        are inserted in its place. The operation is applied both to the
        selection's nut table and to the main database's nut table, within
        a single transaction.
        '''

        kind_id = to_kind_id(kind)
        tmin_seconds, tmin_offset = model.tsplit(tmin)
        tmax_seconds, tmax_offset = model.tsplit(tmax)

        # Template substitutions redirecting the generic SQL below to the
        # main database's nut table (second pass of the loop).
        names_main_nuts = dict(self._names)
        names_main_nuts.update(db='main', nuts='nuts')

        db = self.get_database()

        def main_nuts(s):
            return s % names_main_nuts

        with self.transaction('split nuts') as cursor:
            # modify selection and main
            for sql_subst in [
                    self._sql, main_nuts]:

                cond = []
                args = []

                self._timerange_sql(tmin, tmax, kind, cond, args, False)

                if codes is not None:
                    self._codes_match_sql(True, kind_id, codes, cond, args)

                if path is not None:
                    cond.append('files.path == ?')
                    args.append(db.relpath(abspath(path)))

                sql = sql_subst('''
                    SELECT
                        %(db)s.%(nuts)s.nut_id,
                        %(db)s.%(nuts)s.tmin_seconds,
                        %(db)s.%(nuts)s.tmin_offset,
                        %(db)s.%(nuts)s.tmax_seconds,
                        %(db)s.%(nuts)s.tmax_offset,
                        kind_codes.deltat
                    FROM files
                    INNER JOIN %(db)s.%(nuts)s
                        ON files.file_id == %(db)s.%(nuts)s.file_id
                    INNER JOIN kind_codes
                        ON %(db)s.%(nuts)s.kind_codes_id == kind_codes.kind_codes_id
                    WHERE ''' + ' AND '.join(cond))  # noqa

                insert = []
                delete = []
                for row in cursor.execute(sql, args):
                    nut_id, nut_tmin_seconds, nut_tmin_offset, \
                        nut_tmax_seconds, nut_tmax_offset, nut_deltat = row

                    nut_tmin = model.tjoin(
                        nut_tmin_seconds, nut_tmin_offset)
                    nut_tmax = model.tjoin(
                        nut_tmax_seconds, nut_tmax_offset)

                    if nut_tmin < tmax and tmin < nut_tmax:
                        # Nut overlaps the cut interval. Keep any part
                        # sticking out to the left of tmin as a new entry...
                        if nut_tmin < tmin:
                            insert.append((
                                nut_tmin_seconds, nut_tmin_offset,
                                tmin_seconds, tmin_offset,
                                model.tscale_to_kscale(
                                    tmin_seconds - nut_tmin_seconds),
                                nut_id))

                        # ...and likewise any part sticking out to the right
                        # of tmax.
                        if tmax < nut_tmax:
                            insert.append((
                                tmax_seconds, tmax_offset,
                                nut_tmax_seconds, nut_tmax_offset,
                                model.tscale_to_kscale(
                                    nut_tmax_seconds - tmax_seconds),
                                nut_id))

                        delete.append((nut_id,))

                # Remainder entries inherit all other attributes from the
                # original nut (selected by nut_id), only the time span and
                # kscale differ.
                sql_add = '''
                    INSERT INTO %(db)s.%(nuts)s (
                            file_id, file_segment, file_element, kind_id,
                            kind_codes_id, tmin_seconds, tmin_offset,
                            tmax_seconds, tmax_offset, kscale )
                        SELECT
                            file_id, file_segment, file_element,
                            kind_id, kind_codes_id, ?, ?, ?, ?, ?
                        FROM %(db)s.%(nuts)s
                        WHERE nut_id == ?
                '''
                cursor.executemany(sql_subst(sql_add), insert)

                sql_delete = '''
                    DELETE FROM %(db)s.%(nuts)s WHERE nut_id == ?
                '''
                cursor.executemany(sql_subst(sql_delete), delete)

1175 

1176 def get_time_span(self, kinds=None, tight=True, dummy_limits=True): 

1177 ''' 

1178 Get time interval over all content in selection. 

1179 

1180 :param kinds: 

1181 If not ``None``, restrict query to given content kinds. 

1182 :type kind: 

1183 list of str 

1184 

1185 :complexity: 

1186 O(1), independent of the number of nuts. 

1187 

1188 :returns: 

1189 ``(tmin, tmax)``, combined time interval of queried content kinds. 

1190 ''' 

1191 

1192 sql_min = self._sql(''' 

1193 SELECT MIN(tmin_seconds), MIN(tmin_offset) 

1194 FROM %(db)s.%(nuts)s 

1195 WHERE kind_id == ? 

1196 AND tmin_seconds == ( 

1197 SELECT MIN(tmin_seconds) 

1198 FROM %(db)s.%(nuts)s 

1199 WHERE kind_id == ?) 

1200 ''') 

1201 

1202 sql_max = self._sql(''' 

1203 SELECT MAX(tmax_seconds), MAX(tmax_offset) 

1204 FROM %(db)s.%(nuts)s 

1205 WHERE kind_id == ? 

1206 AND tmax_seconds == ( 

1207 SELECT MAX(tmax_seconds) 

1208 FROM %(db)s.%(nuts)s 

1209 WHERE kind_id == ?) 

1210 ''') 

1211 

1212 gtmin = None 

1213 gtmax = None 

1214 

1215 if isinstance(kinds, str): 

1216 kinds = [kinds] 

1217 

1218 if kinds is None: 

1219 kind_ids = model.g_content_kind_ids 

1220 else: 

1221 kind_ids = model.to_kind_ids(kinds) 

1222 

1223 tmins = [] 

1224 tmaxs = [] 

1225 for kind_id in kind_ids: 

1226 for tmin_seconds, tmin_offset in self._conn.execute( 

1227 sql_min, (kind_id, kind_id)): 

1228 tmins.append(model.tjoin(tmin_seconds, tmin_offset)) 

1229 

1230 for (tmax_seconds, tmax_offset) in self._conn.execute( 

1231 sql_max, (kind_id, kind_id)): 

1232 tmaxs.append(model.tjoin(tmax_seconds, tmax_offset)) 

1233 

1234 tmins = [tmin if tmin != model.g_tmin else None for tmin in tmins] 

1235 tmaxs = [tmax if tmax != model.g_tmax else None for tmax in tmaxs] 

1236 

1237 if tight: 

1238 gtmin = nonef(min, tmins) 

1239 gtmax = nonef(max, tmaxs) 

1240 else: 

1241 gtmin = None if None in tmins else nonef(min, tmins) 

1242 gtmax = None if None in tmaxs else nonef(max, tmaxs) 

1243 

1244 if dummy_limits: 

1245 if gtmin is None: 

1246 gtmin = model.g_tmin 

1247 if gtmax is None: 

1248 gtmax = model.g_tmax 

1249 

1250 return gtmin, gtmax 

1251 

1252 def has(self, kinds): 

1253 ''' 

1254 Check availability of given content kinds. 

1255 

1256 :param kinds: 

1257 Content kinds to query. 

1258 :type kind: 

1259 list of str 

1260 

1261 :returns: 

1262 ``True`` if any of the queried content kinds is available 

1263 in the selection. 

1264 ''' 

1265 self_tmin, self_tmax = self.get_time_span( 

1266 kinds, dummy_limits=False) 

1267 

1268 return None not in (self_tmin, self_tmax) 

1269 

1270 def get_deltat_span(self, kind): 

1271 ''' 

1272 Get min and max sampling interval of all content of given kind. 

1273 

1274 :param kind: 

1275 Content kind 

1276 :type kind: 

1277 str 

1278 

1279 :returns: ``(deltat_min, deltat_max)`` 

1280 ''' 

1281 

1282 deltats = [ 

1283 deltat for deltat in self.get_deltats(kind) 

1284 if deltat is not None] 

1285 

1286 if deltats: 

1287 return min(deltats), max(deltats) 

1288 else: 

1289 return None, None 

1290 

1291 def iter_kinds(self, codes=None): 

1292 ''' 

1293 Iterate over content types available in selection. 

1294 

1295 :param codes: 

1296 If given, get kinds only for selected codes identifier. 

1297 Only a single identifier may be given here and no pattern matching 

1298 is done, currently. 

1299 :type codes: 

1300 :py:class:`~pyrocko.squirrel.model.Codes` 

1301 

1302 :yields: 

1303 Available content kinds as :py:class:`str`. 

1304 

1305 :complexity: 

1306 O(1), independent of number of nuts. 

1307 ''' 

1308 

1309 return self._database._iter_kinds( 

1310 codes=codes, 

1311 kind_codes_count='%(db)s.%(kind_codes_count)s' % self._names) 

1312 

1313 def iter_deltats(self, kind=None): 

1314 ''' 

1315 Iterate over sampling intervals available in selection. 

1316 

1317 :param kind: 

1318 If given, get sampling intervals only for a given content type. 

1319 :type kind: 

1320 str 

1321 

1322 :yields: 

1323 :py:class:`float` values. 

1324 

1325 :complexity: 

1326 O(1), independent of number of nuts. 

1327 ''' 

1328 return self._database._iter_deltats( 

1329 kind=kind, 

1330 kind_codes_count='%(db)s.%(kind_codes_count)s' % self._names) 

1331 

1332 def iter_codes(self, kind=None): 

1333 ''' 

1334 Iterate over content identifier code sequences available in selection. 

1335 

1336 :param kind: 

1337 If given, get codes only for a given content type. 

1338 :type kind: 

1339 str 

1340 

1341 :yields: 

1342 :py:class:`tuple` of :py:class:`str` 

1343 

1344 :complexity: 

1345 O(1), independent of number of nuts. 

1346 ''' 

1347 return self._database._iter_codes( 

1348 kind=kind, 

1349 kind_codes_count='%(db)s.%(kind_codes_count)s' % self._names) 

1350 

1351 def _iter_codes_info(self, kind=None, codes=None): 

1352 ''' 

1353 Iterate over number of occurrences of any (kind, codes) combination. 

1354 

1355 :param kind: 

1356 If given, get counts only for selected content type. 

1357 :type kind: 

1358 str 

1359 

1360 :yields: 

1361 Tuples of the form ``(kind, codes, deltat, kind_codes_id, count)``. 

1362 

1363 :complexity: 

1364 O(1), independent of number of nuts. 

1365 ''' 

1366 return self._database._iter_codes_info( 

1367 kind=kind, 

1368 codes=codes, 

1369 kind_codes_count='%(db)s.%(kind_codes_count)s' % self._names) 

1370 

1371 def get_kinds(self, codes=None): 

1372 ''' 

1373 Get content types available in selection. 

1374 

1375 :param codes: 

1376 If given, get kinds only for selected codes identifier. 

1377 Only a single identifier may be given here and no pattern matching 

1378 is done, currently. 

1379 :type codes: 

1380 :py:class:`~pyrocko.squirrel.model.Codes` 

1381 

1382 :returns: 

1383 Sorted list of available content types. 

1384 :rtype: 

1385 py:class:`list` of :py:class:`str` 

1386 

1387 :complexity: 

1388 O(1), independent of number of nuts. 

1389 

1390 ''' 

1391 return sorted(list(self.iter_kinds(codes=codes))) 

1392 

1393 def get_deltats(self, kind=None): 

1394 ''' 

1395 Get sampling intervals available in selection. 

1396 

1397 :param kind: 

1398 If given, get sampling intervals only for selected content type. 

1399 :type kind: 

1400 str 

1401 

1402 :complexity: 

1403 O(1), independent of number of nuts. 

1404 

1405 :returns: Sorted list of available sampling intervals. 

1406 ''' 

1407 return sorted(list(self.iter_deltats(kind=kind))) 

1408 

1409 def get_codes(self, kind=None): 

1410 ''' 

1411 Get identifier code sequences available in selection. 

1412 

1413 :param kind: 

1414 If given, get codes only for selected content type. 

1415 :type kind: 

1416 str 

1417 

1418 :complexity: 

1419 O(1), independent of number of nuts. 

1420 

1421 :returns: Sorted list of available codes as tuples of strings. 

1422 ''' 

1423 return sorted(list(self.iter_codes(kind=kind))) 

1424 

1425 def get_counts(self, kind=None): 

1426 ''' 

1427 Get number of occurrences of any (kind, codes) combination. 

1428 

1429 :param kind: 

1430 If given, get codes only for selected content type. 

1431 :type kind: 

1432 str 

1433 

1434 :complexity: 

1435 O(1), independent of number of nuts. 

1436 

1437 :returns: ``dict`` with ``counts[kind][codes]`` or ``counts[codes]`` 

1438 if kind is not ``None`` 

1439 ''' 

1440 d = {} 

1441 for kind_id, codes, _, _, count in self._iter_codes_info(kind=kind): 

1442 if kind_id not in d: 

1443 v = d[kind_id] = {} 

1444 else: 

1445 v = d[kind_id] 

1446 

1447 if codes not in v: 

1448 v[codes] = 0 

1449 

1450 v[codes] += count 

1451 

1452 if kind is not None: 

1453 return d[to_kind_id(kind)] 

1454 else: 

1455 return dict((to_kind(kind_id), v) for (kind_id, v) in d.items()) 

1456 

1457 def glob_codes(self, kind, codes): 

1458 ''' 

1459 Find codes matching given patterns. 

1460 

1461 :param kind: 

1462 Content kind to be queried. 

1463 :type kind: 

1464 str 

1465 

1466 :param codes: 

1467 List of code patterns to query. 

1468 :type codes: 

1469 :py:class:`list` of :py:class:`~pyrocko.squirrel.model.Codes` 

1470 objects appropriate for the queried content type, or anything which 

1471 can be converted to such objects. 

1472 

1473 :returns: 

1474 List of matches of the form ``[kind_codes_id, codes, deltat]``. 

1475 ''' 

1476 

1477 kind_id = to_kind_id(kind) 

1478 args = [kind_id] 

1479 pats = codes_patterns_for_kind(kind_id, codes) 

1480 

1481 if pats: 

1482 codes_cond = 'AND ( %s ) ' % ' OR '.join( 

1483 ('kind_codes.codes GLOB ?',) * len(pats)) 

1484 

1485 args.extend(pat.safe_str for pat in pats) 

1486 else: 

1487 codes_cond = '' 

1488 

1489 sql = self._sql(''' 

1490 SELECT kind_codes_id, codes, deltat FROM kind_codes 

1491 WHERE 

1492 kind_id == ? ''' + codes_cond) 

1493 

1494 return list(map(list, self._conn.execute(sql, args))) 

1495 

1496 def update(self, constraint=None, **kwargs): 

1497 ''' 

1498 Update or partially update channel and event inventories. 

1499 

1500 :param constraint: 

1501 Selection of times or areas to be brought up to date. 

1502 :type constraint: 

1503 :py:class:`~pyrocko.squirrel.client.base.Constraint` 

1504 

1505 :param \\*\\*kwargs: 

1506 Shortcut for setting ``constraint=Constraint(**kwargs)``. 

1507 

1508 This function triggers all attached remote sources, to check for 

1509 updates in the meta-data. The sources will only submit queries when 

1510 their expiration date has passed, or if the selection spans into 

1511 previously unseen times or areas. 

1512 ''' 

1513 

1514 if constraint is None: 

1515 constraint = client.Constraint(**kwargs) 

1516 

1517 for source in self._sources: 

1518 source.update_channel_inventory(self, constraint) 

1519 source.update_event_inventory(self, constraint) 

1520 

1521 def update_waveform_promises(self, constraint=None, **kwargs): 

1522 ''' 

1523 Permit downloading of remote waveforms. 

1524 

1525 :param constraint: 

1526 Remote waveforms compatible with the given constraint are enabled 

1527 for download. 

1528 :type constraint: 

1529 :py:class:`~pyrocko.squirrel.client.base.Constraint` 

1530 

1531 :param \\*\\*kwargs: 

1532 Shortcut for setting ``constraint=Constraint(**kwargs)``. 

1533 

1534 Calling this method permits Squirrel to download waveforms from remote 

1535 sources when processing subsequent waveform requests. This works by 

1536 inserting so called waveform promises into the database. It will look 

1537 into the available channels for each remote source and create a promise 

1538 for each channel compatible with the given constraint. If the promise 

1539 then matches in a waveform request, Squirrel tries to download the 

1540 waveform. If the download is successful, the downloaded waveform is 

1541 added to the Squirrel and the promise is deleted. If the download 

1542 fails, the promise is kept if the reason of failure looks like being 

1543 temporary, e.g. because of a network failure. If the cause of failure 

1544 however seems to be permanent, the promise is deleted so that no 

1545 further attempts are made to download a waveform which might not be 

1546 available from that server at all. To force re-scheduling after a 

1547 permanent failure, call :py:meth:`update_waveform_promises` 

1548 yet another time. 

1549 ''' 

1550 

1551 if constraint is None: 

1552 constraint = client.Constraint(**kwargs) 

1553 

1554 for source in self._sources: 

1555 source.update_waveform_promises(self, constraint) 

1556 

1557 def remove_waveform_promises(self, from_database='selection'): 

1558 ''' 

1559 Remove waveform promises from live selection or global database. 

1560 

1561 Calling this function removes all waveform promises provided by the 

1562 attached sources. 

1563 

1564 :param from_database: 

1565 Remove from live selection ``'selection'`` or global database 

1566 ``'global'``. 

1567 ''' 

1568 for source in self._sources: 

1569 source.remove_waveform_promises(self, from_database=from_database) 

1570 

    def update_responses(self, constraint=None, **kwargs):
        '''
        Update or partially update instrument response inventories.

        :param constraint:
            Selection of times or areas to be brought up to date.
        :type constraint:
            :py:class:`~pyrocko.squirrel.client.base.Constraint`

        :param \\*\\*kwargs:
            Shortcut for setting ``constraint=Constraint(**kwargs)``.
        '''
        if constraint is None:
            constraint = client.Constraint(**kwargs)

        for source in self._sources:
            source.update_response_inventory(self, constraint)

1577 

1578 def get_nfiles(self): 

1579 ''' 

1580 Get number of files in selection. 

1581 ''' 

1582 

1583 sql = self._sql('''SELECT COUNT(*) FROM %(db)s.%(file_states)s''') 

1584 for row in self._conn.execute(sql): 

1585 return row[0] 

1586 

1587 def get_nnuts(self): 

1588 ''' 

1589 Get number of nuts in selection. 

1590 ''' 

1591 

1592 sql = self._sql('''SELECT COUNT(*) FROM %(db)s.%(nuts)s''') 

1593 for row in self._conn.execute(sql): 

1594 return row[0] 

1595 

1596 def get_total_size(self): 

1597 ''' 

1598 Get aggregated file size available in selection. 

1599 ''' 

1600 

1601 sql = self._sql(''' 

1602 SELECT SUM(files.size) FROM %(db)s.%(file_states)s 

1603 INNER JOIN files 

1604 ON %(db)s.%(file_states)s.file_id = files.file_id 

1605 ''') 

1606 

1607 for row in self._conn.execute(sql): 

1608 return row[0] or 0 

1609 

1610 def get_stats(self): 

1611 ''' 

1612 Get statistics on contents available through this selection. 

1613 ''' 

1614 

1615 kinds = self.get_kinds() 

1616 time_spans = {} 

1617 for kind in kinds: 

1618 time_spans[kind] = self.get_time_span([kind]) 

1619 

1620 return SquirrelStats( 

1621 nfiles=self.get_nfiles(), 

1622 nnuts=self.get_nnuts(), 

1623 kinds=kinds, 

1624 codes=self.get_codes(), 

1625 total_size=self.get_total_size(), 

1626 counts=self.get_counts(), 

1627 time_spans=time_spans, 

1628 sources=[s.describe() for s in self._sources], 

1629 operators=[op.describe() for op in self._operators]) 

1630 

1631 @filldocs 

1632 def check( 

1633 self, obj=None, tmin=None, tmax=None, time=None, codes=None, 

1634 ignore=[]): 

1635 ''' 

1636 Check for common data/metadata problems. 

1637 

1638 %(query_args)s 

1639 

1640 :param ignore: 

1641 Problem types to be ignored. 

1642 :type ignore: 

1643 :class:`list` of :class:`str` 

1644 (:py:class:`~pyrocko.squirrel.check.SquirrelCheckProblemType`) 

1645 

1646 :returns: 

1647 :py:class:`~pyrocko.squirrel.check.SquirrelCheck` object 

1648 containing the results of the check. 

1649 

1650 See :py:func:`~pyrocko.squirrel.check.do_check`. 

1651 ''' 

1652 

1653 from .check import do_check 

1654 tmin, tmax, codes = self._get_selection_args( 

1655 CHANNEL, obj, tmin, tmax, time, codes) 

1656 

1657 return do_check(self, tmin=tmin, tmax=tmax, codes=codes, ignore=ignore) 

1658 

    def get_content(
            self,
            nut,
            cache_id='default',
            accessor_id='default',
            show_progress=False,
            model='squirrel'):

        '''
        Get and possibly load full content for a given index entry from file.

        Loads the actual content objects (channel, station, waveform, ...) from
        file. For efficiency, sibling content (all stuff in the same file
        segment) will also be loaded as a side effect. The loaded contents are
        cached in the Squirrel object.

        :param nut:
            Index entry of the content to be retrieved.

        :param cache_id:
            Name of the content cache to consult and fill.
        :type cache_id:
            str

        :param accessor_id:
            Name of the consumer accessing the cache entry.
        :type accessor_id:
            str

        :param show_progress:
            If ``True``, show progress while loading from file.
        :type show_progress:
            bool

        :param model:
            Object model of the returned content; passed through to the cache
            lookup.

        :raises:
            :py:exc:`~pyrocko.squirrel.error.NotAvailable` if the content
            cannot be retrieved even after loading the file segment.
        '''

        content_cache = self._content_caches[cache_id]
        if not content_cache.has(nut):

            # Cache miss: load the whole file segment containing the nut.
            # Sibling contents loaded along the way are cached as well.
            for nut_loaded in io.iload(
                    nut.file_path,
                    segment=nut.file_segment,
                    format=nut.file_format,
                    database=self._database,
                    update_selection=self,
                    show_progress=show_progress):

                content_cache.put(nut_loaded)

        try:
            return content_cache.get(nut, accessor_id, model)

        except KeyError:
            raise error.NotAvailable(
                'Unable to retrieve content: %s, %s, %s, %s' % nut.key)

1695 

1696 def advance_accessor(self, accessor_id='default', cache_id=None): 

1697 ''' 

1698 Notify memory caches about consumer moving to a new data batch. 

1699 

1700 :param accessor_id: 

1701 Name of accessing consumer to be advanced. 

1702 :type accessor_id: 

1703 str 

1704 

1705 :param cache_id: 

1706 Name of cache to for which the accessor should be advanced. By 

1707 default the named accessor is advanced in all registered caches. 

1708 By default, two caches named ``'default'`` and ``'waveform'`` are 

1709 available. 

1710 :type cache_id: 

1711 str 

1712 

1713 See :py:class:`~pyrocko.squirrel.cache.ContentCache` for details on how 

1714 Squirrel's memory caching works and can be tuned. Default behaviour is 

1715 to release data when it has not been used in the latest data 

1716 window/batch. If the accessor is never advanced, data is cached 

1717 indefinitely - which is often desired e.g. for station meta-data. 

1718 Methods for consecutive data traversal, like 

1719 :py:meth:`chopper_waveforms` automatically advance and clear 

1720 their accessor. 

1721 ''' 

1722 for cache_ in ( 

1723 self._content_caches.keys() 

1724 if cache_id is None 

1725 else [cache_id]): 

1726 

1727 self._content_caches[cache_].advance_accessor(accessor_id) 

1728 

1729 def clear_accessor(self, accessor_id, cache_id=None): 

1730 ''' 

1731 Notify memory caches about a consumer having finished. 

1732 

1733 :param accessor_id: 

1734 Name of accessor to be cleared. 

1735 :type accessor_id: 

1736 str 

1737 

1738 :param cache_id: 

1739 Name of cache for which the accessor should be cleared. By default 

1740 the named accessor is cleared from all registered caches. By 

1741 default, two caches named ``'default'`` and ``'waveform'`` are 

1742 available. 

1743 :type cache_id: 

1744 str 

1745 

1746 Calling this method clears all references to cache entries held by the 

1747 named accessor. Cache entries are then freed if not referenced by any 

1748 other accessor. 

1749 ''' 

1750 

1751 for cache_ in ( 

1752 self._content_caches.keys() 

1753 if cache_id is None 

1754 else [cache_id]): 

1755 

1756 self._content_caches[cache_].clear_accessor(accessor_id) 

1757 

1758 def get_cache_stats(self, cache_id): 

1759 return self._content_caches[cache_id].get_stats() 

1760 

1761 @filldocs 

1762 def get_stations( 

1763 self, obj=None, tmin=None, tmax=None, time=None, codes=None, 

1764 model='squirrel', on_error='raise'): 

1765 

1766 ''' 

1767 Get stations matching given constraints. 

1768 

1769 %(query_args)s 

1770 

1771 :param model: 

1772 Select object model for returned values: ``'squirrel'`` to get 

1773 Squirrel station objects or ``'pyrocko'`` to get Pyrocko station 

1774 objects with channel information attached. 

1775 :type model: 

1776 str 

1777 

1778 :returns: 

1779 List of :py:class:`pyrocko.squirrel.Station 

1780 <pyrocko.squirrel.model.Station>` objects by default or list of 

1781 :py:class:`pyrocko.model.Station <pyrocko.model.station.Station>` 

1782 objects if ``model='pyrocko'`` is requested. 

1783 

1784 See :py:meth:`iter_nuts` for details on time span matching. 

1785 ''' 

1786 

1787 if model == 'pyrocko': 

1788 return self._get_pyrocko_stations( 

1789 obj, tmin, tmax, time, codes, on_error=on_error) 

1790 elif model in ('squirrel', 'stationxml', 'stationxml+'): 

1791 args = self._get_selection_args( 

1792 STATION, obj, tmin, tmax, time, codes) 

1793 

1794 nuts = sorted( 

1795 self.iter_nuts('station', *args), key=lambda nut: nut.dkey) 

1796 

1797 return [self.get_content(nut, model=model) for nut in nuts] 

1798 else: 

1799 raise ValueError('Invalid station model: %s' % model) 

1800 

1801 @filldocs 

1802 def get_channels( 

1803 self, obj=None, tmin=None, tmax=None, time=None, codes=None, 

1804 model='squirrel'): 

1805 

1806 ''' 

1807 Get channels matching given constraints. 

1808 

1809 %(query_args)s 

1810 

1811 :returns: 

1812 List of :py:class:`~pyrocko.squirrel.model.Channel` objects. 

1813 

1814 See :py:meth:`iter_nuts` for details on time span matching. 

1815 ''' 

1816 

1817 args = self._get_selection_args( 

1818 CHANNEL, obj, tmin, tmax, time, codes) 

1819 

1820 nuts = sorted( 

1821 self.iter_nuts('channel', *args), key=lambda nut: nut.dkey) 

1822 

1823 return [self.get_content(nut, model=model) for nut in nuts] 

1824 

    @filldocs
    def get_sensors(
            self, obj=None, tmin=None, tmax=None, time=None, codes=None):

        '''
        Get sensors matching given constraints.

        %(query_args)s

        :returns:
            List of :py:class:`~pyrocko.squirrel.model.Sensor` objects.

        See :py:meth:`iter_nuts` for details on time span matching.
        '''

        tmin, tmax, codes = self._get_selection_args(
            CHANNEL, obj, tmin, tmax, time, codes)

        if codes is not None:
            # Sensors group component channels, so widen each channel code
            # by replacing its last character with a '?' wildcard (e.g.
            # 'BHZ' -> 'BH?'). Full '*' wildcards are left untouched.
            codes = codes_patterns_list(
                (entry.replace(channel=entry.channel[:-1] + '?')
                 if entry.channel != '*' else entry)
                for entry in codes)

        nuts = sorted(
            self.iter_nuts(
                'channel', tmin, tmax, codes), key=lambda nut: nut.dkey)

        # Group the loaded channels into sensors and keep only sensors
        # overlapping the queried time span.
        return [
            sensor for sensor in model.Sensor.from_channels(
                self.get_content(nut) for nut in nuts)
            if match_time_span(tmin, tmax, sensor)]

1857 

1858 @filldocs 

1859 def get_responses( 

1860 self, obj=None, tmin=None, tmax=None, time=None, codes=None, 

1861 model='squirrel'): 

1862 

1863 ''' 

1864 Get instrument responses matching given constraints. 

1865 

1866 %(query_args)s 

1867 

1868 :param model: 

1869 Select data model for returned objects. Choices: ``'squirrel'``, 

1870 ``'stationxml'``, ``'stationxml+'``. See return value description. 

1871 :type model: 

1872 str 

1873 

1874 :returns: 

1875 List of :py:class:`~pyrocko.squirrel.model.Response` if ``model == 

1876 'squirrel'`` or list of :py:class:`~pyrocko.io.fdsn.FDSNStationXML` 

1877 if ``model == 'stationxml'`` or list of 

1878 (:py:class:`~pyrocko.squirrel.model.Response`, 

1879 :py:class:`~pyrocko.io.fdsn.FDSNStationXML`) if ``model == 

1880 'stationxml+'``. 

1881 

1882 See :py:meth:`iter_nuts` for details on time span matching. 

1883 ''' 

1884 

1885 args = self._get_selection_args( 

1886 RESPONSE, obj, tmin, tmax, time, codes) 

1887 

1888 nuts = sorted( 

1889 self.iter_nuts('response', *args), key=lambda nut: nut.dkey) 

1890 

1891 return [self.get_content(nut, model=model) for nut in nuts] 

1892 

1893 @filldocs 

1894 def get_response( 

1895 self, obj=None, tmin=None, tmax=None, time=None, codes=None, 

1896 model='squirrel', on_duplicate='raise'): 

1897 

1898 ''' 

1899 Get instrument response matching given constraints. 

1900 

1901 %(query_args)s 

1902 

1903 :param model: 

1904 Select data model for returned object. Choices: ``'squirrel'``, 

1905 ``'stationxml'``, ``'stationxml+'``. See return value description. 

1906 :type model: 

1907 str 

1908 

1909 :param on_duplicate: 

1910 Determines how duplicates/multiple matching responses are handled. 

1911 Choices: ``'raise'`` - raise 

1912 :py:exc:`~pyrocko.squirrel.error.Duplicate`, ``'warn'`` - emit a 

1913 warning and return first match, ``'ignore'`` - silently return 

1914 first match. 

1915 :type on_duplicate: 

1916 str 

1917 

1918 :returns: 

1919 :py:class:`~pyrocko.squirrel.model.Response` if 

1920 ``model == 'squirrel'`` or 

1921 :py:class:`~pyrocko.io.fdsn.FDSNStationXML` if ``model == 

1922 'stationxml'`` or 

1923 (:py:class:`~pyrocko.squirrel.model.Response`, 

1924 :py:class:`~pyrocko.io.fdsn.FDSNStationXML`) if ``model == 

1925 'stationxml+'``. 

1926 

1927 Same as :py:meth:`get_responses` but returning exactly one response. 

1928 Raises :py:exc:`~pyrocko.squirrel.error.NotAvailable` if none is 

1929 available. Duplicates are handled according to the ``on_duplicate`` 

1930 argument. 

1931 

1932 See :py:meth:`iter_nuts` for details on time span matching. 

1933 ''' 

1934 

1935 if model == 'stationxml': 

1936 model_ = 'stationxml+' 

1937 else: 

1938 model_ = model 

1939 

1940 responses = self.get_responses( 

1941 obj, tmin, tmax, time, codes, model=model_) 

1942 if len(responses) == 0: 

1943 raise error.NotAvailable( 

1944 'No instrument response available (%s).' 

1945 % self._get_selection_args_str( 

1946 RESPONSE, obj, tmin, tmax, time, codes)) 

1947 

1948 elif len(responses) > 1: 

1949 

1950 if on_duplicate in ('raise', 'warn'): 

1951 if model_ == 'squirrel': 

1952 resps_sq = responses 

1953 elif model_ == 'stationxml+': 

1954 resps_sq = [resp[0] for resp in responses] 

1955 else: 

1956 raise ValueError('Invalid response model: %s' % model) 

1957 

1958 rinfo = ':\n' + '\n'.join( 

1959 ' ' + resp.summary for resp in resps_sq) 

1960 

1961 message = \ 

1962 'Multiple instrument responses matching given ' \ 

1963 'constraints (%s)%s%s' % ( 

1964 self._get_selection_args_str( 

1965 RESPONSE, obj, tmin, tmax, time, codes), 

1966 ' -> using first' if on_duplicate == 'warn' else '', 

1967 rinfo) 

1968 

1969 if on_duplicate == 'raise': 

1970 raise error.Duplicate(message) 

1971 

1972 elif on_duplicate == 'warn': 

1973 logger.warning(message) 

1974 

1975 elif on_duplicate == 'ignore': 

1976 pass 

1977 

1978 else: 

1979 ValueError( 

1980 'Invalid argument for on_duplicate: %s' % on_duplicate) 

1981 

1982 if model == 'stationxml': 

1983 return responses[0][1] 

1984 else: 

1985 return responses[0] 

1986 

1987 @filldocs 

1988 def get_events( 

1989 self, obj=None, tmin=None, tmax=None, time=None, codes=None): 

1990 

1991 ''' 

1992 Get events matching given constraints. 

1993 

1994 %(query_args)s 

1995 

1996 :returns: 

1997 List of :py:class:`~pyrocko.model.event.Event` objects. 

1998 

1999 See :py:meth:`iter_nuts` for details on time span matching. 

2000 ''' 

2001 

2002 args = self._get_selection_args(EVENT, obj, tmin, tmax, time, codes) 

2003 nuts = sorted( 

2004 self.iter_nuts('event', *args), key=lambda nut: nut.dkey) 

2005 

2006 return [self.get_content(nut) for nut in nuts] 

2007 

2008 def _redeem_promises(self, *args, order_only=False): 

2009 

2010 def split_promise(order, tmax=None): 

2011 self._split_nuts( 

2012 'waveform_promise', 

2013 order.tmin, tmax if tmax is not None else order.tmax, 

2014 codes=order.codes, 

2015 path=order.source_id) 

2016 

2017 tmin, tmax = args[:2] 

2018 

2019 waveforms = list(self.iter_nuts('waveform', *args)) 

2020 promises = list(self.iter_nuts('waveform_promise', *args)) 

2021 

2022 codes_to_avail = defaultdict(list) 

2023 for nut in waveforms: 

2024 codes_to_avail[nut.codes].append((nut.tmin, nut.tmax)) 

2025 

2026 def tts(x): 

2027 if isinstance(x, tuple): 

2028 return tuple(tts(e) for e in x) 

2029 elif isinstance(x, list): 

2030 return list(tts(e) for e in x) 

2031 else: 

2032 return util.time_to_str(x) 

2033 

2034 now = time.time() 

2035 orders = [] 

2036 for promise in promises: 

2037 waveforms_avail = codes_to_avail[promise.codes] 

2038 for block_tmin, block_tmax in blocks( 

2039 max(tmin, promise.tmin), 

2040 min(tmax, promise.tmax), 

2041 promise.deltat): 

2042 

2043 if block_tmin > now: 

2044 continue 

2045 

2046 orders.append( 

2047 WaveformOrder( 

2048 source_id=promise.file_path, 

2049 codes=promise.codes, 

2050 tmin=block_tmin, 

2051 tmax=block_tmax, 

2052 deltat=promise.deltat, 

2053 gaps=gaps(waveforms_avail, block_tmin, block_tmax), 

2054 time_created=now)) 

2055 

2056 orders_noop, orders = lpick(lambda order: order.gaps, orders) 

2057 

2058 order_keys_noop = set(order_key(order) for order in orders_noop) 

2059 if len(order_keys_noop) != 0 or len(orders_noop) != 0: 

2060 logger.info( 

2061 'Waveform orders already satisified with cached/local data: ' 

2062 '%i (%i)' % (len(order_keys_noop), len(orders_noop))) 

2063 

2064 for order in orders_noop: 

2065 split_promise(order) 

2066 

2067 if order_only: 

2068 if orders: 

2069 self._pending_orders.extend(orders) 

2070 logger.info( 

2071 'Enqueuing %i waveform order%s.' 

2072 % len_plural(orders)) 

2073 return 

2074 else: 

2075 if self._pending_orders: 

2076 orders.extend(self._pending_orders) 

2077 logger.info( 

2078 'Adding %i previously enqueued order%s.' 

2079 % len_plural(self._pending_orders)) 

2080 

2081 self._pending_orders = [] 

2082 

2083 source_ids = [] 

2084 sources = {} 

2085 for source in self._sources: 

2086 if isinstance(source, fdsn.FDSNSource): 

2087 source_ids.append(source._source_id) 

2088 sources[source._source_id] = source 

2089 

2090 source_priority = dict( 

2091 (source_id, i) for (i, source_id) in enumerate(source_ids)) 

2092 

2093 order_groups = defaultdict(list) 

2094 for order in orders: 

2095 order_groups[order_key(order)].append(order) 

2096 

2097 for k, order_group in order_groups.items(): 

2098 order_group.sort( 

2099 key=lambda order: source_priority[order.source_id]) 

2100 

2101 n_order_groups = len(order_groups) 

2102 

2103 if len(order_groups) != 0 or len(orders) != 0: 

2104 logger.info( 

2105 'Waveform orders standing for download: %i (%i)' 

2106 % (len(order_groups), len(orders))) 

2107 

2108 task = make_task('Waveform orders processed', n_order_groups) 

2109 else: 

2110 task = None 

2111 

2112 def release_order_group(order): 

2113 okey = order_key(order) 

2114 for followup in order_groups[okey]: 

2115 if followup is not order: 

2116 split_promise(followup) 

2117 

2118 del order_groups[okey] 

2119 

2120 if task: 

2121 task.update(n_order_groups - len(order_groups)) 

2122 

2123 def noop(order): 

2124 pass 

2125 

2126 def success(order, trs): 

2127 release_order_group(order) 

2128 if order.is_near_real_time(): 

2129 if not trs: 

2130 return # keep promise when no data received at real time 

2131 else: 

2132 tmax = max(tr.tmax+tr.deltat for tr in trs) 

2133 tmax = order.tmin \ 

2134 + round((tmax - order.tmin) / order.deltat) \ 

2135 * order.deltat 

2136 split_promise(order, tmax) 

2137 else: 

2138 split_promise(order) 

2139 

2140 def batch_add(paths): 

2141 self.add(paths) 

2142 

2143 calls = queue.Queue() 

2144 

2145 def enqueue(f): 

2146 def wrapper(*args): 

2147 calls.put((f, args)) 

2148 

2149 return wrapper 

2150 

2151 while order_groups: 

2152 

2153 orders_now = [] 

2154 empty = [] 

2155 for k, order_group in order_groups.items(): 

2156 try: 

2157 orders_now.append(order_group.pop(0)) 

2158 except IndexError: 

2159 empty.append(k) 

2160 

2161 for k in empty: 

2162 del order_groups[k] 

2163 

2164 by_source_id = defaultdict(list) 

2165 for order in orders_now: 

2166 by_source_id[order.source_id].append(order) 

2167 

2168 threads = [] 

2169 for source_id in by_source_id: 

2170 def download(): 

2171 try: 

2172 sources[source_id].download_waveforms( 

2173 by_source_id[source_id], 

2174 success=enqueue(success), 

2175 error_permanent=enqueue(split_promise), 

2176 error_temporary=noop, 

2177 batch_add=enqueue(batch_add)) 

2178 

2179 finally: 

2180 calls.put(None) 

2181 

2182 thread = threading.Thread(target=download) 

2183 thread.start() 

2184 threads.append(thread) 

2185 

2186 ndone = 0 

2187 while ndone < len(threads): 

2188 ret = calls.get() 

2189 if ret is None: 

2190 ndone += 1 

2191 else: 

2192 ret[0](*ret[1]) 

2193 

2194 for thread in threads: 

2195 thread.join() 

2196 

2197 if task: 

2198 task.update(n_order_groups - len(order_groups)) 

2199 

2200 if task: 

2201 task.done() 

2202 

2203 @filldocs 

2204 def get_waveform_nuts( 

2205 self, obj=None, tmin=None, tmax=None, time=None, codes=None, 

2206 codes_exclude=None, sample_rate_min=None, sample_rate_max=None, 

2207 order_only=False): 

2208 

2209 ''' 

2210 Get waveform content entities matching given constraints. 

2211 

2212 %(query_args)s 

2213 

2214 Like :py:meth:`get_nuts` with ``kind='waveform'`` but additionally 

2215 resolves matching waveform promises (downloads waveforms from remote 

2216 sources). 

2217 

2218 See :py:meth:`iter_nuts` for details on time span matching. 

2219 ''' 

2220 

2221 args = self._get_selection_args(WAVEFORM, obj, tmin, tmax, time, codes) 

2222 

2223 if self.downloads_enabled: 

2224 self._redeem_promises( 

2225 *args, 

2226 codes_exclude, 

2227 sample_rate_min, 

2228 sample_rate_max, 

2229 order_only=order_only) 

2230 

2231 nuts = sorted( 

2232 self.iter_nuts('waveform', *args), key=lambda nut: nut.dkey) 

2233 

2234 return nuts 

2235 

2236 @filldocs 

2237 def have_waveforms( 

2238 self, obj=None, tmin=None, tmax=None, time=None, codes=None): 

2239 

2240 ''' 

2241 Check if any waveforms or waveform promises are available for given 

2242 constraints. 

2243 

2244 %(query_args)s 

2245 ''' 

2246 

2247 args = self._get_selection_args(WAVEFORM, obj, tmin, tmax, time, codes) 

2248 return bool(list( 

2249 self.iter_nuts('waveform', *args, limit=1))) \ 

2250 or (self.downloads_enabled and bool(list( 

2251 self.iter_nuts('waveform_promise', *args, limit=1)))) 

2252 

    @filldocs
    def get_waveforms(
            self, obj=None, tmin=None, tmax=None, time=None, codes=None,
            codes_exclude=None, sample_rate_min=None, sample_rate_max=None,
            uncut=False, want_incomplete=True, degap=True,
            maxgap=5, maxlap=None, snap=None, include_last=False,
            load_data=True, accessor_id='default', operator_params=None,
            order_only=False, channel_priorities=None):

        '''
        Get waveforms matching given constraints.

        %(query_args)s

        :param sample_rate_min:
            Consider only waveforms with a sampling rate equal to or greater
            than the given value [Hz].
        :type sample_rate_min:
            float

        :param sample_rate_max:
            Consider only waveforms with a sampling rate equal to or less than
            the given value [Hz].
        :type sample_rate_max:
            float

        :param uncut:
            Set to ``True``, to disable cutting traces to [``tmin``, ``tmax``]
            and to disable degapping/deoverlapping. Returns untouched traces as
            they are read from file segment. File segments are always read in
            their entirety.
        :type uncut:
            bool

        :param want_incomplete:
            If ``True``, gappy/incomplete traces are included in the result.
        :type want_incomplete:
            bool

        :param degap:
            If ``True``, connect traces and remove gaps and overlaps.
        :type degap:
            bool

        :param maxgap:
            Maximum gap size in samples which is filled with interpolated
            samples when ``degap`` is ``True``.
        :type maxgap:
            int

        :param maxlap:
            Maximum overlap size in samples which is removed when ``degap`` is
            ``True``.
        :type maxlap:
            int

        :param snap:
            Rounding functions used when computing sample index from time
            instance, for trace start and trace end, respectively. By default,
            ``(round, round)`` is used.
        :type snap:
            tuple of 2 callables

        :param include_last:
            If ``True``, add one more sample to the returned traces (the sample
            which would be the first sample of a query with ``tmin`` set to the
            current value of ``tmax``).
        :type include_last:
            bool

        :param load_data:
            If ``True``, waveform data samples are read from files (or cache).
            If ``False``, meta-information-only traces are returned (dummy
            traces with no data samples).
        :type load_data:
            bool

        :param accessor_id:
            Name of consumer on who's behalf data is accessed. Used in cache
            management (see :py:mod:`~pyrocko.squirrel.cache`). Used as a key
            to distinguish different points of extraction for the decision of
            when to release cached waveform data. Should be used when data is
            alternately extracted from more than one region / selection.
        :type accessor_id:
            str

        :param channel_priorities:
            List of band/instrument code combinations to try. For example,
            giving ``['HH', 'BH']`` would first try to get ``HH?`` channels and
            then fallback to ``BH?`` if these are not available. The first
            matching waveforms are returned. Use in combination with
            ``sample_rate_min`` and ``sample_rate_max`` to constrain the sample
            rate.
        :type channel_priorities:
            list of str

        See :py:meth:`iter_nuts` for details on time span matching.

        Loaded data is kept in memory (at least) until
        :py:meth:`clear_accessor` has been called or
        :py:meth:`advance_accessor` has been called two consecutive times
        without data being accessed between the two calls (by this accessor).
        Data may still be further kept in the memory cache if held alive by
        consumers with a different ``accessor_id``.
        '''

        tmin, tmax, codes = self._get_selection_args(
            WAVEFORM, obj, tmin, tmax, time, codes)

        # Channel-priority handling is delegated to a helper which calls back
        # into this method once per priority level.
        if channel_priorities is not None:
            return self._get_waveforms_prioritized(
                tmin=tmin, tmax=tmax, codes=codes, codes_exclude=codes_exclude,
                sample_rate_min=sample_rate_min,
                sample_rate_max=sample_rate_max,
                uncut=uncut, want_incomplete=want_incomplete, degap=degap,
                maxgap=maxgap, maxlap=maxlap, snap=snap,
                include_last=include_last, load_data=load_data,
                accessor_id=accessor_id, operator_params=operator_params,
                order_only=order_only, channel_priorities=channel_priorities)

        kinds = ['waveform']
        if self.downloads_enabled:
            kinds.append('waveform_promise')

        self_tmin, self_tmax = self.get_time_span(kinds)

        if None in (self_tmin, self_tmax):
            logger.warning(
                'No waveforms available.')
            return []

        # Fall back to the full available time span where unconstrained.
        tmin = tmin if tmin is not None else self_tmin
        tmax = tmax if tmax is not None else self_tmax

        if codes is not None and len(codes) == 1:
            # TODO: fix for multiple / mixed codes
            # Derived (operator-generated) channels are computed on the fly.
            operator = self.get_operator(codes[0])
            if operator is not None:
                return operator.get_waveforms(
                    self, codes[0],
                    tmin=tmin, tmax=tmax,
                    uncut=uncut, want_incomplete=want_incomplete, degap=degap,
                    maxgap=maxgap, maxlap=maxlap, snap=snap,
                    include_last=include_last, load_data=load_data,
                    accessor_id=accessor_id, params=operator_params)

        # This may trigger downloads (promise redemption) when enabled.
        nuts = self.get_waveform_nuts(
            obj, tmin, tmax, time, codes, codes_exclude, sample_rate_min,
            sample_rate_max, order_only=order_only)

        if order_only:
            return []

        if load_data:
            traces = [
                self.get_content(nut, 'waveform', accessor_id) for nut in nuts]

        else:
            # Meta-only dummy traces; no samples are read from disk.
            traces = [
                trace.Trace(**nut.trace_kwargs) for nut in nuts]

        if uncut:
            return traces

        if snap is None:
            snap = (round, round)

        chopped = []
        for tr in traces:
            if not load_data and tr.ydata is not None:
                # Strip cached samples so meta-only requests stay meta-only.
                tr = tr.copy(data=False)
                tr.ydata = None

            try:
                chopped.append(tr.chop(
                    tmin, tmax,
                    inplace=False,
                    snap=snap,
                    include_last=include_last))

            except trace.NoData:
                pass

        processed = self._process_chopped(
            chopped, degap, maxgap, maxlap, want_incomplete, tmin, tmax)

        return processed

2440 

2441 def _get_waveforms_prioritized( 

2442 self, tmin=None, tmax=None, codes=None, codes_exclude=None, 

2443 channel_priorities=None, **kwargs): 

2444 

2445 trs_all = [] 

2446 codes_have = set() 

2447 for channel in channel_priorities: 

2448 assert len(channel) == 2 

2449 if codes is not None: 

2450 codes_now = [ 

2451 codes_.replace(channel=channel+'?') for codes_ in codes] 

2452 else: 

2453 codes_now = model.CodesNSLCE('*', '*', '*', channel+'?') 

2454 

2455 codes_exclude_now = list(set( 

2456 codes_.replace(channel=channel+codes_.channel[-1]) 

2457 for codes_ in codes_have)) 

2458 

2459 if codes_exclude: 

2460 codes_exclude_now.extend(codes_exclude) 

2461 

2462 trs = self.get_waveforms( 

2463 tmin=tmin, 

2464 tmax=tmax, 

2465 codes=codes_now, 

2466 codes_exclude=codes_exclude_now, 

2467 **kwargs) 

2468 

2469 codes_have.update(set(tr.codes for tr in trs)) 

2470 trs_all.extend(trs) 

2471 

2472 return trs_all 

2473 

    @filldocs
    def chopper_waveforms(
            self, obj=None, tmin=None, tmax=None, time=None, codes=None,
            codes_exclude=None, sample_rate_min=None, sample_rate_max=None,
            tinc=None, tpad=0.,
            want_incomplete=True, snap_window=False,
            degap=True, maxgap=5, maxlap=None,
            snap=None, include_last=False, load_data=True,
            accessor_id=None, clear_accessor=True, operator_params=None,
            grouping=None, channel_priorities=None):

        '''
        Iterate window-wise over waveform archive.

        %(query_args)s

        :param tinc:
            Time increment (window shift time) (default uses ``tmax-tmin``).
        :type tinc:
            timestamp

        :param tpad:
            Padding time appended on either side of the data window (window
            overlap is ``2*tpad``).
        :type tpad:
            timestamp

        :param want_incomplete:
            If ``True``, gappy/incomplete traces are included in the result.
        :type want_incomplete:
            bool

        :param snap_window:
            If ``True``, start time windows at multiples of tinc with respect
            to system time zero.
        :type snap_window:
            bool

        :param degap:
            If ``True``, connect traces and remove gaps and overlaps.
        :type degap:
            bool

        :param maxgap:
            Maximum gap size in samples which is filled with interpolated
            samples when ``degap`` is ``True``.
        :type maxgap:
            int

        :param maxlap:
            Maximum overlap size in samples which is removed when ``degap`` is
            ``True``.
        :type maxlap:
            int

        :param snap:
            Rounding functions used when computing sample index from time
            instance, for trace start and trace end, respectively. By default,
            ``(round, round)`` is used.
        :type snap:
            tuple of 2 callables

        :param include_last:
            If ``True``, add one more sample to the returned traces (the sample
            which would be the first sample of a query with ``tmin`` set to the
            current value of ``tmax``).
        :type include_last:
            bool

        :param load_data:
            If ``True``, waveform data samples are read from files (or cache).
            If ``False``, meta-information-only traces are returned (dummy
            traces with no data samples).
        :type load_data:
            bool

        :param accessor_id:
            Name of consumer on who's behalf data is accessed. Used in cache
            management (see :py:mod:`~pyrocko.squirrel.cache`). Used as a key
            to distinguish different points of extraction for the decision of
            when to release cached waveform data. Should be used when data is
            alternately extracted from more than one region / selection.
        :type accessor_id:
            str

        :param clear_accessor:
            If ``True`` (default), :py:meth:`clear_accessor` is called when the
            chopper finishes. Set to ``False`` to keep loaded waveforms in
            memory when the generator returns.
        :type clear_accessor:
            bool

        :param grouping:
            By default, traversal over the data is over time and all matching
            traces of a time window are yielded. Using this option, it is
            possible to traverse the data first by group (e.g. station or
            network) and second by time. This can reduce the number of traces
            in each batch and thus reduce the memory footprint of the process.
        :type grouping:
            :py:class:`~pyrocko.squirrel.operator.Grouping`

        :yields:
            A list of :py:class:`~pyrocko.trace.Trace` objects for every
            extracted time window.

        See :py:meth:`iter_nuts` for details on time span matching.
        '''

        tmin, tmax, codes = self._get_selection_args(
            WAVEFORM, obj, tmin, tmax, time, codes)

        kinds = ['waveform']
        if self.downloads_enabled:
            kinds.append('waveform_promise')

        self_tmin, self_tmax = self.get_time_span(kinds)

        if None in (self_tmin, self_tmax):
            logger.warning(
                'Content has undefined time span. No waveforms and no '
                'waveform promises?')
            return

        if snap_window and tinc is not None:
            # Align the traversal grid to multiples of tinc (system time
            # zero), expanding to fully cover the requested span.
            tmin = tmin if tmin is not None else self_tmin
            tmax = tmax if tmax is not None else self_tmax
            tmin = math.floor(tmin / tinc) * tinc
            tmax = math.ceil(tmax / tinc) * tinc
        else:
            tmin = tmin if tmin is not None else self_tmin + tpad
            tmax = tmax if tmax is not None else self_tmax - tpad

        tinc = tinc if tinc is not None else tmax - tmin

        try:
            if accessor_id is None:
                accessor_id = 'chopper%i' % self._n_choppers_active

            self._n_choppers_active += 1

            # Small epsilon avoids an extra window when tmax falls exactly on
            # a window boundary.
            eps = tinc * 1e-6
            if tinc != 0.0:
                nwin = int(((tmax - eps) - tmin) / tinc) + 1
            else:
                nwin = 1

            if grouping is None:
                codes_list = [codes]
            else:
                # Use an operator solely to compute the grouping of the
                # available codes; traversal is then group-by-group.
                operator = Operator(
                    filtering=CodesPatternFiltering(codes=codes),
                    grouping=grouping)

                available = set(self.get_codes(kind='waveform'))
                if self.downloads_enabled:
                    available.update(self.get_codes(kind='waveform_promise'))
                operator.update_mappings(sorted(available))

                codes_list = [
                    codes_patterns_list(scl)
                    for scl in operator.iter_in_codes()]

            ngroups = len(codes_list)
            for igroup, scl in enumerate(codes_list):
                for iwin in range(nwin):
                    # Last window is clipped to tmax.
                    wmin, wmax = tmin+iwin*tinc, min(tmin+(iwin+1)*tinc, tmax)

                    chopped = self.get_waveforms(
                        tmin=wmin-tpad,
                        tmax=wmax+tpad,
                        codes=scl,
                        codes_exclude=codes_exclude,
                        sample_rate_min=sample_rate_min,
                        sample_rate_max=sample_rate_max,
                        snap=snap,
                        include_last=include_last,
                        load_data=load_data,
                        want_incomplete=want_incomplete,
                        degap=degap,
                        maxgap=maxgap,
                        maxlap=maxlap,
                        accessor_id=accessor_id,
                        operator_params=operator_params,
                        channel_priorities=channel_priorities)

                    # Tell cache management that this accessor has moved on,
                    # so data from previous windows may be released.
                    self.advance_accessor(accessor_id)

                    yield Batch(
                        tmin=wmin,
                        tmax=wmax,
                        i=iwin,
                        n=nwin,
                        igroup=igroup,
                        ngroups=ngroups,
                        traces=chopped)

        finally:
            self._n_choppers_active -= 1
            if clear_accessor:
                self.clear_accessor(accessor_id, 'waveform')

2674 

2675 def _process_chopped( 

2676 self, chopped, degap, maxgap, maxlap, want_incomplete, tmin, tmax): 

2677 

2678 chopped.sort(key=lambda a: a.full_id) 

2679 if degap: 

2680 chopped = trace.degapper(chopped, maxgap=maxgap, maxlap=maxlap) 

2681 

2682 if not want_incomplete: 

2683 chopped_weeded = [] 

2684 for tr in chopped: 

2685 emin = tr.tmin - tmin 

2686 emax = tr.tmax + tr.deltat - tmax 

2687 if (abs(emin) <= 0.5*tr.deltat and abs(emax) <= 0.5*tr.deltat): 

2688 chopped_weeded.append(tr) 

2689 

2690 elif degap: 

2691 if (0. < emin <= 5. * tr.deltat 

2692 and -5. * tr.deltat <= emax < 0.): 

2693 

2694 tr.extend(tmin, tmax-tr.deltat, fillmethod='repeat') 

2695 chopped_weeded.append(tr) 

2696 

2697 chopped = chopped_weeded 

2698 

2699 return chopped 

2700 

2701 def _get_pyrocko_stations( 

2702 self, obj=None, tmin=None, tmax=None, time=None, codes=None, 

2703 on_error='raise'): 

2704 

2705 from pyrocko import model as pmodel 

2706 

2707 if codes is not None: 

2708 codes = codes_patterns_for_kind(STATION, codes) 

2709 

2710 by_nsl = defaultdict(lambda: (list(), list())) 

2711 for station in self.get_stations(obj, tmin, tmax, time, codes): 

2712 sargs = station._get_pyrocko_station_args() 

2713 by_nsl[station.codes.nsl][0].append(sargs) 

2714 

2715 if codes is not None: 

2716 codes = [model.CodesNSLCE(c) for c in codes] 

2717 

2718 for channel in self.get_channels(obj, tmin, tmax, time, codes): 

2719 sargs = channel._get_pyrocko_station_args() 

2720 sargs_list, channels_list = by_nsl[channel.codes.nsl] 

2721 sargs_list.append(sargs) 

2722 channels_list.append(channel) 

2723 

2724 pstations = [] 

2725 nsls = list(by_nsl.keys()) 

2726 nsls.sort() 

2727 for nsl in nsls: 

2728 sargs_list, channels_list = by_nsl[nsl] 

2729 sargs = util.consistency_merge( 

2730 [('',) + x for x in sargs_list], 

2731 error=on_error) 

2732 

2733 by_c = defaultdict(list) 

2734 for ch in channels_list: 

2735 by_c[ch.codes.channel].append(ch._get_pyrocko_channel_args()) 

2736 

2737 chas = list(by_c.keys()) 

2738 chas.sort() 

2739 pchannels = [] 

2740 for cha in chas: 

2741 list_of_cargs = by_c[cha] 

2742 cargs = util.consistency_merge( 

2743 [('',) + x for x in list_of_cargs], 

2744 error=on_error) 

2745 pchannels.append(pmodel.Channel(*cargs)) 

2746 

2747 pstations.append( 

2748 pmodel.Station(*sargs, channels=pchannels)) 

2749 

2750 return pstations 

2751 

2752 @property 

2753 def pile(self): 

2754 

2755 ''' 

2756 Emulates the older :py:class:`pyrocko.pile.Pile` interface. 

2757 

2758 This property exposes a :py:class:`pyrocko.squirrel.pile.Pile` object, 

2759 which emulates most of the older :py:class:`pyrocko.pile.Pile` methods 

2760 but uses the fluffy power of the Squirrel under the hood. 

2761 

2762 This interface can be used as a drop-in replacement for piles which are 

2763 used in existing scripts and programs for efficient waveform data 

2764 access. The Squirrel-based pile scales better for large datasets. Newer 

2765 scripts should use Squirrel's native methods to avoid the emulation 

2766 overhead. 

2767 ''' 

2768 from . import pile 

2769 

2770 if self._pile is None: 

2771 self._pile = pile.Pile(self) 

2772 

2773 return self._pile 

2774 

2775 def snuffle(self, **kwargs): 

2776 ''' 

2777 Look at dataset in Snuffler. 

2778 ''' 

2779 self.pile.snuffle(**kwargs) 

2780 

2781 def _gather_codes_keys(self, kind, gather, selector): 

2782 return set( 

2783 gather(codes) 

2784 for codes in self.iter_codes(kind) 

2785 if selector is None or selector(codes)) 

2786 

2787 def __str__(self): 

2788 return str(self.get_stats()) 

2789 

    def get_coverage(
            self, kind, tmin=None, tmax=None, codes=None, limit=None):

        '''
        Get coverage information.

        Get information about strips of gapless data coverage.

        :param kind:
            Content kind to be queried.
        :type kind:
            str

        :param tmin:
            Start time of query interval.
        :type tmin:
            timestamp

        :param tmax:
            End time of query interval.
        :type tmax:
            timestamp

        :param codes:
            If given, restrict query to given content codes patterns.
        :type codes:
            :py:class:`list` of :py:class:`~pyrocko.squirrel.model.Codes`
            objects appropriate for the queried content type, or anything which
            can be converted to such objects.

        :param limit:
            Limit query to return only up to a given maximum number of entries
            per matching time series (without setting this option, very gappy
            data could cause the query to execute for a very long time).
        :type limit:
            int

        :returns:
            Information about time spans covered by the requested time series
            data.
        :rtype:
            :py:class:`list` of :py:class:`Coverage` objects
        '''

        # Times are stored split into integer seconds plus float offset.
        tmin_seconds, tmin_offset = model.tsplit(tmin)
        tmax_seconds, tmax_offset = model.tsplit(tmax)
        kind_id = to_kind_id(kind)

        codes_info = list(self._iter_codes_info(kind=kind))

        # kdata_all entries: (pattern, kind_codes_id, codes_entry, deltat)
        kdata_all = []
        if codes is None:
            for _, codes_entry, deltat, kind_codes_id, _ in codes_info:
                kdata_all.append(
                    (codes_entry, kind_codes_id, codes_entry, deltat))

        else:
            for codes_entry in codes:
                pattern = to_codes(kind_id, codes_entry)
                for _, codes_entry, deltat, kind_codes_id, _ in codes_info:
                    if model.match_codes(pattern, codes_entry):
                        kdata_all.append(
                            (pattern, kind_codes_id, codes_entry, deltat))

        kind_codes_ids = [x[1] for x in kdata_all]

        # Number of traces alive exactly at tmin, per (codes, deltat); used
        # as the initial counter value of each coverage strip.
        counts_at_tmin = {}
        if tmin is not None:
            for nut in self.iter_nuts(
                    kind, tmin, tmin, kind_codes_ids=kind_codes_ids):

                k = nut.codes, nut.deltat
                if k not in counts_at_tmin:
                    counts_at_tmin[k] = 0

                counts_at_tmin[k] += 1

        coverages = []
        for pattern, kind_codes_id, codes_entry, deltat in kdata_all:
            # entry layout: [pattern, codes, deltat, tmin, tmax, changes]
            entry = [pattern, codes_entry, deltat, None, None, []]
            for i, order in [(0, 'ASC'), (1, 'DESC')]:
                # First/last coverage change time for this time series.
                sql = self._sql('''
                    SELECT
                        time_seconds,
                        time_offset
                    FROM %(db)s.%(coverage)s
                    WHERE
                        kind_codes_id == ?
                    ORDER BY
                        kind_codes_id ''' + order + ''',
                        time_seconds ''' + order + ''',
                        time_offset ''' + order + '''
                    LIMIT 1
                ''')

                for row in self._conn.execute(sql, [kind_codes_id]):
                    entry[3+i] = model.tjoin(row[0], row[1])

            if None in entry[3:5]:
                continue

            args = [kind_codes_id]

            sql_time = ''
            if tmin is not None:
                # intentionally < because (== tmin) is queried from nuts
                sql_time += ' AND ( ? < time_seconds ' \
                    'OR ( ? == time_seconds AND ? < time_offset ) ) '
                args.extend([tmin_seconds, tmin_seconds, tmin_offset])

            if tmax is not None:
                sql_time += ' AND ( time_seconds < ? ' \
                    'OR ( ? == time_seconds AND time_offset <= ? ) ) '
                args.extend([tmax_seconds, tmax_seconds, tmax_offset])

            sql_limit = ''
            if limit is not None:
                sql_limit = ' LIMIT ?'
                args.append(limit)

            sql = self._sql('''
                SELECT
                    time_seconds,
                    time_offset,
                    step
                FROM %(db)s.%(coverage)s
                WHERE
                    kind_codes_id == ?
                    ''' + sql_time + '''
                ORDER BY
                    kind_codes_id,
                    time_seconds,
                    time_offset
            ''' + sql_limit)

            rows = list(self._conn.execute(sql, args))

            if limit is not None and len(rows) == limit:
                # Too gappy to report in full: mark change list as unknown.
                entry[-1] = None
            else:
                counts = counts_at_tmin.get((codes_entry, deltat), 0)
                tlast = None
                if tmin is not None:
                    entry[-1].append((tmin, counts))
                    tlast = tmin

                # Accumulate step changes into absolute trace counts.
                for row in rows:
                    t = model.tjoin(row[0], row[1])
                    counts += row[2]
                    entry[-1].append((t, counts))
                    tlast = t

                if tmax is not None and (tlast is None or tlast != tmax):
                    entry[-1].append((tmax, counts))

            coverages.append(model.Coverage.from_values(entry + [kind_id]))

        return coverages

2948 

    def get_stationxml(
            self, obj=None, tmin=None, tmax=None, time=None, codes=None,
            level='response', on_error='raise'):

        '''
        Get station/channel/response metadata in StationXML representation.

        %(query_args)s

        :returns:
            :py:class:`~pyrocko.io.stationxml.FDSNStationXML` object.
        '''

        if level not in ('network', 'station', 'channel', 'response'):
            raise ValueError('Invalid level: %s' % level)

        tmin, tmax, codes = self._get_selection_args(
            CHANNEL, obj, tmin, tmax, time, codes)

        def tts(t):
            # Compact time formatting for log messages.
            if t is None:
                return '<none>'
            else:
                return util.tts(t, format='%Y-%m-%d %H:%M:%S')

        # Error policy selected once, applied via handle_error below.
        if on_error == 'ignore':
            def handle_error(exc):
                pass

        elif on_error == 'warn':
            def handle_error(exc):
                logger.warning(str(exc))

        elif on_error == 'raise':
            def handle_error(exc):
                raise exc

        def use_first(node_type_name, codes, k, group):
            # Arbitrarily keep the first of several epoch duplicates.
            if on_error == 'warn':
                logger.warning(
                    'Duplicates for %s %s, %s - %s -> using first' % (
                        node_type_name,
                        '.'.join(codes),
                        tts(k[0]), tts(k[1])))

            return group[0]

        def deduplicate(node_type_name, codes, nodes):
            # Collapse nodes sharing the same (start_date, end_date) epoch.
            groups = defaultdict(list)
            for node in nodes:
                k = (node.start_date, node.end_date)
                groups[k].append(node)

            return [
                use_first(node_type_name, codes, k, group)
                for (k, group) in groups.items()]

        filtering = CodesPatternFiltering(codes=codes)

        nslcs = list(set(
            codes.nslc for codes in
            filtering.filter(self.get_codes(kind='channel'))))

        from pyrocko.io import stationxml as sx

        networks = []
        for net, stas in prefix_tree(nslcs):
            network = sx.Network(code=net)
            networks.append(network)

            if level not in ('station', 'channel', 'response'):
                continue

            for sta, locs in stas:
                stations = self.get_stations(
                    tmin=tmin,
                    tmax=tmax,
                    codes=(net, sta, '*'),
                    model='stationxml')

                if on_error != 'raise':
                    stations = deduplicate(
                        'Station', (net, sta), stations)

                errors = sx.check_overlaps(
                    'Station', (net, sta), stations)

                if errors:
                    handle_error(error.Duplicate(
                        'Overlapping/duplicate station info:\n %s'
                        % '\n '.join(errors)))

                network.station_list.extend(stations)

                if level not in ('channel', 'response'):
                    continue

                for loc, chas in locs:
                    for cha, _ in chas:
                        channels = self.get_channels(
                            tmin=tmin,
                            tmax=tmax,
                            codes=(net, sta, loc, cha),
                            model='stationxml')

                        if on_error != 'raise':
                            channels = deduplicate(
                                'Channel', (net, sta, loc, cha), channels)

                        errors = sx.check_overlaps(
                            'Channel', (net, sta, loc, cha), channels)

                        if errors:
                            handle_error(error.Duplicate(
                                'Overlapping/duplicate channel info:\n %s'
                                % '\n '.join(errors)))

                        for channel in channels:
                            # Attach channel to the station epoch containing
                            # its time span.
                            station = sx.find_containing(stations, channel)
                            if station is not None:
                                station.channel_list.append(channel)
                            else:
                                handle_error(error.NotAvailable(
                                    'No station or station epoch found '
                                    'for channel: %s' % '.'.join(
                                        (net, sta, loc, cha))))

                                continue

                            if level != 'response':
                                continue

                            try:
                                response_sq, response_sx = self.get_response(
                                    codes=(net, sta, loc, cha),
                                    tmin=channel.start_date,
                                    tmax=channel.end_date,
                                    model='stationxml+',
                                    on_duplicate=on_error)

                            except error.NotAvailable as e:
                                handle_error(e)
                                continue

                            if not (
                                    sx.eq_open(
                                        channel.start_date, response_sq.tmin)
                                    and sx.eq_open(
                                        channel.end_date, response_sq.tmax)):

                                handle_error(error.Inconsistencies(
                                    'Response time span does not match '
                                    'channel time span: %s' % '.'.join(
                                        (net, sta, loc, cha))))

                            channel.response = response_sx

        return sx.FDSNStationXML(
            source='Generated by Pyrocko Squirrel.',
            network_list=networks)

3109 

def add_operator(self, op):
    '''
    Attach an operator to this Squirrel instance.

    :param op:
        Operator to be added; its mappings become active after the next
        call to :py:meth:`update_operator_mappings`.
    '''

    self._operators.append(op)

3112 

def update_operator_mappings(self):
    '''
    Rebuild the code mappings of all attached operators.

    Queries the currently available channel code sequences and lets every
    attached operator refresh its input/output mappings in the shared
    operator registry.
    '''

    # The original expression ``kind=('channel')`` evaluated to the plain
    # string 'channel' -- parentheses without a trailing comma do not
    # create a tuple.  Pass the string directly to remove the misleading
    # syntax without changing behavior.
    available = self.get_codes(kind='channel')

    for operator in self._operators:
        operator.update_mappings(available, self._operator_registry)

3118 

def iter_operator_mappings(self):
    '''
    Iterate over the code mappings of all attached operators.

    Yields tuples ``(operator, in_codes, out_codes)``.
    '''

    for op in self._operators:
        for codes_in, codes_out in op.iter_mappings():
            yield op, codes_in, codes_out

3123 

def get_operator_mappings(self):
    '''
    Get the code mappings of all attached operators.

    :returns:
        List of tuples ``(operator, in_codes, out_codes)``.
    '''

    return list(self.iter_operator_mappings())

3126 

def get_operator(self, codes):
    '''
    Get the operator registered for a given code sequence, if any.

    :returns:
        The operator providing ``codes``, or ``None`` when no operator is
        registered under that code sequence.
    '''

    if codes in self._operator_registry:
        return self._operator_registry[codes][0]

    return None

3132 

def get_operator_group(self, codes):
    '''
    Get the registry entry for a given code sequence.

    :returns:
        Registered entry for ``codes``, or ``(None, (None, None, None))``
        when nothing is registered under that code sequence.
    '''

    fallback = (None, (None, None, None))
    return self._operator_registry.get(codes, fallback)

3138 

def iter_operator_codes(self):
    '''
    Iterate over the code sequences produced by attached operators.
    '''

    for _, _, out_codes in self.iter_operator_mappings():
        yield from out_codes

3143 

def get_operator_codes(self):
    '''
    Get the code sequences produced by attached operators.
    '''

    return list(self.iter_operator_codes())

3146 

def print_tables(self, table_names=None, stream=None):
    '''
    Dump raw database tables in textual form (for debugging purposes).

    :param table_names:
        Names of tables to be dumped or ``None`` to dump all.
    :type table_names:
        :py:class:`list` of :py:class:`str`

    :param stream:
        Open file or ``None`` to dump to standard output.
    '''

    stream = sys.stdout if stream is None else stream

    # Accept a single table name for convenience.
    if isinstance(table_names, str):
        table_names = [table_names]

    # Logical table name -> template resolving the physical table name
    # (selection-scoped tables live under per-selection names).
    name_templates = {
        'selection_file_states': '%(db)s.%(file_states)s',
        'selection_nuts': '%(db)s.%(nuts)s',
        'selection_kind_codes_count': '%(db)s.%(kind_codes_count)s',
        'files': 'files',
        'nuts': 'nuts',
        'kind_codes': 'kind_codes',
        'kind_codes_count': 'kind_codes_count'}

    if table_names is None:
        table_names = [
            'selection_file_states',
            'selection_nuts',
            'selection_kind_codes_count',
            'files', 'nuts', 'kind_codes', 'kind_codes_count']

    for name in table_names:
        self._database.print_table(
            name_templates[name] % self._names, stream=stream)

3185 

3186 

class SquirrelStats(Object):
    '''
    Container to hold statistics about contents available from a Squirrel.

    See also :py:meth:`Squirrel.get_stats`.
    '''

    # Guts attribute declarations; the ``help`` strings below double as the
    # per-attribute documentation.
    nfiles = Int.T(
        help='Number of files in selection.')
    nnuts = Int.T(
        help='Number of index nuts in selection.')
    codes = List.T(
        Tuple.T(content_t=String.T()),
        help='Available code sequences in selection, e.g. '
             '(agency, network, station, location) for stations nuts.')
    kinds = List.T(
        String.T(),
        help='Available content types in selection.')
    # NOTE(review): 'files is selection' in the help text below looks like a
    # typo for 'files in selection' -- left unchanged here as help strings
    # are runtime data.
    total_size = Int.T(
        help='Aggregated file size of files is selection.')
    counts = Dict.T(
        String.T(), Dict.T(Tuple.T(content_t=String.T()), Int.T()),
        help='Breakdown of how many nuts of any content type and code '
             'sequence are available in selection, ``counts[kind][codes]``.')
    time_spans = Dict.T(
        String.T(), Tuple.T(content_t=Timestamp.T()),
        help='Time spans by content type.')
    sources = List.T(
        String.T(),
        help='Descriptions of attached sources.')
    operators = List.T(
        String.T(),
        help='Descriptions of attached operators.')

    def __str__(self):
        '''
        Render the statistics as a human-readable multi-line summary.
        '''

        # Total nut count per content kind, summed over all code sequences.
        kind_counts = dict(
            (kind, sum(self.counts[kind].values())) for kind in self.kinds)

        scodes = model.codes_to_str_abbreviated(self.codes)

        # One indented line per source/operator description, or '<none>'.
        ssources = '<none>' if not self.sources else '\n' + '\n'.join(
            ' ' + s for s in self.sources)

        soperators = '<none>' if not self.operators else '\n' + '\n'.join(
            ' ' + s for s in self.operators)

        def stime(t):
            # Format a timestamp; the g_tmin/g_tmax sentinels (and None)
            # stand for an unconstrained bound and render as '<none>'.
            return util.tts(t) if t is not None and t not in (
                model.g_tmin, model.g_tmax) else '<none>'

        def stable(rows):
            # Left-justify each column to the width of its widest cell.
            ns = [max(len(w) for w in col) for col in zip(*rows)]
            return '\n'.join(
                ' '.join(w.ljust(n) for n, w in zip(ns, row))
                for row in rows)

        def indent(s):
            # Prefix every line of a multi-line string.
            return '\n'.join(' '+line for line in s.splitlines())

        # Per-kind table: kind, nut count, and available time span.
        stspans = '<none>' if not self.kinds else '\n' + indent(stable([(
            kind + ':',
            str(kind_counts[kind]),
            stime(self.time_spans[kind][0]),
            '-',
            stime(self.time_spans[kind][1])) for kind in sorted(self.kinds)]))

        s = '''
Number of files: %i
Total size of known files: %s
Number of index nuts: %i
Available content kinds: %s
Available codes: %s
Sources: %s
Operators: %s''' % (
            self.nfiles,
            util.human_bytesize(self.total_size),
            self.nnuts,
            stspans, scodes, ssources, soperators)

        # Drop the leading newline of the template.
        return s.lstrip()

3267 

3268 

# Public API of this module, as exposed via ``from ... import *``.
__all__ = [
    'Squirrel',
    'SquirrelStats',
]