Coverage for /usr/local/lib/python3.11/dist-packages/pyrocko/squirrel/base.py: 85%

878 statements  

« prev     ^ index     » next       coverage.py v6.5.0, created at 2024-02-27 10:58 +0000

1# http://pyrocko.org - GPLv3 

2# 

3# The Pyrocko Developers, 21st Century 

4# ---|P------/S----------~Lg---------- 

5 

6''' 

7Squirrel main classes. 

8''' 

9 

10import sys 

11import os 

12import time 

13import math 

14import logging 

15import threading 

16import queue 

17from collections import defaultdict 

18 

19from pyrocko.guts import Object, Int, List, Tuple, String, Timestamp, Dict 

20from pyrocko import util, trace 

21from pyrocko import progress 

22from pyrocko.plot import nice_time_tick_inc_approx_secs 

23 

24from . import model, io, cache, dataset 

25 

26from .model import to_kind_id, WaveformOrder, to_kind, to_codes, \ 

27 STATION, CHANNEL, RESPONSE, EVENT, WAVEFORM, codes_patterns_list, \ 

28 codes_patterns_for_kind 

29from .client import fdsn, catalog 

30from .selection import Selection, filldocs 

31from .database import abspath 

32from .operators.base import Operator, CodesPatternFiltering 

33from . import client, environment, error 

34 

# Module-level logger; 'psq' is the squirrel logging namespace.
logger = logging.getLogger('psq.base')

# Namespace prefix used by the guts serialization framework for types
# defined in this module (convention across pyrocko.squirrel).
guts_prefix = 'squirrel'

38 

39 

def nonef(f, xs):
    '''
    Apply *f* to the non-``None`` items of *xs*.

    Returns ``None`` when there are no non-``None`` items.
    '''
    filtered = [value for value in xs if value is not None]
    return f(filtered) if filtered else None

46 

47 

def make_task(*args):
    # Create a progress-reporting task bound to this module's logger.
    return progress.task(*args, logger=logger)

50 

51 

def lpick(condition, seq):
    '''
    Partition *seq* into ``(non_matching, matching)`` lists by *condition*.
    '''
    rejected = []
    accepted = []
    for item in seq:
        if condition(item):
            accepted.append(item)
        else:
            rejected.append(item)

    return rejected, accepted

58 

59 

def len_plural(obj):
    '''
    Return ``(len(obj), suffix)`` where suffix is ``'s'`` unless length is 1.

    Convenience for log messages like ``'%i file%s' % len_plural(files)``.
    '''
    n = len(obj)
    return n, '' if n == 1 else 's'

62 

63 

def blocks(tmin, tmax, deltat, nsamples_block=100000):
    '''
    Yield consecutive time windows covering ``[tmin, tmax]``.

    The window length is a "nice" (human-friendly) time increment close to
    ``deltat * nsamples_block``. Windows are aligned to multiples of that
    increment, so the first may start before *tmin* and the last may end
    after *tmax*.
    '''
    tblock = nice_time_tick_inc_approx_secs(
        util.to_time_float(deltat * nsamples_block))

    kmin = int(math.floor(tmin / tblock))
    kmax = int(math.ceil(tmax / tblock))
    for k in range(kmin, kmax):
        yield k * tblock, (k + 1) * tblock

71 

72 

def gaps(avail, tmin, tmax):
    '''
    Compute uncovered spans within ``[tmin, tmax]``.

    :param avail:
        Covered spans as ``(tmin_a, tmax_a)`` pairs.

    Returns a list of ``(tmin_g, tmax_g)`` pairs for the parts of
    ``[tmin, tmax]`` not covered by *avail* (sweep-line over interval
    start/end events).
    '''
    assert tmin < tmax

    # Event list: +1 opens coverage, -1 closes it. The query span itself
    # contributes a closing event at tmin and an opening event at tmax so
    # that uncovered regions inside it show up as level 0.
    events = [(tmax, 1), (tmin, -1)]
    for (tmin_a, tmax_a) in avail:
        assert tmin_a < tmax_a
        events.append((tmin_a, 1))
        events.append((tmax_a, -1))

    events.sort()

    uncovered = []
    level = 1
    t_open = None
    for t, step in events:
        if level == 1 and step == -1:
            t_open = t
        elif level == 0 and step == 1 and t_open is not None:
            if t_open != t:
                uncovered.append((t_open, t))

        level += step

    return uncovered

97 

98 

def order_key(order):
    '''
    Sorting/grouping key for orders: ``(codes, tmin, tmax)``.
    '''
    return order.codes, order.tmin, order.tmax

101 

102 

103def _is_exact(pat): 

104 return not ('*' in pat or '?' in pat or ']' in pat or '[' in pat) 

105 

106 

def prefix_tree(tups):
    '''
    Build a nested prefix tree from equal-length tuples.

    Returns a sorted list of ``(value, children)`` pairs, where children
    is the prefix tree of the remaining tuple elements sharing that value.
    '''
    if not tups:
        return []

    # Base case: single-element tuples become sorted leaves.
    if len(tups[0]) == 1:
        return sorted((tup[0], []) for tup in tups)

    # Group remainders by their first element and recurse.
    grouped = defaultdict(list)
    for tup in tups:
        grouped[tup[0]].append(tup[1:])

    return [(head, prefix_tree(grouped[head])) for head in sorted(grouped)]

123 

124 

def match_time_span(tmin, tmax, obj):
    '''
    True if the query span ``[tmin, tmax]`` intersects *obj*'s time span.

    ``None`` on either side of either span means "unbounded".
    '''
    starts_after_query = \
        obj.tmin is not None and tmax is not None and obj.tmin > tmax
    ends_before_query = \
        tmin is not None and obj.tmax is not None and tmin >= obj.tmax

    return not (starts_after_query or ends_before_query)

128 

129 

class Batch(object):
    '''
    Batch of waveforms from window-wise data extraction.

    Carries the state and results yielded for each time window in
    window-wise waveform extraction with the
    :py:meth:`Squirrel.chopper_waveforms` method.

    *Attributes:*

    .. py:attribute:: tmin

        Start of this time window.

    .. py:attribute:: tmax

        End of this time window.

    .. py:attribute:: tpad

        Padding time length.

    .. py:attribute:: i

        Index of this time window in sequence.

    .. py:attribute:: n

        Total number of time windows in sequence.

    .. py:attribute:: igroup

        Index of this time window's sequence group.

    .. py:attribute:: ngroups

        Total number of sequence groups.

    .. py:attribute:: traces

        Extracted waveforms for this time window.
    '''

    def __init__(self, tmin, tmax, tpad, i, n, igroup, ngroups, traces):
        self.tmin = tmin
        self.tmax = tmax
        self.tpad = tpad
        self.i = i
        self.n = n
        self.igroup = igroup
        self.ngroups = ngroups
        self.traces = traces

    def as_multitrace(self):
        '''
        Merge this batch's traces into a single multi-component trace.

        The merged span covers the window plus padding on both sides.
        '''
        from pyrocko import multitrace

        merged_data, merged_codes, merged_tmin, merged_deltat = \
            trace.merge_traces_data_as_array(
                self.traces,
                tmin=self.tmin - self.tpad,
                tmax=self.tmax + self.tpad)

        return multitrace.MultiTrace(
            data=merged_data,
            codes=merged_codes,
            tmin=merged_tmin,
            deltat=merged_deltat)

193 

194 

195class Squirrel(Selection): 

196 ''' 

197 Prompt, lazy, indexing, caching, dynamic seismological dataset access. 

198 

199 :param env: 

200 Squirrel environment instance or directory path to use as starting 

201 point for its detection. By default, the current directory is used as 

202 starting point. When searching for a usable environment the directory 

203 ``'.squirrel'`` or ``'squirrel'`` in the current (or starting point) 

204 directory is used if it exists, otherwise the parent directories are 

205 search upwards for the existence of such a directory. If no such 

206 directory is found, the user's global Squirrel environment 

207 ``'$HOME/.pyrocko/squirrel'`` is used. 

208 :type env: 

209 :py:class:`~pyrocko.squirrel.environment.Environment` or 

210 :py:class:`str` 

211 

212 :param database: 

213 Database instance or path to database. By default the 

214 database found in the detected Squirrel environment is used. 

215 :type database: 

216 :py:class:`~pyrocko.squirrel.database.Database` or :py:class:`str` 

217 

218 :param cache_path: 

219 Directory path to use for data caching. By default, the ``'cache'`` 

220 directory in the detected Squirrel environment is used. 

221 :type cache_path: 

222 :py:class:`str` 

223 

224 :param persistent: 

225 If given a name, create a persistent selection. 

226 :type persistent: 

227 :py:class:`str` 

228 

229 This is the central class of the Squirrel framework. It provides a unified 

230 interface to query and access seismic waveforms, station meta-data and 

231 event information from local file collections and remote data sources. For 

232 prompt responses, a profound database setup is used under the hood. To 

233 speed up assemblage of ad-hoc data selections, files are indexed on first 

234 use and the extracted meta-data is remembered in the database for 

235 subsequent accesses. Bulk data is lazily loaded from disk and remote 

236 sources, just when requested. Once loaded, data is cached in memory to 

237 expedite typical access patterns. Files and data sources can be dynamically 

238 added to and removed from the Squirrel selection at runtime. 

239 

240 Queries are restricted to the contents of the files currently added to the 

241 Squirrel selection (usually a subset of the file meta-information 

242 collection in the database). This list of files is referred to here as the 

243 "selection". By default, temporary tables are created in the attached 

244 database to hold the names of the files in the selection as well as various 

245 indices and counters. These tables are only visible inside the application 

246 which created them and are deleted when the database connection is closed 

247 or the application exits. To create a selection which is not deleted at 

248 exit, supply a name to the ``persistent`` argument of the Squirrel 

249 constructor. Persistent selections are shared among applications using the 

250 same database. 

251 

252 **Method summary** 

253 

254 Some of the methods are implemented in :py:class:`Squirrel`'s base class 

255 :py:class:`~pyrocko.squirrel.selection.Selection`. 

256 

257 .. autosummary:: 

258 

259 ~Squirrel.add 

260 ~Squirrel.add_source 

261 ~Squirrel.add_fdsn 

262 ~Squirrel.add_catalog 

263 ~Squirrel.add_dataset 

264 ~Squirrel.add_virtual 

265 ~Squirrel.update 

266 ~Squirrel.update_waveform_promises 

267 ~Squirrel.advance_accessor 

268 ~Squirrel.clear_accessor 

269 ~Squirrel.reload 

270 ~pyrocko.squirrel.selection.Selection.iter_paths 

271 ~Squirrel.iter_nuts 

272 ~Squirrel.iter_kinds 

273 ~Squirrel.iter_deltats 

274 ~Squirrel.iter_codes 

275 ~pyrocko.squirrel.selection.Selection.get_paths 

276 ~Squirrel.get_nuts 

277 ~Squirrel.get_kinds 

278 ~Squirrel.get_deltats 

279 ~Squirrel.get_codes 

280 ~Squirrel.get_counts 

281 ~Squirrel.get_time_span 

282 ~Squirrel.get_deltat_span 

283 ~Squirrel.get_nfiles 

284 ~Squirrel.get_nnuts 

285 ~Squirrel.get_total_size 

286 ~Squirrel.get_stats 

287 ~Squirrel.get_content 

288 ~Squirrel.get_stations 

289 ~Squirrel.get_channels 

290 ~Squirrel.get_responses 

291 ~Squirrel.get_events 

292 ~Squirrel.get_waveform_nuts 

293 ~Squirrel.get_waveforms 

294 ~Squirrel.chopper_waveforms 

295 ~Squirrel.get_coverage 

296 ~Squirrel.pile 

297 ~Squirrel.snuffle 

298 ~Squirrel.glob_codes 

299 ~pyrocko.squirrel.selection.Selection.get_database 

300 ~Squirrel.print_tables 

301 ''' 

302 

    def __init__(
            self, env=None, database=None, cache_path=None, persistent=None):

        # Resolve the environment: accept a ready Environment instance or
        # detect one from a starting-point path (or the default for None).
        if not isinstance(env, environment.Environment):
            env = environment.get_environment(env)

        # Fall back to the environment's settings for anything not given
        # explicitly.
        if database is None:
            database = env.expand_path(env.database_path)

        if cache_path is None:
            cache_path = env.expand_path(env.cache_path)

        if persistent is None:
            persistent = env.persistent

        Selection.__init__(
            self, database=database, persistent=persistent)

        # Paths in the database are stored relative to the directory
        # containing the environment.
        self.get_database().set_basepath(os.path.dirname(env.get_basepath()))

        # Separate in-memory content caches: bulk waveform data is kept
        # apart from other (meta-data) content.
        self._content_caches = {
            'waveform': cache.ContentCache(),
            'default': cache.ContentCache()}

        self._cache_path = cache_path

        self._sources = []            # attached remote data sources
        self._operators = []
        self._operator_registry = {}

        self._pending_orders = []     # collected orders, processed later

        self._pile = None             # lazily created pile emulation
        self._n_choppers_active = 0

        # Master switch; when False, remote downloads are suppressed.
        self.downloads_enabled = True

        # Names of the selection-specific tables in the attached database,
        # derived from this selection's name.
        self._names.update({
            'nuts': self.name + '_nuts',
            'kind_codes_count': self.name + '_kind_codes_count',
            'coverage': self.name + '_coverage'})

        with self.transaction('create tables') as cursor:
            self._create_tables_squirrel(cursor)

347 

    def _create_tables_squirrel(self, cursor):
        # Create the selection-local tables, indices and triggers. All
        # object names are expanded via self._sql() from self._names, so
        # every (possibly persistent) selection gets its own set of tables.

        # Main index table: one row per content piece ("nut"), pointing
        # into a file (segment/element), carrying kind, codes and the time
        # span split into integer seconds plus fractional offset.
        cursor.execute(self._register_table(self._sql(
            '''
                CREATE TABLE IF NOT EXISTS %(db)s.%(nuts)s (
                    nut_id integer PRIMARY KEY,
                    file_id integer,
                    file_segment integer,
                    file_element integer,
                    kind_id integer,
                    kind_codes_id integer,
                    tmin_seconds integer,
                    tmin_offset integer,
                    tmax_seconds integer,
                    tmax_offset integer,
                    kscale integer)
            ''')))

        # Per (kind, codes) counter of nuts in this selection, maintained
        # by the _inc_kind_codes/_dec_kind_codes triggers below.
        cursor.execute(self._register_table(self._sql(
            '''
                CREATE TABLE IF NOT EXISTS %(db)s.%(kind_codes_count)s (
                    kind_codes_id integer PRIMARY KEY,
                    count integer)
            ''')))

        # A nut is uniquely identified by its position within its file.
        cursor.execute(self._sql(
            '''
                CREATE UNIQUE INDEX IF NOT EXISTS %(db)s.%(nuts)s_file_element
                    ON %(nuts)s (file_id, file_segment, file_element)
            '''))

        cursor.execute(self._sql(
            '''
                CREATE INDEX IF NOT EXISTS %(db)s.%(nuts)s_index_file_id
                    ON %(nuts)s (file_id)
            '''))

        # Time-range lookup indices (used by _timerange_sql).
        cursor.execute(self._sql(
            '''
                CREATE INDEX IF NOT EXISTS %(db)s.%(nuts)s_index_tmin_seconds
                    ON %(nuts)s (kind_id, tmin_seconds)
            '''))

        cursor.execute(self._sql(
            '''
                CREATE INDEX IF NOT EXISTS %(db)s.%(nuts)s_index_tmax_seconds
                    ON %(nuts)s (kind_id, tmax_seconds)
            '''))

        cursor.execute(self._sql(
            '''
                CREATE INDEX IF NOT EXISTS %(db)s.%(nuts)s_index_kscale
                    ON %(nuts)s (kind_id, kscale, tmin_seconds)
            '''))

        # Keep the nuts table consistent with the global files table:
        # deleting a file removes its nuts.
        cursor.execute(self._sql(
            '''
                CREATE TRIGGER IF NOT EXISTS %(db)s.%(nuts)s_delete_nuts
                    BEFORE DELETE ON main.files FOR EACH ROW
                    BEGIN
                        DELETE FROM %(nuts)s WHERE file_id == old.file_id;
                    END
            '''))

        # trigger only on size to make silent update of mtime possible
        cursor.execute(self._sql(
            '''
                CREATE TRIGGER IF NOT EXISTS %(db)s.%(nuts)s_delete_nuts2
                    BEFORE UPDATE OF size ON main.files FOR EACH ROW
                    BEGIN
                        DELETE FROM %(nuts)s WHERE file_id == old.file_id;
                    END
            '''))

        # Removing a file from the selection also removes its nuts.
        cursor.execute(self._sql(
            '''
                CREATE TRIGGER IF NOT EXISTS
                    %(db)s.%(file_states)s_delete_files
                    BEFORE DELETE ON %(db)s.%(file_states)s FOR EACH ROW
                    BEGIN
                        DELETE FROM %(nuts)s WHERE file_id == old.file_id;
                    END
            '''))

        # Maintain kind_codes_count: +1 on nut insertion ...
        cursor.execute(self._sql(
            '''
                CREATE TRIGGER IF NOT EXISTS %(db)s.%(nuts)s_inc_kind_codes
                    BEFORE INSERT ON %(nuts)s FOR EACH ROW
                    BEGIN
                        INSERT OR IGNORE INTO %(kind_codes_count)s VALUES
                            (new.kind_codes_id, 0);
                        UPDATE %(kind_codes_count)s
                            SET count = count + 1
                            WHERE new.kind_codes_id
                                == %(kind_codes_count)s.kind_codes_id;
                    END
            '''))

        # ... and -1 on nut deletion.
        cursor.execute(self._sql(
            '''
                CREATE TRIGGER IF NOT EXISTS %(db)s.%(nuts)s_dec_kind_codes
                    BEFORE DELETE ON %(nuts)s FOR EACH ROW
                    BEGIN
                        UPDATE %(kind_codes_count)s
                            SET count = count - 1
                            WHERE old.kind_codes_id
                                == %(kind_codes_count)s.kind_codes_id;
                    END
            '''))

        # Coverage table: per (kind, codes), a sparse step function of the
        # number of nuts covering any time instant. Each row is a time at
        # which that count changes by `step`.
        cursor.execute(self._register_table(self._sql(
            '''
                CREATE TABLE IF NOT EXISTS %(db)s.%(coverage)s (
                    kind_codes_id integer,
                    time_seconds integer,
                    time_offset integer,
                    step integer)
            ''')))

        cursor.execute(self._sql(
            '''
                CREATE UNIQUE INDEX IF NOT EXISTS %(db)s.%(coverage)s_time
                    ON %(coverage)s (kind_codes_id, time_seconds, time_offset)
            '''))

        # Maintain coverage on nut insertion: +1 at the nut's tmin, -1 at
        # its tmax; rows whose step becomes 0 are purged afterwards.
        cursor.execute(self._sql(
            '''
                CREATE TRIGGER IF NOT EXISTS %(db)s.%(nuts)s_add_coverage
                    AFTER INSERT ON %(nuts)s FOR EACH ROW
                    BEGIN
                        INSERT OR IGNORE INTO %(coverage)s VALUES
                        (new.kind_codes_id, new.tmin_seconds, new.tmin_offset, 0)
                        ;
                        UPDATE %(coverage)s
                            SET step = step + 1
                            WHERE new.kind_codes_id == %(coverage)s.kind_codes_id
                                AND new.tmin_seconds == %(coverage)s.time_seconds
                                AND new.tmin_offset == %(coverage)s.time_offset
                        ;
                        INSERT OR IGNORE INTO %(coverage)s VALUES
                        (new.kind_codes_id, new.tmax_seconds, new.tmax_offset, 0)
                        ;
                        UPDATE %(coverage)s
                            SET step = step - 1
                            WHERE new.kind_codes_id == %(coverage)s.kind_codes_id
                                AND new.tmax_seconds == %(coverage)s.time_seconds
                                AND new.tmax_offset == %(coverage)s.time_offset
                        ;
                        DELETE FROM %(coverage)s
                            WHERE new.kind_codes_id == %(coverage)s.kind_codes_id
                                AND new.tmin_seconds == %(coverage)s.time_seconds
                                AND new.tmin_offset == %(coverage)s.time_offset
                                AND step == 0
                        ;
                        DELETE FROM %(coverage)s
                            WHERE new.kind_codes_id == %(coverage)s.kind_codes_id
                                AND new.tmax_seconds == %(coverage)s.time_seconds
                                AND new.tmax_offset == %(coverage)s.time_offset
                                AND step == 0
                        ;
                    END
            '''))

        # Inverse coverage bookkeeping on nut deletion.
        cursor.execute(self._sql(
            '''
                CREATE TRIGGER IF NOT EXISTS %(db)s.%(nuts)s_remove_coverage
                    BEFORE DELETE ON %(nuts)s FOR EACH ROW
                    BEGIN
                        INSERT OR IGNORE INTO %(coverage)s VALUES
                        (old.kind_codes_id, old.tmin_seconds, old.tmin_offset, 0)
                        ;
                        UPDATE %(coverage)s
                            SET step = step - 1
                            WHERE old.kind_codes_id == %(coverage)s.kind_codes_id
                                AND old.tmin_seconds == %(coverage)s.time_seconds
                                AND old.tmin_offset == %(coverage)s.time_offset
                        ;
                        INSERT OR IGNORE INTO %(coverage)s VALUES
                        (old.kind_codes_id, old.tmax_seconds, old.tmax_offset, 0)
                        ;
                        UPDATE %(coverage)s
                            SET step = step + 1
                            WHERE old.kind_codes_id == %(coverage)s.kind_codes_id
                                AND old.tmax_seconds == %(coverage)s.time_seconds
                                AND old.tmax_offset == %(coverage)s.time_offset
                        ;
                        DELETE FROM %(coverage)s
                            WHERE old.kind_codes_id == %(coverage)s.kind_codes_id
                                AND old.tmin_seconds == %(coverage)s.time_seconds
                                AND old.tmin_offset == %(coverage)s.time_offset
                                AND step == 0
                        ;
                        DELETE FROM %(coverage)s
                            WHERE old.kind_codes_id == %(coverage)s.kind_codes_id
                                AND old.tmax_seconds == %(coverage)s.time_seconds
                                AND old.tmax_offset == %(coverage)s.time_offset
                                AND step == 0
                        ;
                    END
            '''))

548 

    def _delete(self):
        '''Delete database tables associated with this Squirrel.'''

        with self.transaction('delete tables') as cursor:
            # Triggers are dropped before the tables they operate on. The
            # coverage-related objects use IF EXISTS so their absence is
            # tolerated.
            for s in '''
                    DROP TRIGGER %(db)s.%(nuts)s_delete_nuts;
                    DROP TRIGGER %(db)s.%(nuts)s_delete_nuts2;
                    DROP TRIGGER %(db)s.%(file_states)s_delete_files;
                    DROP TRIGGER %(db)s.%(nuts)s_inc_kind_codes;
                    DROP TRIGGER %(db)s.%(nuts)s_dec_kind_codes;
                    DROP TABLE %(db)s.%(nuts)s;
                    DROP TABLE %(db)s.%(kind_codes_count)s;
                    DROP TRIGGER IF EXISTS %(db)s.%(nuts)s_add_coverage;
                    DROP TRIGGER IF EXISTS %(db)s.%(nuts)s_remove_coverage;
                    DROP TABLE IF EXISTS %(db)s.%(coverage)s;
                    '''.strip().splitlines():

                cursor.execute(self._sql(s))

        # Let the base class drop its own tables as well.
        Selection._delete(self)

569 

    @filldocs
    def add(self,
            paths,
            kinds=None,
            format='detect',
            include=None,
            exclude=None,
            check=True):

        '''
        Add files to the selection.

        :param paths:
            Iterator yielding paths to files or directories to be added to the
            selection. Recurses into directories. If given a ``str``, it
            is treated as a single path to be added.
        :type paths:
            :py:class:`list` of :py:class:`str`

        :param kinds:
            Content types to be made available through the Squirrel selection.
            By default, all known content types are accepted.
        :type kinds:
            :py:class:`list` of :py:class:`str`

        :param format:
            File format identifier or ``'detect'`` to enable auto-detection
            (available: %(file_formats)s).
        :type format:
            str

        :param include:
            If not ``None``, files are only included if their paths match the
            given regular expression pattern.
        :type include:
            str

        :param exclude:
            If not ``None``, files are only included if their paths do not
            match the given regular expression pattern.
        :type exclude:
            str

        :param check:
            If ``True``, all file modification times are checked to see if
            cached information has to be updated (slow). If ``False``, only
            previously unknown files are indexed and cached information is used
            for known files, regardless of file state (fast, corresponds to
            Squirrel's ``--optimistic`` mode). File deletions will go
            undetected in the latter case.
        :type check:
            bool

        :Complexity:
            O(log N)
        '''

        # Normalize scalar arguments to sequences.
        if isinstance(kinds, str):
            kinds = (kinds,)

        if isinstance(paths, str):
            paths = [paths]

        kind_mask = model.to_kind_mask(kinds)

        # Register the (filtered, recursively expanded) file paths with the
        # selection. Paths starting with 'virtual:' bypass the file-system
        # scan.
        Selection.add(
            self, util.iter_select_files(
                paths,
                show_progress=False,
                include=include,
                exclude=exclude,
                pass_through=lambda path: path.startswith('virtual:')
            ), kind_mask, format)

        # Index new/modified files and refresh the nuts table.
        self._load(check)
        self._update_nuts()

646 

    def reload(self):
        '''
        Check for modifications and reindex modified files.

        Based on file modification times.
        '''

        # Force re-examination of every file in the selection, then rescan
        # and rebuild the nut index.
        self._set_file_states_force_check()
        self._load(check=True)
        self._update_nuts()

657 

    def add_virtual(self, nuts, virtual_paths=None):
        '''
        Add content which is not backed by files.

        :param nuts:
            Content pieces to be added.
        :type nuts:
            iterator yielding :py:class:`~pyrocko.squirrel.model.Nut` objects

        :param virtual_paths:
            List of virtual paths to prevent creating a temporary list of the
            nuts while aggregating the file paths for the selection.
        :type virtual_paths:
            :py:class:`list` of :py:class:`str`

        Stores to the main database and the selection.
        '''

        if isinstance(virtual_paths, str):
            virtual_paths = [virtual_paths]

        if virtual_paths is None:
            # Need to traverse the nuts to collect their paths, so
            # materialize the iterator first (it is consumed twice).
            if not isinstance(nuts, list):
                nuts = list(nuts)
            virtual_paths = set(nut.file_path for nut in nuts)

        # Register the virtual paths with the selection, index the nut
        # meta-data in the main database and refresh the selection's nuts.
        Selection.add(self, virtual_paths)
        self.get_database().dig(nuts)
        self._update_nuts()

687 

    def add_volatile(self, nuts):
        # Like add_virtual, but additionally registers the nuts with the
        # virtual backend and remembers the paths in _volatile_paths
        # (presumably for cleanup on close — see add_volatile_waveforms).
        if not isinstance(nuts, list):
            nuts = list(nuts)

        paths = list(set(nut.file_path for nut in nuts))
        io.backends.virtual.add_nuts(nuts)
        self.add_virtual(nuts, paths)
        self._volatile_paths.extend(paths)

696 

    def add_volatile_waveforms(self, traces):
        '''
        Add in-memory waveforms which will be removed when the app closes.

        :param traces:
            Traces to be held in memory.

        :returns:
            The generated virtual path under which the traces are
            registered.
        '''

        # All given traces share one randomly named virtual "file".
        name = model.random_name()

        path = 'virtual:volatile:%s' % name

        nuts = []
        for itr, tr in enumerate(traces):
            assert tr.tmin <= tr.tmax
            # Split times into integer seconds + fractional offset, as
            # stored in the nuts table.
            tmin_seconds, tmin_offset = model.tsplit(tr.tmin)
            tmax_seconds, tmax_offset = model.tsplit(
                tr.tmin + tr.data_len()*tr.deltat)

            # Each trace becomes one file segment; content is attached
            # directly, so no loading step is needed later.
            nuts.append(model.Nut(
                file_path=path,
                file_format='virtual',
                file_segment=itr,
                file_element=0,
                file_mtime=0,
                codes=tr.codes,
                tmin_seconds=tmin_seconds,
                tmin_offset=tmin_offset,
                tmax_seconds=tmax_seconds,
                tmax_offset=tmax_offset,
                deltat=tr.deltat,
                kind_id=to_kind_id('waveform'),
                content=tr))

        self.add_volatile(nuts)
        return path

730 

    def _load(self, check):
        # Index all files in the selection. No content is extracted
        # (content=[]); the pass over iload is done only for its indexing
        # side effect. Unchanged files are skipped.
        for _ in io.iload(
                self,
                content=[],
                skip_unchanged=True,
                check=check):
            pass

738 

    def _update_nuts(self, transaction=None):
        # Copy nut meta-data of files newly added to the selection from the
        # global `nuts` table into the selection-local nuts table, honoring
        # each file's kind mask. Progress is reported via a task hooked
        # into SQLite's progress handler.
        transaction = transaction or self.transaction('update nuts')
        with make_task('Aggregating selection') as task, \
                transaction as cursor:

            self._conn.set_progress_handler(task.update, 100000)
            nrows = cursor.execute(self._sql(
                '''
                    INSERT INTO %(db)s.%(nuts)s
                    SELECT NULL,
                        nuts.file_id, nuts.file_segment, nuts.file_element,
                        nuts.kind_id, nuts.kind_codes_id,
                        nuts.tmin_seconds, nuts.tmin_offset,
                        nuts.tmax_seconds, nuts.tmax_offset,
                        nuts.kscale
                    FROM %(db)s.%(file_states)s
                    INNER JOIN nuts
                        ON %(db)s.%(file_states)s.file_id == nuts.file_id
                    INNER JOIN kind_codes
                        ON nuts.kind_codes_id ==
                            kind_codes.kind_codes_id
                    WHERE %(db)s.%(file_states)s.file_state != 2
                        AND (((1 << kind_codes.kind_id)
                            & %(db)s.%(file_states)s.kind_mask) != 0)
                ''')).rowcount

            task.update(nrows)
            # Mark the files as processed so subsequent calls skip them.
            self._set_file_states_known(transaction)
            self._conn.set_progress_handler(None, 0)

768 

    def add_source(self, source, check=True):
        '''
        Add remote resource.

        :param source:
            Remote data access client instance.
        :type source:
            subclass of :py:class:`~pyrocko.squirrel.client.base.Source`

        :param check:
            Passed on to the source's ``setup`` method — presumably controls
            checking of cached files like in :py:meth:`add` (confirm against
            the source implementations).
        :type check:
            bool
        '''

        self._sources.append(source)
        source.setup(self, check=check)

781 

    def add_fdsn(self, *args, **kwargs):
        '''
        Add FDSN site for transparent remote data access.

        Convenience wrapper around :py:meth:`add_source`. Arguments are
        passed to :py:class:`~pyrocko.squirrel.client.fdsn.FDSNSource`.
        '''

        self.add_source(fdsn.FDSNSource(*args, **kwargs))

791 

    def add_catalog(self, *args, **kwargs):
        '''
        Add online catalog for transparent event data access.

        Convenience wrapper around :py:meth:`add_source`. Arguments are
        passed to :py:class:`~pyrocko.squirrel.client.catalog.CatalogSource`.
        '''

        self.add_source(catalog.CatalogSource(*args, **kwargs))

801 

    def add_dataset(self, ds, check=True):
        '''
        Read dataset description from file and add its contents.

        :param ds:
            Path to dataset description file, dataset description object
            or name of a built-in dataset. See
            :py:mod:`~pyrocko.squirrel.dataset`.
        :type ds:
            :py:class:`str` or :py:class:`~pyrocko.squirrel.dataset.Dataset`

        :param check:
            If ``True``, all file modification times are checked to see if
            cached information has to be updated (slow). If ``False``, only
            previously unknown files are indexed and cached information is used
            for known files, regardless of file state (fast, corresponds to
            Squirrel's ``--optimistic`` mode). File deletions will go
            undetected in the latter case.
        :type check:
            bool
        '''
        # A string is interpreted as a path or built-in dataset name.
        if isinstance(ds, str):
            ds = dataset.read_dataset(ds)

        # The dataset object attaches its sources/files to this squirrel.
        ds.setup(self, check=check)

827 

828 def _get_selection_args( 

829 self, kind_id, 

830 obj=None, tmin=None, tmax=None, time=None, codes=None): 

831 

832 if codes is not None: 

833 codes = codes_patterns_for_kind(kind_id, codes) 

834 

835 if time is not None: 

836 tmin = time 

837 tmax = time 

838 

839 if obj is not None: 

840 tmin = tmin if tmin is not None else obj.tmin 

841 tmax = tmax if tmax is not None else obj.tmax 

842 codes = codes if codes is not None else codes_patterns_for_kind( 

843 kind_id, obj.codes) 

844 

845 return tmin, tmax, codes 

846 

847 def _get_selection_args_str(self, *args, **kwargs): 

848 

849 tmin, tmax, codes = self._get_selection_args(*args, **kwargs) 

850 return 'tmin: %s, tmax: %s, codes: %s' % ( 

851 util.time_to_str(tmin) if tmin is not None else 'none', 

852 util.time_to_str(tmax) if tmax is not None else 'none', 

853 ','.join(str(entry) for entry in codes)) 

854 

855 def _selection_args_to_kwargs( 

856 self, obj=None, tmin=None, tmax=None, time=None, codes=None): 

857 

858 return dict(obj=obj, tmin=tmin, tmax=tmax, time=time, codes=codes) 

859 

    def _timerange_sql(self, tmin, tmax, kind, cond, args, naiv):
        # Append SQL conditions (and their parameters) selecting nuts of
        # *kind* intersecting [tmin, tmax] to *cond*/*args*. Only the
        # integer-seconds part of the times is used here; callers re-check
        # with full precision (see iter_nuts).

        tmin_seconds, tmin_offset = model.tsplit(tmin)
        tmax_seconds, tmax_offset = model.tsplit(tmax)
        if naiv:
            # Slow reference implementation (for testing): plain range
            # check, no use of the kscale index.
            cond.append('%(db)s.%(nuts)s.tmin_seconds <= ?')
            args.append(tmax_seconds)
        else:
            # Nuts are bucketed by duration scale (kscale). For each scale,
            # a nut can only intersect the query if its tmin lies within
            # the query span widened by that scale's maximum duration —
            # this keeps the lookup on the (kind_id, kscale, tmin_seconds)
            # index.
            tscale_edges = model.tscale_edges
            tmin_cond = []
            for kscale in range(tscale_edges.size + 1):
                if kscale != tscale_edges.size:
                    tscale = int(tscale_edges[kscale])
                    tmin_cond.append('''
                        (%(db)s.%(nuts)s.kind_id = ?
                         AND %(db)s.%(nuts)s.kscale == ?
                         AND %(db)s.%(nuts)s.tmin_seconds BETWEEN ? AND ?)
                    ''')
                    args.extend(
                        (to_kind_id(kind), kscale,
                         tmin_seconds - tscale - 1, tmax_seconds + 1))

                else:
                    # Last bucket (unbounded duration): only an upper
                    # bound on tmin can be used.
                    tmin_cond.append('''
                        (%(db)s.%(nuts)s.kind_id == ?
                         AND %(db)s.%(nuts)s.kscale == ?
                         AND %(db)s.%(nuts)s.tmin_seconds <= ?)
                    ''')

                    args.extend(
                        (to_kind_id(kind), kscale, tmax_seconds + 1))
            if tmin_cond:
                cond.append(' ( ' + ' OR '.join(tmin_cond) + ' ) ')

        # Common lower bound: nut must not end before the query starts.
        cond.append('%(db)s.%(nuts)s.tmax_seconds >= ?')
        args.append(tmin_seconds)

896 

    def _codes_match_sql(self, positive, kind_id, codes, cond, args):
        # Append an SQL condition matching (positive=True) or excluding
        # (positive=False) the given codes patterns to *cond*/*args*.
        pats = codes_patterns_for_kind(kind_id, codes)
        if pats is None:
            return

        # Split patterns into exact strings (fast IN lookup) and glob
        # patterns (GLOB match).
        pats_exact = []
        pats_nonexact = []
        for pat in pats:
            spat = pat.safe_str
            (pats_exact if _is_exact(spat) else pats_nonexact).append(spat)

        codes_cond = []
        if pats_exact:
            codes_cond.append(' ( kind_codes.codes IN ( %s ) ) ' % ', '.join(
                '?'*len(pats_exact)))

            args.extend(pats_exact)

        if pats_nonexact:
            codes_cond.append(' ( %s ) ' % ' OR '.join(
                ('kind_codes.codes GLOB ?',) * len(pats_nonexact)))

            args.extend(pats_nonexact)

        if codes_cond:
            # Negate the whole OR-combination for exclusion queries.
            cond.append('%s ( %s )' % (
                'NOT' if not positive else '',
                ' OR '.join(codes_cond)))

925 

    def iter_nuts(
            self, kind=None, tmin=None, tmax=None, codes=None,
            codes_exclude=None, sample_rate_min=None, sample_rate_max=None,
            naiv=False, kind_codes_ids=None, path=None, limit=None):

        '''
        Iterate over content entities matching given constraints.

        :param kind:
            Content kind (or kinds) to extract.
        :type kind:
            :py:class:`str`, :py:class:`list` of :py:class:`str`

        :param tmin:
            Start time of query interval.
        :type tmin:
            :py:func:`~pyrocko.util.get_time_float`

        :param tmax:
            End time of query interval.
        :type tmax:
            :py:func:`~pyrocko.util.get_time_float`

        :param codes:
            List of code patterns to query.
        :type codes:
            :py:class:`list` of :py:class:`~pyrocko.squirrel.model.Codes`
            objects appropriate for the queried content type, or anything which
            can be converted to such objects.

        :param codes_exclude:
            List of code patterns to exclude from the result.
        :type codes_exclude:
            like *codes*

        :param sample_rate_min:
            Minimum sample rate [Hz]; matched against the stored sampling
            interval (``deltat``).
        :type sample_rate_min:
            float

        :param sample_rate_max:
            Maximum sample rate [Hz].
        :type sample_rate_max:
            float

        :param naiv:
            Bypass time span lookup through indices (slow, for testing).
        :type naiv:
            :py:class:`bool`

        :param kind_codes_ids:
            Kind-codes IDs of contents to be retrieved (internal use).
        :type kind_codes_ids:
            :py:class:`list` of :py:class:`int`

        :param path:
            If given, restrict the query to contents of this file path.
        :type path:
            :py:class:`str`

        :param limit:
            Maximum number of rows fetched by the underlying SQL query.
        :type limit:
            :py:class:`int`

        :yields:
            :py:class:`~pyrocko.squirrel.model.Nut` objects representing the
            intersecting content.

        :complexity:
            O(log N) for the time selection part due to heavy use of database
            indices.

        Query time span is treated as a half-open interval ``[tmin, tmax)``.
        However, if ``tmin`` equals ``tmax``, the edge logics are modified to
        closed-interval so that content intersecting with the time instant ``t
        = tmin = tmax`` is returned (otherwise nothing would be returned as
        ``[t, t)`` never matches anything).

        Time spans of content entities to be matched are also treated as half
        open intervals, e.g. content span ``[0, 1)`` is matched by query span
        ``[0, 1)`` but not by ``[-1, 0)`` or ``[1, 2)``. Also here, logics are
        modified to closed-interval when the content time span is an empty
        interval, i.e. to indicate a time instant. E.g. time instant 0 is
        matched by ``[0, 1)`` but not by ``[-1, 0)`` or ``[1, 2)``.
        '''

        # Multiple kinds (or all kinds, for None): recurse once per kind.
        # NOTE(review): only tmin/tmax/codes are forwarded here — the
        # other constraints (codes_exclude, sample rates, path, limit) are
        # dropped on the multi-kind path; confirm this is intended.
        if not isinstance(kind, str):
            if kind is None:
                kind = model.g_content_kinds
            for kind_ in kind:
                for nut in self.iter_nuts(kind_, tmin, tmax, codes):
                    yield nut

            return

        kind_id = to_kind_id(kind)

        # Accumulate SQL WHERE fragments and their bound parameters.
        cond = []
        args = []
        if tmin is not None or tmax is not None:
            assert kind is not None
            # Open-ended queries are clamped to the selection's total span.
            if tmin is None:
                tmin = self.get_time_span()[0]
            if tmax is None:
                tmax = self.get_time_span()[1] + 1.0

            self._timerange_sql(tmin, tmax, kind, cond, args, naiv)

        cond.append('kind_codes.kind_id == ?')
        args.append(kind_id)

        if codes is not None:
            self._codes_match_sql(True, kind_id, codes, cond, args)

        if codes_exclude is not None:
            self._codes_match_sql(False, kind_id, codes_exclude, cond, args)

        # Sample-rate bounds translate to inverse bounds on deltat.
        if sample_rate_min is not None:
            cond.append('kind_codes.deltat <= ?')
            args.append(1.0/sample_rate_min)

        if sample_rate_max is not None:
            cond.append('? <= kind_codes.deltat')
            args.append(1.0/sample_rate_max)

        if kind_codes_ids is not None:
            cond.append(
                ' ( kind_codes.kind_codes_id IN ( %s ) ) ' % ', '.join(
                    '?'*len(kind_codes_ids)))

            args.extend(kind_codes_ids)

        db = self.get_database()
        if path is not None:
            # Paths are stored relative to the database's basepath.
            cond.append('files.path == ?')
            args.append(db.relpath(abspath(path)))

        sql = ('''
            SELECT
                files.path,
                files.format,
                files.mtime,
                files.size,
                %(db)s.%(nuts)s.file_segment,
                %(db)s.%(nuts)s.file_element,
                kind_codes.kind_id,
                kind_codes.codes,
                %(db)s.%(nuts)s.tmin_seconds,
                %(db)s.%(nuts)s.tmin_offset,
                %(db)s.%(nuts)s.tmax_seconds,
                %(db)s.%(nuts)s.tmax_offset,
                kind_codes.deltat
            FROM files
            INNER JOIN %(db)s.%(nuts)s
                ON files.file_id == %(db)s.%(nuts)s.file_id
            INNER JOIN kind_codes
                ON %(db)s.%(nuts)s.kind_codes_id == kind_codes.kind_codes_id
            ''')

        if cond:
            sql += ''' WHERE ''' + ' AND '.join(cond)

        if limit is not None:
            sql += ''' LIMIT %i''' % limit

        sql = self._sql(sql)
        if tmin is None and tmax is None:
            # No time constraint: yield rows directly.
            for row in self._conn.execute(sql, args):
                row = (db.abspath(row[0]),) + row[1:]
                nut = model.Nut(values_nocheck=row)
                yield nut
        else:
            assert tmin is not None and tmax is not None
            # The SQL preselection is only accurate to integer seconds;
            # re-check the interval logics with full-resolution times here.
            if tmin == tmax:
                # Time-instant query: closed-interval edge logics.
                for row in self._conn.execute(sql, args):
                    row = (db.abspath(row[0]),) + row[1:]
                    nut = model.Nut(values_nocheck=row)
                    if (nut.tmin <= tmin < nut.tmax) \
                            or (nut.tmin == nut.tmax and tmin == nut.tmin):

                        yield nut
            else:
                # Half-open interval query, with closed-interval handling
                # for zero-length (instant) content spans.
                for row in self._conn.execute(sql, args):
                    row = (db.abspath(row[0]),) + row[1:]
                    nut = model.Nut(values_nocheck=row)
                    if (tmin < nut.tmax and nut.tmin < tmax) \
                            or (nut.tmin == nut.tmax
                                and tmin <= nut.tmin < tmax):

                        yield nut

    def get_nuts(self, *args, **kwargs):
        '''
        Get content entities matching given constraints.

        Like :py:meth:`iter_nuts` but returns results as a list. All
        positional and keyword arguments are passed through unchanged to
        :py:meth:`iter_nuts`.
        '''

        return list(self.iter_nuts(*args, **kwargs))

1101 

    def _split_nuts(
            self, kind, tmin=None, tmax=None, codes=None, path=None):

        '''
        Cut nuts of given kind at the boundaries of interval ``[tmin, tmax]``.

        Nuts overlapping the interval are deleted and replaced by their
        remainders outside of the interval (if any). The operation is applied
        to both the live selection's nut table and the main database's nut
        table, inside a single transaction.
        '''

        kind_id = to_kind_id(kind)
        tmin_seconds, tmin_offset = model.tsplit(tmin)
        tmax_seconds, tmax_offset = model.tsplit(tmax)

        # Template substitutions redirecting the SQL templates to the main
        # database's nut table (second pass of the loop below).
        names_main_nuts = dict(self._names)
        names_main_nuts.update(db='main', nuts='nuts')

        db = self.get_database()

        def main_nuts(s):
            return s % names_main_nuts

        with self.transaction('split nuts') as cursor:
            # modify selection and main
            for sql_subst in [
                    self._sql, main_nuts]:

                cond = []
                args = []

                self._timerange_sql(tmin, tmax, kind, cond, args, False)

                if codes is not None:
                    self._codes_match_sql(True, kind_id, codes, cond, args)

                if path is not None:
                    cond.append('files.path == ?')
                    args.append(db.relpath(abspath(path)))

                sql = sql_subst('''
                    SELECT
                        %(db)s.%(nuts)s.nut_id,
                        %(db)s.%(nuts)s.tmin_seconds,
                        %(db)s.%(nuts)s.tmin_offset,
                        %(db)s.%(nuts)s.tmax_seconds,
                        %(db)s.%(nuts)s.tmax_offset,
                        kind_codes.deltat
                    FROM files
                    INNER JOIN %(db)s.%(nuts)s
                        ON files.file_id == %(db)s.%(nuts)s.file_id
                    INNER JOIN kind_codes
                        ON %(db)s.%(nuts)s.kind_codes_id == kind_codes.kind_codes_id
                    WHERE ''' + ' AND '.join(cond))  # noqa

                insert = []
                delete = []
                for row in cursor.execute(sql, args):
                    nut_id, nut_tmin_seconds, nut_tmin_offset, \
                        nut_tmax_seconds, nut_tmax_offset, nut_deltat = row

                    nut_tmin = model.tjoin(
                        nut_tmin_seconds, nut_tmin_offset)
                    nut_tmax = model.tjoin(
                        nut_tmax_seconds, nut_tmax_offset)

                    if nut_tmin < tmax and tmin < nut_tmax:
                        # Nut overlaps the cut interval: schedule left/right
                        # remainders for insertion and the original nut for
                        # deletion.
                        if nut_tmin < tmin:
                            insert.append((
                                nut_tmin_seconds, nut_tmin_offset,
                                tmin_seconds, tmin_offset,
                                model.tscale_to_kscale(
                                    tmin_seconds - nut_tmin_seconds),
                                nut_id))

                        if tmax < nut_tmax:
                            insert.append((
                                tmax_seconds, tmax_offset,
                                nut_tmax_seconds, nut_tmax_offset,
                                model.tscale_to_kscale(
                                    nut_tmax_seconds - tmax_seconds),
                                nut_id))

                        delete.append((nut_id,))

                # Remainder nuts inherit all non-time attributes from the
                # original nut via the nested SELECT on nut_id.
                sql_add = '''
                    INSERT INTO %(db)s.%(nuts)s (
                            file_id, file_segment, file_element, kind_id,
                            kind_codes_id, tmin_seconds, tmin_offset,
                            tmax_seconds, tmax_offset, kscale )
                        SELECT
                            file_id, file_segment, file_element,
                            kind_id, kind_codes_id, ?, ?, ?, ?, ?
                        FROM %(db)s.%(nuts)s
                        WHERE nut_id == ?
                '''
                cursor.executemany(sql_subst(sql_add), insert)

                sql_delete = '''
                    DELETE FROM %(db)s.%(nuts)s WHERE nut_id == ?
                '''
                cursor.executemany(sql_subst(sql_delete), delete)

1196 

1197 def get_time_span(self, kinds=None, tight=True, dummy_limits=True): 

1198 ''' 

1199 Get time interval over all content in selection. 

1200 

1201 :param kinds: 

1202 If not ``None``, restrict query to given content kinds. 

1203 :type kind: 

1204 list of str 

1205 

1206 :complexity: 

1207 O(1), independent of the number of nuts. 

1208 

1209 :returns: 

1210 ``(tmin, tmax)``, combined time interval of queried content kinds. 

1211 ''' 

1212 

1213 sql_min = self._sql(''' 

1214 SELECT MIN(tmin_seconds), MIN(tmin_offset) 

1215 FROM %(db)s.%(nuts)s 

1216 WHERE kind_id == ? 

1217 AND tmin_seconds == ( 

1218 SELECT MIN(tmin_seconds) 

1219 FROM %(db)s.%(nuts)s 

1220 WHERE kind_id == ?) 

1221 ''') 

1222 

1223 sql_max = self._sql(''' 

1224 SELECT MAX(tmax_seconds), MAX(tmax_offset) 

1225 FROM %(db)s.%(nuts)s 

1226 WHERE kind_id == ? 

1227 AND tmax_seconds == ( 

1228 SELECT MAX(tmax_seconds) 

1229 FROM %(db)s.%(nuts)s 

1230 WHERE kind_id == ?) 

1231 ''') 

1232 

1233 gtmin = None 

1234 gtmax = None 

1235 

1236 if isinstance(kinds, str): 

1237 kinds = [kinds] 

1238 

1239 if kinds is None: 

1240 kind_ids = model.g_content_kind_ids 

1241 else: 

1242 kind_ids = model.to_kind_ids(kinds) 

1243 

1244 tmins = [] 

1245 tmaxs = [] 

1246 for kind_id in kind_ids: 

1247 for tmin_seconds, tmin_offset in self._conn.execute( 

1248 sql_min, (kind_id, kind_id)): 

1249 tmins.append(model.tjoin(tmin_seconds, tmin_offset)) 

1250 

1251 for (tmax_seconds, tmax_offset) in self._conn.execute( 

1252 sql_max, (kind_id, kind_id)): 

1253 tmaxs.append(model.tjoin(tmax_seconds, tmax_offset)) 

1254 

1255 tmins = [tmin if tmin != model.g_tmin else None for tmin in tmins] 

1256 tmaxs = [tmax if tmax != model.g_tmax else None for tmax in tmaxs] 

1257 

1258 if tight: 

1259 gtmin = nonef(min, tmins) 

1260 gtmax = nonef(max, tmaxs) 

1261 else: 

1262 gtmin = None if None in tmins else nonef(min, tmins) 

1263 gtmax = None if None in tmaxs else nonef(max, tmaxs) 

1264 

1265 if dummy_limits: 

1266 if gtmin is None: 

1267 gtmin = model.g_tmin 

1268 if gtmax is None: 

1269 gtmax = model.g_tmax 

1270 

1271 return gtmin, gtmax 

1272 

1273 def has(self, kinds): 

1274 ''' 

1275 Check availability of given content kinds. 

1276 

1277 :param kinds: 

1278 Content kinds to query. 

1279 :type kind: 

1280 list of str 

1281 

1282 :returns: 

1283 ``True`` if any of the queried content kinds is available 

1284 in the selection. 

1285 ''' 

1286 self_tmin, self_tmax = self.get_time_span( 

1287 kinds, dummy_limits=False) 

1288 

1289 return None not in (self_tmin, self_tmax) 

1290 

1291 def get_deltat_span(self, kind): 

1292 ''' 

1293 Get min and max sampling interval of all content of given kind. 

1294 

1295 :param kind: 

1296 Content kind 

1297 :type kind: 

1298 str 

1299 

1300 :returns: ``(deltat_min, deltat_max)`` 

1301 ''' 

1302 

1303 deltats = [ 

1304 deltat for deltat in self.get_deltats(kind) 

1305 if deltat is not None] 

1306 

1307 if deltats: 

1308 return min(deltats), max(deltats) 

1309 else: 

1310 return None, None 

1311 

1312 def iter_kinds(self, codes=None): 

1313 ''' 

1314 Iterate over content types available in selection. 

1315 

1316 :param codes: 

1317 If given, get kinds only for selected codes identifier. 

1318 Only a single identifier may be given here and no pattern matching 

1319 is done, currently. 

1320 :type codes: 

1321 :py:class:`~pyrocko.squirrel.model.Codes` 

1322 

1323 :yields: 

1324 Available content kinds as :py:class:`str`. 

1325 

1326 :complexity: 

1327 O(1), independent of number of nuts. 

1328 ''' 

1329 

1330 return self._database._iter_kinds( 

1331 codes=codes, 

1332 kind_codes_count='%(db)s.%(kind_codes_count)s' % self._names) 

1333 

1334 def iter_deltats(self, kind=None): 

1335 ''' 

1336 Iterate over sampling intervals available in selection. 

1337 

1338 :param kind: 

1339 If given, get sampling intervals only for a given content type. 

1340 :type kind: 

1341 str 

1342 

1343 :yields: 

1344 :py:class:`float` values. 

1345 

1346 :complexity: 

1347 O(1), independent of number of nuts. 

1348 ''' 

1349 return self._database._iter_deltats( 

1350 kind=kind, 

1351 kind_codes_count='%(db)s.%(kind_codes_count)s' % self._names) 

1352 

1353 def iter_codes(self, kind=None): 

1354 ''' 

1355 Iterate over content identifier code sequences available in selection. 

1356 

1357 :param kind: 

1358 If given, get codes only for a given content type. 

1359 :type kind: 

1360 str 

1361 

1362 :yields: 

1363 :py:class:`tuple` of :py:class:`str` 

1364 

1365 :complexity: 

1366 O(1), independent of number of nuts. 

1367 ''' 

1368 return self._database._iter_codes( 

1369 kind=kind, 

1370 kind_codes_count='%(db)s.%(kind_codes_count)s' % self._names) 

1371 

1372 def _iter_codes_info(self, kind=None, codes=None): 

1373 ''' 

1374 Iterate over number of occurrences of any (kind, codes) combination. 

1375 

1376 :param kind: 

1377 If given, get counts only for selected content type. 

1378 :type kind: 

1379 str 

1380 

1381 :yields: 

1382 Tuples of the form ``(kind, codes, deltat, kind_codes_id, count)``. 

1383 

1384 :complexity: 

1385 O(1), independent of number of nuts. 

1386 ''' 

1387 return self._database._iter_codes_info( 

1388 kind=kind, 

1389 codes=codes, 

1390 kind_codes_count='%(db)s.%(kind_codes_count)s' % self._names) 

1391 

1392 def get_kinds(self, codes=None): 

1393 ''' 

1394 Get content types available in selection. 

1395 

1396 :param codes: 

1397 If given, get kinds only for selected codes identifier. 

1398 Only a single identifier may be given here and no pattern matching 

1399 is done, currently. 

1400 :type codes: 

1401 :py:class:`~pyrocko.squirrel.model.Codes` 

1402 

1403 :returns: 

1404 Sorted list of available content types. 

1405 :rtype: 

1406 py:class:`list` of :py:class:`str` 

1407 

1408 :complexity: 

1409 O(1), independent of number of nuts. 

1410 

1411 ''' 

1412 return sorted(list(self.iter_kinds(codes=codes))) 

1413 

1414 def get_deltats(self, kind=None): 

1415 ''' 

1416 Get sampling intervals available in selection. 

1417 

1418 :param kind: 

1419 If given, get sampling intervals only for selected content type. 

1420 :type kind: 

1421 str 

1422 

1423 :complexity: 

1424 O(1), independent of number of nuts. 

1425 

1426 :returns: Sorted list of available sampling intervals. 

1427 ''' 

1428 return sorted(list(self.iter_deltats(kind=kind))) 

1429 

1430 def get_codes(self, kind=None): 

1431 ''' 

1432 Get identifier code sequences available in selection. 

1433 

1434 :param kind: 

1435 If given, get codes only for selected content type. 

1436 :type kind: 

1437 str 

1438 

1439 :complexity: 

1440 O(1), independent of number of nuts. 

1441 

1442 :returns: Sorted list of available codes as tuples of strings. 

1443 ''' 

1444 return sorted(list(self.iter_codes(kind=kind))) 

1445 

1446 def get_counts(self, kind=None): 

1447 ''' 

1448 Get number of occurrences of any (kind, codes) combination. 

1449 

1450 :param kind: 

1451 If given, get codes only for selected content type. 

1452 :type kind: 

1453 str 

1454 

1455 :complexity: 

1456 O(1), independent of number of nuts. 

1457 

1458 :returns: ``dict`` with ``counts[kind][codes]`` or ``counts[codes]`` 

1459 if kind is not ``None`` 

1460 ''' 

1461 d = {} 

1462 for kind_id, codes, _, _, count in self._iter_codes_info(kind=kind): 

1463 if kind_id not in d: 

1464 v = d[kind_id] = {} 

1465 else: 

1466 v = d[kind_id] 

1467 

1468 if codes not in v: 

1469 v[codes] = 0 

1470 

1471 v[codes] += count 

1472 

1473 if kind is not None: 

1474 return d[to_kind_id(kind)] 

1475 else: 

1476 return dict((to_kind(kind_id), v) for (kind_id, v) in d.items()) 

1477 

1478 def glob_codes(self, kind, codes): 

1479 ''' 

1480 Find codes matching given patterns. 

1481 

1482 :param kind: 

1483 Content kind to be queried. 

1484 :type kind: 

1485 str 

1486 

1487 :param codes: 

1488 List of code patterns to query. 

1489 :type codes: 

1490 :py:class:`list` of :py:class:`~pyrocko.squirrel.model.Codes` 

1491 objects appropriate for the queried content type, or anything which 

1492 can be converted to such objects. 

1493 

1494 :returns: 

1495 List of matches of the form ``[kind_codes_id, codes, deltat]``. 

1496 ''' 

1497 

1498 kind_id = to_kind_id(kind) 

1499 args = [kind_id] 

1500 pats = codes_patterns_for_kind(kind_id, codes) 

1501 

1502 if pats: 

1503 codes_cond = 'AND ( %s ) ' % ' OR '.join( 

1504 ('kind_codes.codes GLOB ?',) * len(pats)) 

1505 

1506 args.extend(pat.safe_str for pat in pats) 

1507 else: 

1508 codes_cond = '' 

1509 

1510 sql = self._sql(''' 

1511 SELECT kind_codes_id, codes, deltat FROM kind_codes 

1512 WHERE 

1513 kind_id == ? ''' + codes_cond) 

1514 

1515 return list(map(list, self._conn.execute(sql, args))) 

1516 

1517 def update(self, constraint=None, **kwargs): 

1518 ''' 

1519 Update or partially update channel and event inventories. 

1520 

1521 :param constraint: 

1522 Selection of times or areas to be brought up to date. 

1523 :type constraint: 

1524 :py:class:`~pyrocko.squirrel.client.base.Constraint` 

1525 

1526 :param \\*\\*kwargs: 

1527 Shortcut for setting ``constraint=Constraint(**kwargs)``. 

1528 

1529 This function triggers all attached remote sources, to check for 

1530 updates in the meta-data. The sources will only submit queries when 

1531 their expiration date has passed, or if the selection spans into 

1532 previously unseen times or areas. 

1533 ''' 

1534 

1535 if constraint is None: 

1536 constraint = client.Constraint(**kwargs) 

1537 

1538 task = make_task('Updating sources') 

1539 for source in task(self._sources): 

1540 source.update_channel_inventory(self, constraint) 

1541 source.update_event_inventory(self, constraint) 

1542 

1543 def update_waveform_promises(self, constraint=None, **kwargs): 

1544 ''' 

1545 Permit downloading of remote waveforms. 

1546 

1547 :param constraint: 

1548 Remote waveforms compatible with the given constraint are enabled 

1549 for download. 

1550 :type constraint: 

1551 :py:class:`~pyrocko.squirrel.client.base.Constraint` 

1552 

1553 :param \\*\\*kwargs: 

1554 Shortcut for setting ``constraint=Constraint(**kwargs)``. 

1555 

1556 Calling this method permits Squirrel to download waveforms from remote 

1557 sources when processing subsequent waveform requests. This works by 

1558 inserting so called waveform promises into the database. It will look 

1559 into the available channels for each remote source and create a promise 

1560 for each channel compatible with the given constraint. If the promise 

1561 then matches in a waveform request, Squirrel tries to download the 

1562 waveform. If the download is successful, the downloaded waveform is 

1563 added to the Squirrel and the promise is deleted. If the download 

1564 fails, the promise is kept if the reason of failure looks like being 

1565 temporary, e.g. because of a network failure. If the cause of failure 

1566 however seems to be permanent, the promise is deleted so that no 

1567 further attempts are made to download a waveform which might not be 

1568 available from that server at all. To force re-scheduling after a 

1569 permanent failure, call :py:meth:`update_waveform_promises` 

1570 yet another time. 

1571 ''' 

1572 

1573 if constraint is None: 

1574 constraint = client.Constraint(**kwargs) 

1575 

1576 for source in self._sources: 

1577 source.update_waveform_promises(self, constraint) 

1578 

1579 def remove_waveform_promises(self, from_database='selection'): 

1580 ''' 

1581 Remove waveform promises from live selection or global database. 

1582 

1583 Calling this function removes all waveform promises provided by the 

1584 attached sources. 

1585 

1586 :param from_database: 

1587 Remove from live selection ``'selection'`` or global database 

1588 ``'global'``. 

1589 ''' 

1590 for source in self._sources: 

1591 source.remove_waveform_promises(self, from_database=from_database) 

1592 

    def update_responses(self, constraint=None, **kwargs):
        '''
        Update or partially update instrument response inventories.

        :param constraint:
            Selection of times or areas to be brought up to date.
        :type constraint:
            :py:class:`~pyrocko.squirrel.client.base.Constraint`

        :param \\*\\*kwargs:
            Shortcut for setting ``constraint=Constraint(**kwargs)``.
        '''
        if constraint is None:
            constraint = client.Constraint(**kwargs)

        for source in self._sources:
            source.update_response_inventory(self, constraint)

1599 

1600 def get_nfiles(self): 

1601 ''' 

1602 Get number of files in selection. 

1603 ''' 

1604 

1605 sql = self._sql('''SELECT COUNT(*) FROM %(db)s.%(file_states)s''') 

1606 for row in self._conn.execute(sql): 

1607 return row[0] 

1608 

1609 def get_nnuts(self): 

1610 ''' 

1611 Get number of nuts in selection. 

1612 ''' 

1613 

1614 sql = self._sql('''SELECT COUNT(*) FROM %(db)s.%(nuts)s''') 

1615 for row in self._conn.execute(sql): 

1616 return row[0] 

1617 

1618 def get_total_size(self): 

1619 ''' 

1620 Get aggregated file size available in selection. 

1621 ''' 

1622 

1623 sql = self._sql(''' 

1624 SELECT SUM(files.size) FROM %(db)s.%(file_states)s 

1625 INNER JOIN files 

1626 ON %(db)s.%(file_states)s.file_id = files.file_id 

1627 ''') 

1628 

1629 for row in self._conn.execute(sql): 

1630 return row[0] or 0 

1631 

1632 def get_stats(self): 

1633 ''' 

1634 Get statistics on contents available through this selection. 

1635 ''' 

1636 

1637 kinds = self.get_kinds() 

1638 time_spans = {} 

1639 for kind in kinds: 

1640 time_spans[kind] = self.get_time_span([kind]) 

1641 

1642 return SquirrelStats( 

1643 nfiles=self.get_nfiles(), 

1644 nnuts=self.get_nnuts(), 

1645 kinds=kinds, 

1646 codes=self.get_codes(), 

1647 total_size=self.get_total_size(), 

1648 counts=self.get_counts(), 

1649 time_spans=time_spans, 

1650 sources=[s.describe() for s in self._sources], 

1651 operators=[op.describe() for op in self._operators]) 

1652 

1653 @filldocs 

1654 def check( 

1655 self, obj=None, tmin=None, tmax=None, time=None, codes=None, 

1656 ignore=[]): 

1657 ''' 

1658 Check for common data/metadata problems. 

1659 

1660 %(query_args)s 

1661 

1662 :param ignore: 

1663 Problem types to be ignored. 

1664 :type ignore: 

1665 :class:`list` of :class:`str` 

1666 (:py:class:`~pyrocko.squirrel.check.SquirrelCheckProblemType`) 

1667 

1668 :returns: 

1669 :py:class:`~pyrocko.squirrel.check.SquirrelCheck` object 

1670 containing the results of the check. 

1671 

1672 See :py:func:`~pyrocko.squirrel.check.do_check`. 

1673 ''' 

1674 

1675 from .check import do_check 

1676 tmin, tmax, codes = self._get_selection_args( 

1677 CHANNEL, obj, tmin, tmax, time, codes) 

1678 

1679 return do_check(self, tmin=tmin, tmax=tmax, codes=codes, ignore=ignore) 

1680 

    def get_content(
            self,
            nut,
            cache_id='default',
            accessor_id='default',
            show_progress=False,
            model='squirrel'):

        '''
        Get and possibly load full content for a given index entry from file.

        Loads the actual content objects (channel, station, waveform, ...) from
        file. For efficiency, sibling content (all stuff in the same file
        segment) will also be loaded as a side effect. The loaded contents are
        cached in the Squirrel object.

        :param nut:
            Index entry identifying the wanted content.
        :param cache_id:
            Name of the memory cache to use.
        :param accessor_id:
            Name of the consumer requesting the content (used for cache
            bookkeeping).
        :param show_progress:
            Show progress while loading from file.
        :param model:
            Object model of the returned content.
        :raises:
            :py:exc:`~pyrocko.squirrel.error.NotAvailable` if the content
            cannot be retrieved after loading.
        '''

        content_cache = self._content_caches[cache_id]
        if not content_cache.has(nut):

            # Cache miss: load the whole file segment; siblings end up in the
            # cache alongside the requested nut.
            for nut_loaded in io.iload(
                    nut.file_path,
                    segment=nut.file_segment,
                    format=nut.file_format,
                    database=self._database,
                    update_selection=self,
                    show_progress=show_progress):

                content_cache.put(nut_loaded)

        try:
            return content_cache.get(nut, accessor_id, model)

        except KeyError:
            # Content expected from the file was not found after loading
            # (e.g. file changed on disk since indexing).
            raise error.NotAvailable(
                'Unable to retrieve content: %s, %s, %s, %s' % nut.key)

1717 

1718 def advance_accessor(self, accessor_id='default', cache_id=None): 

1719 ''' 

1720 Notify memory caches about consumer moving to a new data batch. 

1721 

1722 :param accessor_id: 

1723 Name of accessing consumer to be advanced. 

1724 :type accessor_id: 

1725 str 

1726 

1727 :param cache_id: 

1728 Name of cache to for which the accessor should be advanced. By 

1729 default the named accessor is advanced in all registered caches. 

1730 By default, two caches named ``'default'`` and ``'waveform'`` are 

1731 available. 

1732 :type cache_id: 

1733 str 

1734 

1735 See :py:class:`~pyrocko.squirrel.cache.ContentCache` for details on how 

1736 Squirrel's memory caching works and can be tuned. Default behaviour is 

1737 to release data when it has not been used in the latest data 

1738 window/batch. If the accessor is never advanced, data is cached 

1739 indefinitely - which is often desired e.g. for station meta-data. 

1740 Methods for consecutive data traversal, like 

1741 :py:meth:`chopper_waveforms` automatically advance and clear 

1742 their accessor. 

1743 ''' 

1744 for cache_ in ( 

1745 self._content_caches.keys() 

1746 if cache_id is None 

1747 else [cache_id]): 

1748 

1749 self._content_caches[cache_].advance_accessor(accessor_id) 

1750 

1751 def clear_accessor(self, accessor_id, cache_id=None): 

1752 ''' 

1753 Notify memory caches about a consumer having finished. 

1754 

1755 :param accessor_id: 

1756 Name of accessor to be cleared. 

1757 :type accessor_id: 

1758 str 

1759 

1760 :param cache_id: 

1761 Name of cache for which the accessor should be cleared. By default 

1762 the named accessor is cleared from all registered caches. By 

1763 default, two caches named ``'default'`` and ``'waveform'`` are 

1764 available. 

1765 :type cache_id: 

1766 str 

1767 

1768 Calling this method clears all references to cache entries held by the 

1769 named accessor. Cache entries are then freed if not referenced by any 

1770 other accessor. 

1771 ''' 

1772 

1773 for cache_ in ( 

1774 self._content_caches.keys() 

1775 if cache_id is None 

1776 else [cache_id]): 

1777 

1778 self._content_caches[cache_].clear_accessor(accessor_id) 

1779 

    def get_cache_stats(self, cache_id):
        '''
        Get statistics for a named memory cache.

        :param cache_id:
            Name of the cache to query, e.g. ``'default'`` or ``'waveform'``.
        :type cache_id:
            str
        '''
        return self._content_caches[cache_id].get_stats()

1782 

1783 @filldocs 

1784 def get_stations( 

1785 self, obj=None, tmin=None, tmax=None, time=None, codes=None, 

1786 model='squirrel', on_error='raise'): 

1787 

1788 ''' 

1789 Get stations matching given constraints. 

1790 

1791 %(query_args)s 

1792 

1793 :param model: 

1794 Select object model for returned values: ``'squirrel'`` to get 

1795 Squirrel station objects or ``'pyrocko'`` to get Pyrocko station 

1796 objects with channel information attached. 

1797 :type model: 

1798 str 

1799 

1800 :returns: 

1801 List of :py:class:`pyrocko.squirrel.Station 

1802 <pyrocko.squirrel.model.Station>` objects by default or list of 

1803 :py:class:`pyrocko.model.Station <pyrocko.model.station.Station>` 

1804 objects if ``model='pyrocko'`` is requested. 

1805 

1806 See :py:meth:`iter_nuts` for details on time span matching. 

1807 ''' 

1808 

1809 if model == 'pyrocko': 

1810 return self._get_pyrocko_stations( 

1811 obj, tmin, tmax, time, codes, on_error=on_error) 

1812 elif model in ('squirrel', 'stationxml', 'stationxml+'): 

1813 args = self._get_selection_args( 

1814 STATION, obj, tmin, tmax, time, codes) 

1815 

1816 nuts = sorted( 

1817 self.iter_nuts('station', *args), key=lambda nut: nut.dkey) 

1818 

1819 return [self.get_content(nut, model=model) for nut in nuts] 

1820 else: 

1821 raise ValueError('Invalid station model: %s' % model) 

1822 

1823 @filldocs 

1824 def get_channels( 

1825 self, obj=None, tmin=None, tmax=None, time=None, codes=None, 

1826 model='squirrel'): 

1827 

1828 ''' 

1829 Get channels matching given constraints. 

1830 

1831 %(query_args)s 

1832 

1833 :returns: 

1834 List of :py:class:`~pyrocko.squirrel.model.Channel` objects. 

1835 

1836 See :py:meth:`iter_nuts` for details on time span matching. 

1837 ''' 

1838 

1839 args = self._get_selection_args( 

1840 CHANNEL, obj, tmin, tmax, time, codes) 

1841 

1842 nuts = sorted( 

1843 self.iter_nuts('channel', *args), key=lambda nut: nut.dkey) 

1844 

1845 return [self.get_content(nut, model=model) for nut in nuts] 

1846 

    @filldocs
    def get_sensors(
            self, obj=None, tmin=None, tmax=None, time=None, codes=None):

        '''
        Get sensors matching given constraints.

        %(query_args)s

        :returns:
            List of :py:class:`~pyrocko.squirrel.model.Sensor` objects.

        See :py:meth:`iter_nuts` for details on time span matching.
        '''

        tmin, tmax, codes = self._get_selection_args(
            CHANNEL, obj, tmin, tmax, time, codes)

        if codes is not None:
            # Widen channel patterns so that all components of a sensor are
            # matched: the last character (the component code) is replaced
            # with '?', except for plain '*' wildcards which already match
            # everything.
            codes = codes_patterns_list(
                (entry.replace(channel=entry.channel[:-1] + '?')
                 if entry.channel != '*' else entry)
                for entry in codes)

        nuts = sorted(
            self.iter_nuts(
                'channel', tmin, tmax, codes), key=lambda nut: nut.dkey)

        # Group channels into sensors; keep only sensors whose time span
        # intersects the queried interval.
        return [
            sensor for sensor in model.Sensor.from_channels(
                self.get_content(nut) for nut in nuts)
            if match_time_span(tmin, tmax, sensor)]

1879 

1880 @filldocs 

1881 def get_responses( 

1882 self, obj=None, tmin=None, tmax=None, time=None, codes=None, 

1883 model='squirrel'): 

1884 

1885 ''' 

1886 Get instrument responses matching given constraints. 

1887 

1888 %(query_args)s 

1889 

1890 :param model: 

1891 Select data model for returned objects. Choices: ``'squirrel'``, 

1892 ``'stationxml'``, ``'stationxml+'``. See return value description. 

1893 :type model: 

1894 str 

1895 

1896 :returns: 

1897 List of :py:class:`~pyrocko.squirrel.model.Response` if ``model == 

1898 'squirrel'`` or list of 

1899 :py:class:`~pyrocko.io.stationxml.FDSNStationXML` 

1900 if ``model == 'stationxml'`` or list of 

1901 (:py:class:`~pyrocko.squirrel.model.Response`, 

1902 :py:class:`~pyrocko.io.stationxml.FDSNStationXML`) if ``model == 

1903 'stationxml+'``. 

1904 

1905 See :py:meth:`iter_nuts` for details on time span matching. 

1906 ''' 

1907 

1908 args = self._get_selection_args( 

1909 RESPONSE, obj, tmin, tmax, time, codes) 

1910 

1911 nuts = sorted( 

1912 self.iter_nuts('response', *args), key=lambda nut: nut.dkey) 

1913 

1914 return [self.get_content(nut, model=model) for nut in nuts] 

1915 

1916 @filldocs 

1917 def get_response( 

1918 self, obj=None, tmin=None, tmax=None, time=None, codes=None, 

1919 model='squirrel', on_duplicate='raise'): 

1920 

1921 ''' 

1922 Get instrument response matching given constraints. 

1923 

1924 %(query_args)s 

1925 

1926 :param model: 

1927 Select data model for returned object. Choices: ``'squirrel'``, 

1928 ``'stationxml'``, ``'stationxml+'``. See return value description. 

1929 :type model: 

1930 str 

1931 

1932 :param on_duplicate: 

1933 Determines how duplicates/multiple matching responses are handled. 

1934 Choices: ``'raise'`` - raise 

1935 :py:exc:`~pyrocko.squirrel.error.Duplicate`, ``'warn'`` - emit a 

1936 warning and return first match, ``'ignore'`` - silently return 

1937 first match. 

1938 :type on_duplicate: 

1939 str 

1940 

1941 :returns: 

1942 :py:class:`~pyrocko.squirrel.model.Response` if 

1943 ``model == 'squirrel'`` or 

1944 :py:class:`~pyrocko.io.stationxml.FDSNStationXML` if ``model == 

1945 'stationxml'`` or 

1946 (:py:class:`~pyrocko.squirrel.model.Response`, 

1947 :py:class:`~pyrocko.io.stationxml.FDSNStationXML`) if ``model == 

1948 'stationxml+'``. 

1949 

1950 Same as :py:meth:`get_responses` but returning exactly one response. 

1951 Raises :py:exc:`~pyrocko.squirrel.error.NotAvailable` if none is 

1952 available. Duplicates are handled according to the ``on_duplicate`` 

1953 argument. 

1954 

1955 See :py:meth:`iter_nuts` for details on time span matching. 

1956 ''' 

1957 

1958 if model == 'stationxml': 

1959 model_ = 'stationxml+' 

1960 else: 

1961 model_ = model 

1962 

1963 responses = self.get_responses( 

1964 obj, tmin, tmax, time, codes, model=model_) 

1965 if len(responses) == 0: 

1966 raise error.NotAvailable( 

1967 'No instrument response available (%s).' 

1968 % self._get_selection_args_str( 

1969 RESPONSE, obj, tmin, tmax, time, codes)) 

1970 

1971 elif len(responses) > 1: 

1972 

1973 if on_duplicate in ('raise', 'warn'): 

1974 if model_ == 'squirrel': 

1975 resps_sq = responses 

1976 elif model_ == 'stationxml+': 

1977 resps_sq = [resp[0] for resp in responses] 

1978 else: 

1979 raise ValueError('Invalid response model: %s' % model) 

1980 

1981 rinfo = ':\n' + '\n'.join( 

1982 ' ' + resp.summary for resp in resps_sq) 

1983 

1984 message = \ 

1985 'Multiple instrument responses matching given ' \ 

1986 'constraints (%s)%s%s' % ( 

1987 self._get_selection_args_str( 

1988 RESPONSE, obj, tmin, tmax, time, codes), 

1989 ' -> using first' if on_duplicate == 'warn' else '', 

1990 rinfo) 

1991 

1992 if on_duplicate == 'raise': 

1993 raise error.Duplicate(message) 

1994 

1995 elif on_duplicate == 'warn': 

1996 logger.warning(message) 

1997 

1998 elif on_duplicate == 'ignore': 

1999 pass 

2000 

2001 else: 

2002 ValueError( 

2003 'Invalid argument for on_duplicate: %s' % on_duplicate) 

2004 

2005 if model == 'stationxml': 

2006 return responses[0][1] 

2007 else: 

2008 return responses[0] 

2009 

2010 @filldocs 

2011 def get_events( 

2012 self, obj=None, tmin=None, tmax=None, time=None, codes=None): 

2013 

2014 ''' 

2015 Get events matching given constraints. 

2016 

2017 %(query_args)s 

2018 

2019 :returns: 

2020 List of :py:class:`~pyrocko.model.event.Event` objects. 

2021 

2022 See :py:meth:`iter_nuts` for details on time span matching. 

2023 ''' 

2024 

2025 args = self._get_selection_args(EVENT, obj, tmin, tmax, time, codes) 

2026 nuts = sorted( 

2027 self.iter_nuts('event', *args), key=lambda nut: nut.dkey) 

2028 

2029 return [self.get_content(nut) for nut in nuts] 

2030 

2031 def _redeem_promises(self, *args, order_only=False): 

2032 

2033 def split_promise(order, tmax=None): 

2034 self._split_nuts( 

2035 'waveform_promise', 

2036 order.tmin, tmax if tmax is not None else order.tmax, 

2037 codes=order.codes, 

2038 path=order.source_id) 

2039 

2040 tmin, tmax = args[:2] 

2041 

2042 waveforms = list(self.iter_nuts('waveform', *args)) 

2043 promises = list(self.iter_nuts('waveform_promise', *args)) 

2044 

2045 codes_to_avail = defaultdict(list) 

2046 for nut in waveforms: 

2047 codes_to_avail[nut.codes].append((nut.tmin, nut.tmax)) 

2048 

2049 def tts(x): 

2050 if isinstance(x, tuple): 

2051 return tuple(tts(e) for e in x) 

2052 elif isinstance(x, list): 

2053 return list(tts(e) for e in x) 

2054 else: 

2055 return util.time_to_str(x) 

2056 

2057 now = time.time() 

2058 orders = [] 

2059 for promise in promises: 

2060 waveforms_avail = codes_to_avail[promise.codes] 

2061 for block_tmin, block_tmax in blocks( 

2062 max(tmin, promise.tmin), 

2063 min(tmax, promise.tmax), 

2064 promise.deltat): 

2065 

2066 if block_tmin > now: 

2067 continue 

2068 

2069 orders.append( 

2070 WaveformOrder( 

2071 source_id=promise.file_path, 

2072 codes=promise.codes, 

2073 tmin=block_tmin, 

2074 tmax=block_tmax, 

2075 deltat=promise.deltat, 

2076 gaps=gaps(waveforms_avail, block_tmin, block_tmax), 

2077 time_created=now)) 

2078 

2079 orders_noop, orders = lpick(lambda order: order.gaps, orders) 

2080 

2081 order_keys_noop = set(order_key(order) for order in orders_noop) 

2082 if len(order_keys_noop) != 0 or len(orders_noop) != 0: 

2083 logger.info( 

2084 'Waveform orders already satisified with cached/local data: ' 

2085 '%i (%i)' % (len(order_keys_noop), len(orders_noop))) 

2086 

2087 for order in orders_noop: 

2088 split_promise(order) 

2089 

2090 if order_only: 

2091 if orders: 

2092 self._pending_orders.extend(orders) 

2093 logger.info( 

2094 'Enqueuing %i waveform order%s.' 

2095 % len_plural(orders)) 

2096 return 

2097 else: 

2098 if self._pending_orders: 

2099 orders.extend(self._pending_orders) 

2100 logger.info( 

2101 'Adding %i previously enqueued order%s.' 

2102 % len_plural(self._pending_orders)) 

2103 

2104 self._pending_orders = [] 

2105 

2106 source_ids = [] 

2107 sources = {} 

2108 for source in self._sources: 

2109 if isinstance(source, fdsn.FDSNSource): 

2110 source_ids.append(source._source_id) 

2111 sources[source._source_id] = source 

2112 

2113 source_priority = dict( 

2114 (source_id, i) for (i, source_id) in enumerate(source_ids)) 

2115 

2116 order_groups = defaultdict(list) 

2117 for order in orders: 

2118 order_groups[order_key(order)].append(order) 

2119 

2120 for k, order_group in order_groups.items(): 

2121 order_group.sort( 

2122 key=lambda order: source_priority[order.source_id]) 

2123 

2124 n_order_groups = len(order_groups) 

2125 

2126 if len(order_groups) != 0 or len(orders) != 0: 

2127 logger.info( 

2128 'Waveform orders standing for download: %i (%i)' 

2129 % (len(order_groups), len(orders))) 

2130 

2131 task = make_task('Waveform orders processed', n_order_groups) 

2132 else: 

2133 task = None 

2134 

2135 def release_order_group(order): 

2136 okey = order_key(order) 

2137 for followup in order_groups[okey]: 

2138 if followup is not order: 

2139 split_promise(followup) 

2140 

2141 del order_groups[okey] 

2142 

2143 if task: 

2144 task.update(n_order_groups - len(order_groups)) 

2145 

2146 def noop(order): 

2147 pass 

2148 

2149 def success(order, trs): 

2150 release_order_group(order) 

2151 if order.is_near_real_time(): 

2152 if not trs: 

2153 return # keep promise when no data received at real time 

2154 else: 

2155 tmax = max(tr.tmax+tr.deltat for tr in trs) 

2156 tmax = order.tmin \ 

2157 + round((tmax - order.tmin) / order.deltat) \ 

2158 * order.deltat 

2159 split_promise(order, tmax) 

2160 else: 

2161 split_promise(order) 

2162 

2163 def batch_add(paths): 

2164 self.add(paths) 

2165 

2166 calls = queue.Queue() 

2167 

2168 def enqueue(f): 

2169 def wrapper(*args): 

2170 calls.put((f, args)) 

2171 

2172 return wrapper 

2173 

2174 while order_groups: 

2175 

2176 orders_now = [] 

2177 empty = [] 

2178 for k, order_group in order_groups.items(): 

2179 try: 

2180 orders_now.append(order_group.pop(0)) 

2181 except IndexError: 

2182 empty.append(k) 

2183 

2184 for k in empty: 

2185 del order_groups[k] 

2186 

2187 by_source_id = defaultdict(list) 

2188 for order in orders_now: 

2189 by_source_id[order.source_id].append(order) 

2190 

2191 threads = [] 

2192 for source_id in by_source_id: 

2193 def download(): 

2194 try: 

2195 sources[source_id].download_waveforms( 

2196 by_source_id[source_id], 

2197 success=enqueue(success), 

2198 error_permanent=enqueue(split_promise), 

2199 error_temporary=noop, 

2200 batch_add=enqueue(batch_add)) 

2201 

2202 finally: 

2203 calls.put(None) 

2204 

2205 if len(by_source_id) > 1: 

2206 thread = threading.Thread(target=download) 

2207 thread.start() 

2208 threads.append(thread) 

2209 else: 

2210 download() 

2211 calls.put(None) 

2212 

2213 ndone = 0 

2214 while ndone < len(by_source_id): 

2215 ret = calls.get() 

2216 if ret is None: 

2217 ndone += 1 

2218 else: 

2219 ret[0](*ret[1]) 

2220 

2221 for thread in threads: 

2222 thread.join() 

2223 

2224 if task: 

2225 task.update(n_order_groups - len(order_groups)) 

2226 

2227 if task: 

2228 task.done() 

2229 

2230 @filldocs 

2231 def get_waveform_nuts( 

2232 self, obj=None, tmin=None, tmax=None, time=None, codes=None, 

2233 codes_exclude=None, sample_rate_min=None, sample_rate_max=None, 

2234 order_only=False): 

2235 

2236 ''' 

2237 Get waveform content entities matching given constraints. 

2238 

2239 %(query_args)s 

2240 

2241 Like :py:meth:`get_nuts` with ``kind='waveform'`` but additionally 

2242 resolves matching waveform promises (downloads waveforms from remote 

2243 sources). 

2244 

2245 See :py:meth:`iter_nuts` for details on time span matching. 

2246 ''' 

2247 

2248 args = self._get_selection_args(WAVEFORM, obj, tmin, tmax, time, codes) 

2249 

2250 if self.downloads_enabled: 

2251 self._redeem_promises( 

2252 *args, 

2253 codes_exclude, 

2254 sample_rate_min, 

2255 sample_rate_max, 

2256 order_only=order_only) 

2257 

2258 nuts = sorted( 

2259 self.iter_nuts('waveform', *args), key=lambda nut: nut.dkey) 

2260 

2261 return nuts 

2262 

2263 @filldocs 

2264 def have_waveforms( 

2265 self, obj=None, tmin=None, tmax=None, time=None, codes=None): 

2266 

2267 ''' 

2268 Check if any waveforms or waveform promises are available for given 

2269 constraints. 

2270 

2271 %(query_args)s 

2272 ''' 

2273 

2274 args = self._get_selection_args(WAVEFORM, obj, tmin, tmax, time, codes) 

2275 return bool(list( 

2276 self.iter_nuts('waveform', *args, limit=1))) \ 

2277 or (self.downloads_enabled and bool(list( 

2278 self.iter_nuts('waveform_promise', *args, limit=1)))) 

2279 

    @filldocs
    def get_waveforms(
            self, obj=None, tmin=None, tmax=None, time=None, codes=None,
            codes_exclude=None, sample_rate_min=None, sample_rate_max=None,
            uncut=False, want_incomplete=True, degap=True,
            maxgap=5, maxlap=None, snap=None, include_last=False,
            load_data=True, accessor_id='default', operator_params=None,
            order_only=False, channel_priorities=None):

        '''
        Get waveforms matching given constraints.

        %(query_args)s

        :param sample_rate_min:
            Consider only waveforms with a sampling rate equal to or greater
            than the given value [Hz].
        :type sample_rate_min:
            float

        :param sample_rate_max:
            Consider only waveforms with a sampling rate equal to or less than
            the given value [Hz].
        :type sample_rate_max:
            float

        :param uncut:
            Set to ``True``, to disable cutting traces to [``tmin``, ``tmax``]
            and to disable degapping/deoverlapping. Returns untouched traces as
            they are read from file segment. File segments are always read in
            their entirety.
        :type uncut:
            bool

        :param want_incomplete:
            If ``True``, gappy/incomplete traces are included in the result.
        :type want_incomplete:
            bool

        :param degap:
            If ``True``, connect traces and remove gaps and overlaps.
        :type degap:
            bool

        :param maxgap:
            Maximum gap size in samples which is filled with interpolated
            samples when ``degap`` is ``True``.
        :type maxgap:
            int

        :param maxlap:
            Maximum overlap size in samples which is removed when ``degap`` is
            ``True``.
        :type maxlap:
            int

        :param snap:
            Rounding functions used when computing sample index from time
            instance, for trace start and trace end, respectively. By default,
            ``(round, round)`` is used.
        :type snap:
            :py:class:`tuple` of 2 callables

        :param include_last:
            If ``True``, add one more sample to the returned traces (the sample
            which would be the first sample of a query with ``tmin`` set to the
            current value of ``tmax``).
        :type include_last:
            bool

        :param load_data:
            If ``True``, waveform data samples are read from files (or cache).
            If ``False``, meta-information-only traces are returned (dummy
            traces with no data samples).
        :type load_data:
            bool

        :param accessor_id:
            Name of consumer on who's behalf data is accessed. Used in cache
            management (see :py:mod:`~pyrocko.squirrel.cache`). Used as a key
            to distinguish different points of extraction for the decision of
            when to release cached waveform data. Should be used when data is
            alternately extracted from more than one region / selection.
        :type accessor_id:
            str

        :param channel_priorities:
            List of band/instrument code combinations to try. For example,
            giving ``['HH', 'BH']`` would first try to get ``HH?`` channels and
            then fallback to ``BH?`` if these are not available. The first
            matching waveforms are returned. Use in combination with
            ``sample_rate_min`` and ``sample_rate_max`` to constrain the sample
            rate.
        :type channel_priorities:
            :py:class:`list` of :py:class:`str`

        See :py:meth:`iter_nuts` for details on time span matching.

        Loaded data is kept in memory (at least) until
        :py:meth:`clear_accessor` has been called or
        :py:meth:`advance_accessor` has been called two consecutive times
        without data being accessed between the two calls (by this accessor).
        Data may still be further kept in the memory cache if held alive by
        consumers with a different ``accessor_id``.
        '''

        tmin, tmax, codes = self._get_selection_args(
            WAVEFORM, obj, tmin, tmax, time, codes)

        # Channel-priority queries are handled by a separate driver which
        # calls back into this method once per priority level.
        if channel_priorities is not None:
            return self._get_waveforms_prioritized(
                tmin=tmin, tmax=tmax, codes=codes, codes_exclude=codes_exclude,
                sample_rate_min=sample_rate_min,
                sample_rate_max=sample_rate_max,
                uncut=uncut, want_incomplete=want_incomplete, degap=degap,
                maxgap=maxgap, maxlap=maxlap, snap=snap,
                include_last=include_last, load_data=load_data,
                accessor_id=accessor_id, operator_params=operator_params,
                order_only=order_only, channel_priorities=channel_priorities)

        kinds = ['waveform']
        if self.downloads_enabled:
            kinds.append('waveform_promise')

        self_tmin, self_tmax = self.get_time_span(kinds)

        if None in (self_tmin, self_tmax):
            logger.warning(
                'No waveforms available.')
            return []

        # Fall back to the full available time span when no bounds given.
        tmin = tmin if tmin is not None else self_tmin
        tmax = tmax if tmax is not None else self_tmax

        if codes is not None and len(codes) == 1:
            # TODO: fix for multiple / mixed codes
            # If an operator is registered for these output codes, let it
            # synthesize the waveforms instead of querying raw data directly.
            operator = self.get_operator(codes[0])
            if operator is not None:
                return operator.get_waveforms(
                    self, codes[0],
                    tmin=tmin, tmax=tmax,
                    uncut=uncut, want_incomplete=want_incomplete, degap=degap,
                    maxgap=maxgap, maxlap=maxlap, snap=snap,
                    include_last=include_last, load_data=load_data,
                    accessor_id=accessor_id, params=operator_params)

        # Resolves promises (may trigger downloads) and returns sorted nuts.
        nuts = self.get_waveform_nuts(
            obj, tmin, tmax, time, codes, codes_exclude, sample_rate_min,
            sample_rate_max, order_only=order_only)

        if order_only:
            return []

        if load_data:
            traces = [
                self.get_content(nut, 'waveform', accessor_id) for nut in nuts]

        else:
            # Meta-information-only dummy traces, no sample data.
            traces = [
                trace.Trace(**nut.trace_kwargs) for nut in nuts]

        if uncut:
            return traces

        if snap is None:
            snap = (round, round)

        chopped = []
        for tr in traces:
            if not load_data and tr.ydata is not None:
                # Strip any sample data that slipped in via the cache.
                tr = tr.copy(data=False)
                tr.ydata = None

            try:
                chopped.append(tr.chop(
                    tmin, tmax,
                    inplace=False,
                    snap=snap,
                    include_last=include_last))

            except trace.NoData:
                pass

        # Sort, degap/deoverlap and optionally weed out incomplete traces.
        processed = self._process_chopped(
            chopped, degap, maxgap, maxlap, want_incomplete, tmin, tmax)

        return processed

2467 

2468 def _get_waveforms_prioritized( 

2469 self, tmin=None, tmax=None, codes=None, codes_exclude=None, 

2470 channel_priorities=None, **kwargs): 

2471 

2472 trs_all = [] 

2473 codes_have = set() 

2474 for channel in channel_priorities: 

2475 assert len(channel) == 2 

2476 if codes is not None: 

2477 codes_now = [ 

2478 codes_.replace(channel=channel+'?') for codes_ in codes] 

2479 else: 

2480 codes_now = model.CodesNSLCE('*', '*', '*', channel+'?') 

2481 

2482 codes_exclude_now = list(set( 

2483 codes_.replace(channel=channel+codes_.channel[-1]) 

2484 for codes_ in codes_have)) 

2485 

2486 if codes_exclude: 

2487 codes_exclude_now.extend(codes_exclude) 

2488 

2489 trs = self.get_waveforms( 

2490 tmin=tmin, 

2491 tmax=tmax, 

2492 codes=codes_now, 

2493 codes_exclude=codes_exclude_now, 

2494 **kwargs) 

2495 

2496 codes_have.update(set(tr.codes for tr in trs)) 

2497 trs_all.extend(trs) 

2498 

2499 return trs_all 

2500 

    @filldocs
    def chopper_waveforms(
            self, obj=None, tmin=None, tmax=None, time=None, codes=None,
            codes_exclude=None, sample_rate_min=None, sample_rate_max=None,
            tinc=None, tpad=0.,
            want_incomplete=True, snap_window=False,
            degap=True, maxgap=5, maxlap=None,
            snap=None, include_last=False, load_data=True,
            accessor_id=None, clear_accessor=True, operator_params=None,
            grouping=None, channel_priorities=None):

        '''
        Iterate window-wise over waveform archive.

        %(query_args)s

        :param tinc:
            Time increment (window shift time) (default uses ``tmax-tmin``).
        :type tinc:
            :py:func:`~pyrocko.util.get_time_float`

        :param tpad:
            Padding time appended on either side of the data window (window
            overlap is ``2*tpad``).
        :type tpad:
            :py:func:`~pyrocko.util.get_time_float`

        :param want_incomplete:
            If ``True``, gappy/incomplete traces are included in the result.
        :type want_incomplete:
            bool

        :param snap_window:
            If ``True``, start time windows at multiples of tinc with respect
            to system time zero.
        :type snap_window:
            bool

        :param degap:
            If ``True``, connect traces and remove gaps and overlaps.
        :type degap:
            bool

        :param maxgap:
            Maximum gap size in samples which is filled with interpolated
            samples when ``degap`` is ``True``.
        :type maxgap:
            int

        :param maxlap:
            Maximum overlap size in samples which is removed when ``degap`` is
            ``True``.
        :type maxlap:
            int

        :param snap:
            Rounding functions used when computing sample index from time
            instance, for trace start and trace end, respectively. By default,
            ``(round, round)`` is used.
        :type snap:
            :py:class:`tuple` of 2 callables

        :param include_last:
            If ``True``, add one more sample to the returned traces (the sample
            which would be the first sample of a query with ``tmin`` set to the
            current value of ``tmax``).
        :type include_last:
            bool

        :param load_data:
            If ``True``, waveform data samples are read from files (or cache).
            If ``False``, meta-information-only traces are returned (dummy
            traces with no data samples).
        :type load_data:
            bool

        :param accessor_id:
            Name of consumer on who's behalf data is accessed. Used in cache
            management (see :py:mod:`~pyrocko.squirrel.cache`). Used as a key
            to distinguish different points of extraction for the decision of
            when to release cached waveform data. Should be used when data is
            alternately extracted from more than one region / selection.
        :type accessor_id:
            str

        :param clear_accessor:
            If ``True`` (default), :py:meth:`clear_accessor` is called when the
            chopper finishes. Set to ``False`` to keep loaded waveforms in
            memory when the generator returns.
        :type clear_accessor:
            bool

        :param grouping:
            By default, traversal over the data is over time and all matching
            traces of a time window are yielded. Using this option, it is
            possible to traverse the data first by group (e.g. station or
            network) and second by time. This can reduce the number of traces
            in each batch and thus reduce the memory footprint of the process.
        :type grouping:
            :py:class:`~pyrocko.squirrel.operators.base.Grouping`

        :yields:
            For each extracted time window or waveform group a
            :py:class:`Batch` object is yielded.

        See :py:meth:`iter_nuts` for details on time span matching.
        '''

        tmin, tmax, codes = self._get_selection_args(
            WAVEFORM, obj, tmin, tmax, time, codes)

        kinds = ['waveform']
        if self.downloads_enabled:
            kinds.append('waveform_promise')

        self_tmin, self_tmax = self.get_time_span(kinds)

        if None in (self_tmin, self_tmax):
            logger.warning(
                'Content has undefined time span. No waveforms and no '
                'waveform promises?')
            return

        if snap_window and tinc is not None:
            # Align window boundaries to multiples of ``tinc``.
            tmin = tmin if tmin is not None else self_tmin
            tmax = tmax if tmax is not None else self_tmax
            tmin = math.floor(tmin / tinc) * tinc
            tmax = math.ceil(tmax / tinc) * tinc
        else:
            # Shrink by ``tpad`` so padded windows stay inside the data span.
            tmin = tmin if tmin is not None else self_tmin + tpad
            tmax = tmax if tmax is not None else self_tmax - tpad

        if tinc is None:
            tinc = tmax - tmin
            nwin = 1
        elif tinc == 0.0:
            nwin = 1
        else:
            # ``eps`` guards against an extra window from float rounding.
            eps = 1e-6
            nwin = max(1, int((tmax - tmin) / tinc - eps) + 1)

        try:
            if accessor_id is None:
                accessor_id = 'chopper%i' % self._n_choppers_active

            self._n_choppers_active += 1

            if grouping is None:
                codes_list = [codes]
            else:
                # Use a throw-away operator only to compute the grouping of
                # the available codes.
                operator = Operator(
                    filtering=CodesPatternFiltering(codes=codes),
                    grouping=grouping)

                available = set(self.get_codes(kind='waveform'))
                if self.downloads_enabled:
                    available.update(self.get_codes(kind='waveform_promise'))
                operator.update_mappings(sorted(available))

                codes_list = [
                    codes_patterns_list(scl)
                    for scl in operator.iter_in_codes()]

            ngroups = len(codes_list)
            # Outer loop over groups, inner loop over time windows.
            for igroup, scl in enumerate(codes_list):
                for iwin in range(nwin):
                    wmin, wmax = tmin+iwin*tinc, min(tmin+(iwin+1)*tinc, tmax)

                    chopped = self.get_waveforms(
                        tmin=wmin-tpad,
                        tmax=wmax+tpad,
                        codes=scl,
                        codes_exclude=codes_exclude,
                        sample_rate_min=sample_rate_min,
                        sample_rate_max=sample_rate_max,
                        snap=snap,
                        include_last=include_last,
                        load_data=load_data,
                        want_incomplete=want_incomplete,
                        degap=degap,
                        maxgap=maxgap,
                        maxlap=maxlap,
                        accessor_id=accessor_id,
                        operator_params=operator_params,
                        channel_priorities=channel_priorities)

                    # Let the cache know this accessor has moved on, so data
                    # from previous windows may be released.
                    self.advance_accessor(accessor_id)

                    yield Batch(
                        tmin=wmin,
                        tmax=wmax,
                        tpad=tpad,
                        i=iwin,
                        n=nwin,
                        igroup=igroup,
                        ngroups=ngroups,
                        traces=chopped)

        finally:
            # Runs also when the consumer abandons the generator early.
            self._n_choppers_active -= 1
            if clear_accessor:
                self.clear_accessor(accessor_id, 'waveform')

2703 

2704 def _process_chopped( 

2705 self, chopped, degap, maxgap, maxlap, want_incomplete, tmin, tmax): 

2706 

2707 chopped.sort(key=lambda a: a.full_id) 

2708 if degap: 

2709 chopped = trace.degapper(chopped, maxgap=maxgap, maxlap=maxlap) 

2710 

2711 if not want_incomplete: 

2712 chopped_weeded = [] 

2713 for tr in chopped: 

2714 emin = tr.tmin - tmin 

2715 emax = tr.tmax + tr.deltat - tmax 

2716 if (abs(emin) <= 0.5*tr.deltat and abs(emax) <= 0.5*tr.deltat): 

2717 chopped_weeded.append(tr) 

2718 

2719 elif degap: 

2720 if (0. < emin <= 5. * tr.deltat 

2721 and -5. * tr.deltat <= emax < 0.): 

2722 

2723 tr.extend(tmin, tmax-tr.deltat, fillmethod='repeat') 

2724 chopped_weeded.append(tr) 

2725 

2726 chopped = chopped_weeded 

2727 

2728 return chopped 

2729 

    def _get_pyrocko_stations(
            self, obj=None, tmin=None, tmax=None, time=None, codes=None,
            on_error='raise'):

        '''
        Build classic :py:class:`pyrocko.model.station.Station` objects from
        squirrel station and channel entries, merging duplicate epochs.

        ``on_error`` is forwarded to ``util.consistency_merge`` and controls
        how inconsistent duplicate entries are handled.
        '''

        from pyrocko import model as pmodel

        if codes is not None:
            codes = codes_patterns_for_kind(STATION, codes)

        # Per NSL key: (list of station-arg tuples, list of channel objects).
        by_nsl = defaultdict(lambda: (list(), list()))
        for station in self.get_stations(obj, tmin, tmax, time, codes):
            sargs = station._get_pyrocko_station_args()
            by_nsl[station.codes.nsl][0].append(sargs)

        # Channel queries expect NSLCE codes; widen the station patterns.
        if codes is not None:
            codes = [model.CodesNSLCE(c) for c in codes]

        for channel in self.get_channels(obj, tmin, tmax, time, codes):
            # Channels also contribute station-level args (e.g. position),
            # merged below for consistency.
            sargs = channel._get_pyrocko_station_args()
            sargs_list, channels_list = by_nsl[channel.codes.nsl]
            sargs_list.append(sargs)
            channels_list.append(channel)

        pstations = []
        nsls = list(by_nsl.keys())
        nsls.sort()
        for nsl in nsls:
            sargs_list, channels_list = by_nsl[nsl]
            # The ('',) prefix supplies the merge key expected by
            # consistency_merge.
            sargs = util.consistency_merge(
                [('',) + x for x in sargs_list],
                error=on_error)

            by_c = defaultdict(list)
            for ch in channels_list:
                by_c[ch.codes.channel].append(ch._get_pyrocko_channel_args())

            chas = list(by_c.keys())
            chas.sort()
            pchannels = []
            for cha in chas:
                list_of_cargs = by_c[cha]
                cargs = util.consistency_merge(
                    [('',) + x for x in list_of_cargs],
                    error=on_error)
                pchannels.append(pmodel.Channel(*cargs))

            pstations.append(
                pmodel.Station(*sargs, channels=pchannels))

        return pstations

2780 

2781 @property 

2782 def pile(self): 

2783 

2784 ''' 

2785 Emulates the older :py:class:`pyrocko.pile.Pile` interface. 

2786 

2787 This property exposes a :py:class:`pyrocko.squirrel.pile.Pile` object, 

2788 which emulates most of the older :py:class:`pyrocko.pile.Pile` methods 

2789 but uses the fluffy power of the Squirrel under the hood. 

2790 

2791 This interface can be used as a drop-in replacement for piles which are 

2792 used in existing scripts and programs for efficient waveform data 

2793 access. The Squirrel-based pile scales better for large datasets. Newer 

2794 scripts should use Squirrel's native methods to avoid the emulation 

2795 overhead. 

2796 ''' 

2797 from . import pile 

2798 

2799 if self._pile is None: 

2800 self._pile = pile.Pile(self) 

2801 

2802 return self._pile 

2803 

2804 def snuffle(self, **kwargs): 

2805 ''' 

2806 Look at dataset in Snuffler. 

2807 ''' 

2808 self.pile.snuffle(**kwargs) 

2809 

2810 def _gather_codes_keys(self, kind, gather, selector): 

2811 return set( 

2812 gather(codes) 

2813 for codes in self.iter_codes(kind) 

2814 if selector is None or selector(codes)) 

2815 

    def __str__(self):
        # Human-readable summary, delegated to the statistics object
        # returned by ``get_stats()``.
        return str(self.get_stats())

2818 

    def get_coverage(
            self, kind, tmin=None, tmax=None, codes=None, limit=None):

        '''
        Get coverage information.

        Get information about strips of gapless data coverage.

        :param kind:
            Content kind to be queried.
        :type kind:
            str

        :param tmin:
            Start time of query interval.
        :type tmin:
            :py:func:`~pyrocko.util.get_time_float`

        :param tmax:
            End time of query interval.
        :type tmax:
            :py:func:`~pyrocko.util.get_time_float`

        :param codes:
            If given, restrict query to given content codes patterns.
        :type codes:
            :py:class:`list` of :py:class:`~pyrocko.squirrel.model.Codes`
            objects appropriate for the queried content type, or anything which
            can be converted to such objects.

        :param limit:
            Limit query to return only up to a given maximum number of entries
            per matching time series (without setting this option, very gappy
            data could cause the query to execute for a very long time).
        :type limit:
            int

        :returns:
            Information about time spans covered by the requested time series
            data.
        :rtype:
            :py:class:`list` of :py:class:`~pyrocko.squirrel.model.Coverage`
        '''

        # Times are stored split into integer seconds plus a float offset.
        tmin_seconds, tmin_offset = model.tsplit(tmin)
        tmax_seconds, tmax_offset = model.tsplit(tmax)
        kind_id = to_kind_id(kind)

        codes_info = list(self._iter_codes_info(kind=kind))

        # kdata_all entries: (pattern, kind_codes_id, codes_entry, deltat).
        kdata_all = []
        if codes is None:
            for _, codes_entry, deltat, kind_codes_id, _ in codes_info:
                kdata_all.append(
                    (codes_entry, kind_codes_id, codes_entry, deltat))

        else:
            for codes_entry in codes:
                pattern = to_codes(kind_id, codes_entry)
                for _, codes_entry, deltat, kind_codes_id, _ in codes_info:
                    if model.match_codes(pattern, codes_entry):
                        kdata_all.append(
                            (pattern, kind_codes_id, codes_entry, deltat))

        kind_codes_ids = [x[1] for x in kdata_all]

        # Number of segments already open at tmin, per (codes, deltat).
        counts_at_tmin = {}
        if tmin is not None:
            for nut in self.iter_nuts(
                    kind, tmin, tmin, kind_codes_ids=kind_codes_ids):

                k = nut.codes, nut.deltat
                if k not in counts_at_tmin:
                    counts_at_tmin[k] = 0

                counts_at_tmin[k] += 1

        coverages = []
        for pattern, kind_codes_id, codes_entry, deltat in kdata_all:
            # entry layout: [pattern, codes, deltat, tmin, tmax, changes].
            entry = [pattern, codes_entry, deltat, None, None, []]
            # Fetch overall first (ASC) and last (DESC) coverage change time.
            for i, order in [(0, 'ASC'), (1, 'DESC')]:
                sql = self._sql('''
                    SELECT
                        time_seconds,
                        time_offset
                    FROM %(db)s.%(coverage)s
                    WHERE
                        kind_codes_id == ?
                    ORDER BY
                        kind_codes_id ''' + order + ''',
                        time_seconds ''' + order + ''',
                        time_offset ''' + order + '''
                    LIMIT 1
                ''')

                for row in self._conn.execute(sql, [kind_codes_id]):
                    entry[3+i] = model.tjoin(row[0], row[1])

            if None in entry[3:5]:
                # No coverage information stored for this time series.
                continue

            args = [kind_codes_id]

            sql_time = ''
            if tmin is not None:
                # intentionally < because (== tmin) is queried from nuts
                sql_time += ' AND ( ? < time_seconds ' \
                    'OR ( ? == time_seconds AND ? < time_offset ) ) '
                args.extend([tmin_seconds, tmin_seconds, tmin_offset])

            if tmax is not None:
                sql_time += ' AND ( time_seconds < ? ' \
                    'OR ( ? == time_seconds AND time_offset <= ? ) ) '
                args.extend([tmax_seconds, tmax_seconds, tmax_offset])

            sql_limit = ''
            if limit is not None:
                sql_limit = ' LIMIT ?'
                args.append(limit)

            sql = self._sql('''
                SELECT
                    time_seconds,
                    time_offset,
                    step
                FROM %(db)s.%(coverage)s
                WHERE
                    kind_codes_id == ?
                    ''' + sql_time + '''
                ORDER BY
                    kind_codes_id,
                    time_seconds,
                    time_offset
            ''' + sql_limit)

            rows = list(self._conn.execute(sql, args))

            if limit is not None and len(rows) == limit:
                # Too many changes: mark changes list as truncated/unknown.
                entry[-1] = None
            else:
                # Accumulate segment counts over the change events.
                counts = counts_at_tmin.get((codes_entry, deltat), 0)
                tlast = None
                if tmin is not None:
                    entry[-1].append((tmin, counts))
                    tlast = tmin

                for row in rows:
                    t = model.tjoin(row[0], row[1])
                    counts += row[2]
                    entry[-1].append((t, counts))
                    tlast = t

                if tmax is not None and (tlast is None or tlast != tmax):
                    entry[-1].append((tmax, counts))

            coverages.append(model.Coverage.from_values(entry + [kind_id]))

        return coverages

2977 

    def get_stationxml(
            self, obj=None, tmin=None, tmax=None, time=None, codes=None,
            level='response', on_error='raise'):

        '''
        Get station/channel/response metadata in StationXML representation.

        %(query_args)s

        :returns:
            :py:class:`~pyrocko.io.stationxml.FDSNStationXML` object.
        '''

        if level not in ('network', 'station', 'channel', 'response'):
            raise ValueError('Invalid level: %s' % level)

        # NOTE(review): ``on_error`` is not validated here; an unexpected
        # value would leave ``handle_error`` undefined below — confirm
        # callers only pass 'ignore', 'warn' or 'raise'.

        tmin, tmax, codes = self._get_selection_args(
            CHANNEL, obj, tmin, tmax, time, codes)

        def tts(t):
            # Format a time (or None) for log messages.
            if t is None:
                return '<none>'
            else:
                return util.tts(t, format='%Y-%m-%d %H:%M:%S')

        if on_error == 'ignore':
            def handle_error(exc):
                pass

        elif on_error == 'warn':
            def handle_error(exc):
                logger.warning(str(exc))

        elif on_error == 'raise':
            def handle_error(exc):
                raise exc

        def use_first(node_type_name, codes, k, group):
            if on_error == 'warn':
                logger.warning(
                    'Duplicates for %s %s, %s - %s -> using first' % (
                        node_type_name,
                        '.'.join(codes),
                        tts(k[0]), tts(k[1])))

            return group[0]

        def deduplicate(node_type_name, codes, nodes):
            # Keep one node per (start_date, end_date) epoch.
            groups = defaultdict(list)
            for node in nodes:
                k = (node.start_date, node.end_date)
                groups[k].append(node)

            return [
                use_first(node_type_name, codes, k, group)
                for (k, group) in groups.items()]

        filtering = CodesPatternFiltering(codes=codes)

        nslcs = list(set(
            codes.nslc for codes in
            filtering.filter(self.get_codes(kind='channel'))))

        from pyrocko.io import stationxml as sx

        networks = []
        # Walk the codes hierarchy: network -> station -> location -> channel.
        for net, stas in prefix_tree(nslcs):
            network = sx.Network(code=net)
            networks.append(network)

            if level not in ('station', 'channel', 'response'):
                continue

            for sta, locs in stas:
                stations = self.get_stations(
                    tmin=tmin,
                    tmax=tmax,
                    codes=(net, sta, '*'),
                    model='stationxml')

                if on_error != 'raise':
                    stations = deduplicate(
                        'Station', (net, sta), stations)

                errors = sx.check_overlaps(
                    'Station', (net, sta), stations)

                if errors:
                    handle_error(error.Duplicate(
                        'Overlapping/duplicate station info:\n  %s'
                        % '\n  '.join(errors)))

                network.station_list.extend(stations)

                if level not in ('channel', 'response'):
                    continue

                for loc, chas in locs:
                    for cha, _ in chas:
                        channels = self.get_channels(
                            tmin=tmin,
                            tmax=tmax,
                            codes=(net, sta, loc, cha),
                            model='stationxml')

                        if on_error != 'raise':
                            channels = deduplicate(
                                'Channel', (net, sta, loc, cha), channels)

                        errors = sx.check_overlaps(
                            'Channel', (net, sta, loc, cha), channels)

                        if errors:
                            handle_error(error.Duplicate(
                                'Overlapping/duplicate channel info:\n  %s'
                                % '\n  '.join(errors)))

                        for channel in channels:
                            # Attach each channel to the station epoch that
                            # contains it.
                            station = sx.find_containing(stations, channel)
                            if station is not None:
                                station.channel_list.append(channel)
                            else:
                                handle_error(error.NotAvailable(
                                    'No station or station epoch found '
                                    'for channel: %s' % '.'.join(
                                        (net, sta, loc, cha))))

                                continue

                            if level != 'response':
                                continue

                            try:
                                response_sq, response_sx = self.get_response(
                                    codes=(net, sta, loc, cha),
                                    tmin=channel.start_date,
                                    tmax=channel.end_date,
                                    model='stationxml+',
                                    on_duplicate=on_error)

                            except error.NotAvailable as e:
                                handle_error(e)
                                continue

                            if not (
                                    sx.eq_open(
                                        channel.start_date, response_sq.tmin)
                                    and sx.eq_open(
                                        channel.end_date, response_sq.tmax)):

                                handle_error(error.Inconsistencies(
                                    'Response time span does not match '
                                    'channel time span: %s' % '.'.join(
                                        (net, sta, loc, cha))))

                            channel.response = response_sx

        return sx.FDSNStationXML(
            source='Generated by Pyrocko Squirrel.',
            network_list=networks)

3138 

3139 def add_operator(self, op): 

3140 self._operators.append(op) 

3141 

3142 def update_operator_mappings(self): 

3143 available = self.get_codes(kind=('channel')) 

3144 

3145 for operator in self._operators: 

3146 operator.update_mappings(available, self._operator_registry) 

3147 

3148 def iter_operator_mappings(self): 

3149 for operator in self._operators: 

3150 for in_codes, out_codes in operator.iter_mappings(): 

3151 yield operator, in_codes, out_codes 

3152 

3153 def get_operator_mappings(self): 

3154 return list(self.iter_operator_mappings()) 

3155 

3156 def get_operator(self, codes): 

3157 try: 

3158 return self._operator_registry[codes][0] 

3159 except KeyError: 

3160 return None 

3161 

3162 def get_operator_group(self, codes): 

3163 try: 

3164 return self._operator_registry[codes] 

3165 except KeyError: 

3166 return None, (None, None, None) 

3167 

3168 def iter_operator_codes(self): 

3169 for _, _, out_codes in self.iter_operator_mappings(): 

3170 for codes in out_codes: 

3171 yield codes 

3172 

3173 def get_operator_codes(self): 

3174 return list(self.iter_operator_codes()) 

3175 

3176 def print_tables(self, table_names=None, stream=None): 

3177 ''' 

3178 Dump raw database tables in textual form (for debugging purposes). 

3179 

3180 :param table_names: 

3181 Names of tables to be dumped or ``None`` to dump all. 

3182 :type table_names: 

3183 :py:class:`list` of :py:class:`str` 

3184 

3185 :param stream: 

3186 Open file or ``None`` to dump to standard output. 

3187 ''' 

3188 

3189 if stream is None: 

3190 stream = sys.stdout 

3191 

3192 if isinstance(table_names, str): 

3193 table_names = [table_names] 

3194 

3195 if table_names is None: 

3196 table_names = [ 

3197 'selection_file_states', 

3198 'selection_nuts', 

3199 'selection_kind_codes_count', 

3200 'files', 'nuts', 'kind_codes', 'kind_codes_count'] 

3201 

3202 m = { 

3203 'selection_file_states': '%(db)s.%(file_states)s', 

3204 'selection_nuts': '%(db)s.%(nuts)s', 

3205 'selection_kind_codes_count': '%(db)s.%(kind_codes_count)s', 

3206 'files': 'files', 

3207 'nuts': 'nuts', 

3208 'kind_codes': 'kind_codes', 

3209 'kind_codes_count': 'kind_codes_count'} 

3210 

3211 for table_name in table_names: 

3212 self._database.print_table( 

3213 m[table_name] % self._names, stream=stream) 

3214 

3215 

class SquirrelStats(Object):
    '''
    Container to hold statistics about contents available from a Squirrel.

    See also :py:meth:`Squirrel.get_stats`.
    '''

    # Guts-typed attributes describing the current selection.
    nfiles = Int.T(
        help='Number of files in selection.')
    nnuts = Int.T(
        help='Number of index nuts in selection.')
    codes = List.T(
        Tuple.T(content_t=String.T()),
        help='Available code sequences in selection, e.g. '
             '(agency, network, station, location) for stations nuts.')
    kinds = List.T(
        String.T(),
        help='Available content types in selection.')
    total_size = Int.T(
        help='Aggregated file size of files is selection.')
    counts = Dict.T(
        String.T(), Dict.T(Tuple.T(content_t=String.T()), Int.T()),
        help='Breakdown of how many nuts of any content type and code '
             'sequence are available in selection, ``counts[kind][codes]``.')
    time_spans = Dict.T(
        String.T(), Tuple.T(content_t=Timestamp.T()),
        help='Time spans by content type.')
    sources = List.T(
        String.T(),
        help='Descriptions of attached sources.')
    operators = List.T(
        String.T(),
        help='Descriptions of attached operators.')

    def __str__(self):
        '''
        Render the statistics as a human-readable multi-line summary.
        '''
        # Total nut count per content kind, summed over all code
        # sequences of that kind.
        kind_counts = dict(
            (kind, sum(self.counts[kind].values())) for kind in self.kinds)

        scodes = model.codes_to_str_abbreviated(self.codes)

        # Sources/operators are listed one per indented line, or
        # '<none>' when empty.
        ssources = '<none>' if not self.sources else '\n' + '\n'.join(
            ' ' + s for s in self.sources)

        soperators = '<none>' if not self.operators else '\n' + '\n'.join(
            ' ' + s for s in self.operators)

        def stime(t):
            # Format a timestamp; the global open-interval bounds and
            # None render as '<none>'.
            return util.tts(t) if t is not None and t not in (
                model.g_tmin, model.g_tmax) else '<none>'

        def stable(rows):
            # Left-justify each column of ``rows`` to the width of its
            # widest cell and join cells/rows into a text table.
            ns = [max(len(w) for w in col) for col in zip(*rows)]
            return '\n'.join(
                ' '.join(w.ljust(n) for n, w in zip(ns, row))
                for row in rows)

        def indent(s):
            return '\n'.join(' '+line for line in s.splitlines())

        # One table row per content kind: name, count and time span.
        stspans = '<none>' if not self.kinds else '\n' + indent(stable([(
            kind + ':',
            str(kind_counts[kind]),
            stime(self.time_spans[kind][0]),
            '-',
            stime(self.time_spans[kind][1])) for kind in sorted(self.kinds)]))

        s = '''
Number of files: %i
Total size of known files: %s
Number of index nuts: %i
Available content kinds: %s
Available codes: %s
Sources: %s
Operators: %s''' % (
            self.nfiles,
            util.human_bytesize(self.total_size),
            self.nnuts,
            stspans, scodes, ssources, soperators)

        # Leading newline of the template is stripped off.
        return s.lstrip()

3296 

3297 

# Public API of this module.
__all__ = [
    'Squirrel',
    'SquirrelStats',
]