1# http://pyrocko.org - GPLv3
2#
3# The Pyrocko Developers, 21st Century
4# ---|P------/S----------~Lg----------
6from __future__ import absolute_import, print_function
8import sys
9import os
11import math
12import logging
13import threading
14import queue
15from collections import defaultdict
17from pyrocko.guts import Object, Int, List, Tuple, String, Timestamp, Dict
18from pyrocko import util, trace
19from pyrocko.progress import progress
21from . import model, io, cache, dataset
23from .model import to_kind_id, separator, WaveformOrder
24from .client import fdsn, catalog
25from .selection import Selection, filldocs
26from .database import abspath
27from . import client, environment, error
# Module-level logger; 'psq' is the squirrel logging namespace.
logger = logging.getLogger('psq.base')

# Namespace prefix for guts type definitions in this module
# (pyrocko.guts serialization convention — presumably picked up by guts
# machinery at class-definition time; confirm against pyrocko.guts docs).
guts_prefix = 'squirrel'
def make_task(*args):
    '''Create a progress-reporting task routed through this module's logger.'''
    return progress.task(*args, logger=logger)
def lpick(condition, seq):
    '''
    Partition ``seq`` by predicate.

    Returns a pair ``(nay, yea)`` where ``nay`` holds the elements for which
    ``condition`` is falsy and ``yea`` those for which it is truthy. Order of
    the input sequence is preserved within each part.
    '''
    nay, yea = [], []
    for element in seq:
        (yea if condition(element) else nay).append(element)

    return nay, yea
def codes_fill(n, codes):
    '''
    Pad a codes tuple with ``'*'`` wildcards up to length ``n``.

    If ``codes`` is longer than ``n`` it is truncated instead.
    '''
    missing = n - len(codes)
    return codes[:n] + ('*',) * missing
# Number of codes elements relevant for each content kind when building
# query patterns (see codes_patterns_for_kind).
c_kind_to_ncodes = {
    'station': 4,
    'channel': 6,
    'response': 6,
    'waveform': 6,
    'event': 1,
    'waveform_promise': 6,
    'undefined': 1}


# Template for a fully inflated 6-element codes pattern and, indexed by the
# number of given codes (0..6), the slot at which those codes are inserted.
# First slot matches the empty string exactly, the rest are wildcards.
c_inflated = ['', '*', '*', '*', '*', '*']
c_offsets = [0, 2, 1, 1, 1, 1, 0]
def codes_inflate(codes):
    '''
    Expand a partial codes tuple into the full 6-element wildcard pattern.

    The given codes are placed at the slot appropriate for their count (see
    ``c_offsets``); remaining positions keep the template wildcards.
    '''
    codes = codes[:6]
    pattern = list(c_inflated)
    offset = c_offsets[len(codes)]
    pattern[offset:offset + len(codes)] = codes
    return pattern
def codes_inflate2(codes):
    '''
    Front-fill the 6-element wildcard template with the given codes.

    Unlike :py:func:`codes_inflate`, codes are always placed at the start and
    the result is returned as a tuple.
    '''
    filled = list(c_inflated)
    filled[:len(codes)] = codes
    return tuple(filled)
def codes_patterns_for_kind(kind, codes):
    '''
    Translate user-supplied codes (or a list of codes tuples) into GLOB
    patterns appropriate for the given content kind.
    '''
    if not codes:
        return []

    # A sequence of codes tuples: recurse on each and flatten the results.
    if not isinstance(codes[0], str):
        patterns = []
        for subcodes in codes:
            patterns.extend(codes_patterns_for_kind(kind, subcodes))
        return patterns

    if kind in ('event', 'undefined'):
        return [codes]

    cfill = codes_inflate(codes)[:c_kind_to_ncodes[kind]]

    if kind == 'station':
        # Also emit a variant with a literal '[*]' in the fourth slot so
        # placeholder entries are matched as well.
        alternative = list(cfill)
        alternative[3] = '[*]'
        return [cfill, alternative]

    return [cfill]
def group_channels(channels):
    '''
    Group channels by codes, ignoring the last character of the channel code.

    Channels like BHE/BHN/BHZ, whose codes differ only in the final
    character, end up in the same group. Returns a mapping of the truncated
    codes tuple to the list of member channels.
    '''
    groups = defaultdict(list)
    for channel in channels:
        *head, last = channel.codes
        key = tuple(head) + (last[:-1],)
        groups[key].append(channel)

    return groups
def pyrocko_station_from_channel_group(group, extra_args):
    '''
    Build a classic pyrocko Station from a group of squirrel channels.

    Station-level attributes are merged (and checked for consistency) from
    all channels in the group plus any ``extra_args``.
    '''
    from pyrocko import model as pmodel

    merged = util.consistency_merge(
        [channel._get_pyrocko_station_args() for channel in group]
        + extra_args)

    return pmodel.Station(
        network=merged[0],
        station=merged[1],
        location=merged[2],
        lat=merged[3],
        lon=merged[4],
        elevation=merged[5],
        depth=merged[6],
        channels=[ch.get_pyrocko_channel() for ch in group])
def blocks(tmin, tmax, deltat, nsamples_block=100000):
    '''
    Yield time-block boundaries covering the interval ``[tmin, tmax)``.

    Blocks have a fixed duration of ``nsamples_block * deltat`` and are
    aligned to multiples of that duration.
    '''
    tblock = deltat * nsamples_block
    first = int(math.floor(tmin / tblock))
    last = int(math.ceil(tmax / tblock))
    for iblock in range(first, last):
        yield iblock * tblock, (iblock + 1) * tblock
def gaps(avail, tmin, tmax):
    '''
    Compute the sub-intervals of ``[tmin, tmax)`` not covered by ``avail``.

    ``avail`` is a sequence of ``(tmin_a, tmax_a)`` intervals. Returns a list
    of ``(tmin_g, tmax_g)`` gap intervals; empty gaps are suppressed.
    '''
    assert tmin < tmax

    # Sweep-line over interval start/end events; the query window itself
    # contributes an opening at tmax and a closing at tmin so coverage
    # level 0 marks uncovered time inside the window.
    events = [(tmax, 1), (tmin, -1)]
    for tmin_a, tmax_a in avail:
        assert tmin_a < tmax_a
        events.append((tmin_a, 1))
        events.append((tmax_a, -1))

    events.sort()

    uncovered = []
    level = 1
    t_open = None
    for t, step in events:
        if level == 1 and step == -1:
            t_open = t
        elif level == 0 and step == 1 and t_open is not None:
            if t_open != t:
                uncovered.append((t_open, t))

        level += step

    return uncovered
def order_key(order):
    '''Sorting/grouping key for waveform orders: ``(codes, tmin, tmax)``.'''
    codes = order.codes
    return codes, order.tmin, order.tmax
class Batch(object):
    '''
    Batch of waveforms from window wise data extraction.

    Encapsulates state and results yielded for each window in window-wise
    waveform extraction with the :py:meth:`Squirrel.chopper_waveforms` method.

    *Attributes:*

    .. py:attribute:: tmin

        Start of this time window.

    .. py:attribute:: tmax

        End of this time window.

    .. py:attribute:: i

        Index of this time window in sequence.

    .. py:attribute:: n

        Total number of time windows in sequence.

    .. py:attribute:: traces

        Extracted waveforms for this time window.
    '''

    def __init__(self, tmin, tmax, i, n, traces):
        # Plain data container: store everything exactly as given.
        self.tmin, self.tmax = tmin, tmax
        self.i, self.n = i, n
        self.traces = traces
204class Squirrel(Selection):
205 '''
206 Prompt, lazy, indexing, caching, dynamic seismological dataset access.
208 :param env:
209 Squirrel environment instance or directory path to use as starting
210 point for its detection. By default, the current directory is used as
211 starting point. When searching for a usable environment the directory
212 ``'.squirrel'`` or ``'squirrel'`` in the current (or starting point)
213 directory is used if it exists, otherwise the parent directories are
    searched upwards for the existence of such a directory. If no such
215 directory is found, the user's global Squirrel environment
216 ``'$HOME/.pyrocko/squirrel'`` is used.
217 :type env:
218 :py:class:`~pyrocko.squirrel.environment.Environment` or
219 :py:class:`str`
221 :param database:
222 Database instance or path to database. By default the
223 database found in the detected Squirrel environment is used.
224 :type database:
225 :py:class:`~pyrocko.squirrel.database.Database` or :py:class:`str`
227 :param cache_path:
228 Directory path to use for data caching. By default, the ``'cache'``
229 directory in the detected Squirrel environment is used.
230 :type cache_path:
231 :py:class:`str`
233 :param persistent:
234 If given a name, create a persistent selection.
235 :type persistent:
236 :py:class:`str`
238 This is the central class of the Squirrel framework. It provides a unified
239 interface to query and access seismic waveforms, station meta-data and
240 event information from local file collections and remote data sources. For
241 prompt responses, a profound database setup is used under the hood. To
242 speed up assemblage of ad-hoc data selections, files are indexed on first
243 use and the extracted meta-data is remembered in the database for
244 subsequent accesses. Bulk data is lazily loaded from disk and remote
245 sources, just when requested. Once loaded, data is cached in memory to
246 expedite typical access patterns. Files and data sources can be dynamically
247 added to and removed from the Squirrel selection at runtime.
249 Queries are restricted to the contents of the files currently added to the
250 Squirrel selection (usually a subset of the file meta-information
251 collection in the database). This list of files is referred to here as the
252 "selection". By default, temporary tables are created in the attached
253 database to hold the names of the files in the selection as well as various
254 indices and counters. These tables are only visible inside the application
255 which created them and are deleted when the database connection is closed
256 or the application exits. To create a selection which is not deleted at
257 exit, supply a name to the ``persistent`` argument of the Squirrel
258 constructor. Persistent selections are shared among applications using the
259 same database.
261 **Method summary**
263 Some of the methods are implemented in :py:class:`Squirrel`'s base class
264 :py:class:`~pyrocko.squirrel.selection.Selection`.
266 .. autosummary::
268 ~Squirrel.add
269 ~Squirrel.add_source
270 ~Squirrel.add_fdsn
271 ~Squirrel.add_catalog
272 ~Squirrel.add_dataset
273 ~Squirrel.add_virtual
274 ~Squirrel.update
275 ~Squirrel.update_waveform_promises
276 ~Squirrel.advance_accessor
277 ~Squirrel.clear_accessor
278 ~Squirrel.reload
279 ~pyrocko.squirrel.selection.Selection.iter_paths
280 ~Squirrel.iter_nuts
281 ~Squirrel.iter_kinds
282 ~Squirrel.iter_deltats
283 ~Squirrel.iter_codes
284 ~Squirrel.iter_counts
285 ~pyrocko.squirrel.selection.Selection.get_paths
286 ~Squirrel.get_nuts
287 ~Squirrel.get_kinds
288 ~Squirrel.get_deltats
289 ~Squirrel.get_codes
290 ~Squirrel.get_counts
291 ~Squirrel.get_time_span
292 ~Squirrel.get_deltat_span
293 ~Squirrel.get_nfiles
294 ~Squirrel.get_nnuts
295 ~Squirrel.get_total_size
296 ~Squirrel.get_stats
297 ~Squirrel.get_content
298 ~Squirrel.get_stations
299 ~Squirrel.get_channels
300 ~Squirrel.get_responses
301 ~Squirrel.get_events
302 ~Squirrel.get_waveform_nuts
303 ~Squirrel.get_waveforms
304 ~Squirrel.chopper_waveforms
305 ~Squirrel.get_coverage
306 ~Squirrel.pile
307 ~Squirrel.snuffle
308 ~Squirrel.glob_codes
309 ~pyrocko.squirrel.selection.Selection.get_database
310 ~Squirrel.print_tables
311 '''
    def __init__(
            self, env=None, database=None, cache_path=None, persistent=None):

        # Resolve the environment first; database location, cache path and
        # persistence default to what the environment configures.
        if not isinstance(env, environment.Environment):
            env = environment.get_environment(env)

        if database is None:
            database = env.expand_path(env.database_path)

        if cache_path is None:
            cache_path = env.expand_path(env.cache_path)

        if persistent is None:
            persistent = env.persistent

        Selection.__init__(
            self, database=database, persistent=persistent)

        # Relative paths stored in the database are resolved against the
        # parent directory of the environment's base path.
        self.get_database().set_basepath(os.path.dirname(env.get_basepath()))

        # Loaded content is cached in memory; waveforms get their own cache,
        # everything else shares the default one.
        self._content_caches = {
            'waveform': cache.ContentCache(),
            'default': cache.ContentCache()}

        self._cache_path = cache_path

        self._sources = []
        self._operators = []
        self._operator_registry = {}

        self._pile = None
        self._n_choppers_active = 0

        # Names of the selection-private tables created below.
        self._names.update({
            'nuts': self.name + '_nuts',
            'kind_codes_count': self.name + '_kind_codes_count',
            'coverage': self.name + '_coverage'})

        with self.transaction() as cursor:
            self._create_tables_squirrel(cursor)
    def _create_tables_squirrel(self, cursor):
        # Create the selection-private nut index, kind-codes counters and
        # coverage bookkeeping, plus the triggers keeping them consistent.
        # Table name placeholders (%(db)s, %(nuts)s, ...) are substituted by
        # self._sql() with the names registered in __init__.

        cursor.execute(self._register_table(self._sql(
            '''
                CREATE TABLE IF NOT EXISTS %(db)s.%(nuts)s (
                    nut_id integer PRIMARY KEY,
                    file_id integer,
                    file_segment integer,
                    file_element integer,
                    kind_id integer,
                    kind_codes_id integer,
                    tmin_seconds integer,
                    tmin_offset integer,
                    tmax_seconds integer,
                    tmax_offset integer,
                    kscale integer)
            ''')))

        cursor.execute(self._register_table(self._sql(
            '''
                CREATE TABLE IF NOT EXISTS %(db)s.%(kind_codes_count)s (
                    kind_codes_id integer PRIMARY KEY,
                    count integer)
            ''')))

        cursor.execute(self._sql(
            '''
                CREATE UNIQUE INDEX IF NOT EXISTS %(db)s.%(nuts)s_file_element
                    ON %(nuts)s (file_id, file_segment, file_element)
            '''))

        cursor.execute(self._sql(
            '''
                CREATE INDEX IF NOT EXISTS %(db)s.%(nuts)s_index_file_id
                ON %(nuts)s (file_id)
            '''))

        cursor.execute(self._sql(
            '''
                CREATE INDEX IF NOT EXISTS %(db)s.%(nuts)s_index_tmin_seconds
                ON %(nuts)s (kind_id, tmin_seconds)
            '''))

        cursor.execute(self._sql(
            '''
                CREATE INDEX IF NOT EXISTS %(db)s.%(nuts)s_index_tmax_seconds
                ON %(nuts)s (kind_id, tmax_seconds)
            '''))

        cursor.execute(self._sql(
            '''
                CREATE INDEX IF NOT EXISTS %(db)s.%(nuts)s_index_kscale
                ON %(nuts)s (kind_id, kscale, tmin_seconds)
            '''))

        # Deleting a file from the main database removes its nuts from this
        # selection as well.
        cursor.execute(self._sql(
            '''
                CREATE TRIGGER IF NOT EXISTS %(db)s.%(nuts)s_delete_nuts
                BEFORE DELETE ON main.files FOR EACH ROW
                BEGIN
                    DELETE FROM %(nuts)s WHERE file_id == old.file_id;
                END
            '''))

        # trigger only on size to make silent update of mtime possible
        cursor.execute(self._sql(
            '''
                CREATE TRIGGER IF NOT EXISTS %(db)s.%(nuts)s_delete_nuts2
                BEFORE UPDATE OF size ON main.files FOR EACH ROW
                BEGIN
                    DELETE FROM %(nuts)s WHERE file_id == old.file_id;
                END
            '''))

        cursor.execute(self._sql(
            '''
                CREATE TRIGGER IF NOT EXISTS
                    %(db)s.%(file_states)s_delete_files
                BEFORE DELETE ON %(db)s.%(file_states)s FOR EACH ROW
                BEGIN
                    DELETE FROM %(nuts)s WHERE file_id == old.file_id;
                END
            '''))

        # Maintain per-kind_codes nut counts on insert/delete.
        cursor.execute(self._sql(
            '''
                CREATE TRIGGER IF NOT EXISTS %(db)s.%(nuts)s_inc_kind_codes
                BEFORE INSERT ON %(nuts)s FOR EACH ROW
                BEGIN
                    INSERT OR IGNORE INTO %(kind_codes_count)s VALUES
                    (new.kind_codes_id, 0);
                    UPDATE %(kind_codes_count)s
                    SET count = count + 1
                    WHERE new.kind_codes_id
                        == %(kind_codes_count)s.kind_codes_id;
                END
            '''))

        cursor.execute(self._sql(
            '''
                CREATE TRIGGER IF NOT EXISTS %(db)s.%(nuts)s_dec_kind_codes
                BEFORE DELETE ON %(nuts)s FOR EACH ROW
                BEGIN
                    UPDATE %(kind_codes_count)s
                    SET count = count - 1
                    WHERE old.kind_codes_id
                        == %(kind_codes_count)s.kind_codes_id;
                END
            '''))

        # Coverage table: time-ordered step changes per kind_codes; a nut
        # contributes +1 at its tmin and -1 at its tmax. Entries whose step
        # sums to zero are removed again by the triggers below.
        cursor.execute(self._register_table(self._sql(
            '''
                CREATE TABLE IF NOT EXISTS %(db)s.%(coverage)s (
                    kind_codes_id integer,
                    time_seconds integer,
                    time_offset integer,
                    step integer)
            ''')))

        cursor.execute(self._sql(
            '''
                CREATE UNIQUE INDEX IF NOT EXISTS %(db)s.%(coverage)s_time
                    ON %(coverage)s (kind_codes_id, time_seconds, time_offset)
            '''))

        cursor.execute(self._sql(
            '''
                CREATE TRIGGER IF NOT EXISTS %(db)s.%(nuts)s_add_coverage
                AFTER INSERT ON %(nuts)s FOR EACH ROW
                BEGIN
                    INSERT OR IGNORE INTO %(coverage)s VALUES
                    (new.kind_codes_id, new.tmin_seconds, new.tmin_offset, 0)
                    ;
                    UPDATE %(coverage)s
                    SET step = step + 1
                    WHERE new.kind_codes_id == %(coverage)s.kind_codes_id
                        AND new.tmin_seconds == %(coverage)s.time_seconds
                        AND new.tmin_offset == %(coverage)s.time_offset
                    ;
                    INSERT OR IGNORE INTO %(coverage)s VALUES
                    (new.kind_codes_id, new.tmax_seconds, new.tmax_offset, 0)
                    ;
                    UPDATE %(coverage)s
                    SET step = step - 1
                    WHERE new.kind_codes_id == %(coverage)s.kind_codes_id
                        AND new.tmax_seconds == %(coverage)s.time_seconds
                        AND new.tmax_offset == %(coverage)s.time_offset
                    ;
                    DELETE FROM %(coverage)s
                    WHERE new.kind_codes_id == %(coverage)s.kind_codes_id
                        AND new.tmin_seconds == %(coverage)s.time_seconds
                        AND new.tmin_offset == %(coverage)s.time_offset
                        AND step == 0
                    ;
                    DELETE FROM %(coverage)s
                    WHERE new.kind_codes_id == %(coverage)s.kind_codes_id
                        AND new.tmax_seconds == %(coverage)s.time_seconds
                        AND new.tmax_offset == %(coverage)s.time_offset
                        AND step == 0
                    ;
                END
            '''))

        cursor.execute(self._sql(
            '''
                CREATE TRIGGER IF NOT EXISTS %(db)s.%(nuts)s_remove_coverage
                BEFORE DELETE ON %(nuts)s FOR EACH ROW
                BEGIN
                    INSERT OR IGNORE INTO %(coverage)s VALUES
                    (old.kind_codes_id, old.tmin_seconds, old.tmin_offset, 0)
                    ;
                    UPDATE %(coverage)s
                    SET step = step - 1
                    WHERE old.kind_codes_id == %(coverage)s.kind_codes_id
                        AND old.tmin_seconds == %(coverage)s.time_seconds
                        AND old.tmin_offset == %(coverage)s.time_offset
                    ;
                    INSERT OR IGNORE INTO %(coverage)s VALUES
                    (old.kind_codes_id, old.tmax_seconds, old.tmax_offset, 0)
                    ;
                    UPDATE %(coverage)s
                    SET step = step + 1
                    WHERE old.kind_codes_id == %(coverage)s.kind_codes_id
                        AND old.tmax_seconds == %(coverage)s.time_seconds
                        AND old.tmax_offset == %(coverage)s.time_offset
                    ;
                    DELETE FROM %(coverage)s
                    WHERE old.kind_codes_id == %(coverage)s.kind_codes_id
                        AND old.tmin_seconds == %(coverage)s.time_seconds
                        AND old.tmin_offset == %(coverage)s.time_offset
                        AND step == 0
                    ;
                    DELETE FROM %(coverage)s
                    WHERE old.kind_codes_id == %(coverage)s.kind_codes_id
                        AND old.tmax_seconds == %(coverage)s.time_seconds
                        AND old.tmax_offset == %(coverage)s.time_offset
                        AND step == 0
                    ;
                END
            '''))
    def _delete(self):
        '''Delete database tables associated with this Squirrel.'''

        # Triggers are dropped before the tables they reference. The
        # coverage-related objects use IF EXISTS — presumably because
        # selections created before coverage tracking may lack them
        # (TODO confirm).
        for s in '''
                DROP TRIGGER %(db)s.%(nuts)s_delete_nuts;
                DROP TRIGGER %(db)s.%(nuts)s_delete_nuts2;
                DROP TRIGGER %(db)s.%(file_states)s_delete_files;
                DROP TRIGGER %(db)s.%(nuts)s_inc_kind_codes;
                DROP TRIGGER %(db)s.%(nuts)s_dec_kind_codes;
                DROP TABLE %(db)s.%(nuts)s;
                DROP TABLE %(db)s.%(kind_codes_count)s;
                DROP TRIGGER IF EXISTS %(db)s.%(nuts)s_add_coverage;
                DROP TRIGGER IF EXISTS %(db)s.%(nuts)s_remove_coverage;
                DROP TABLE IF EXISTS %(db)s.%(coverage)s;
                '''.strip().splitlines():

            self._conn.execute(self._sql(s))

        Selection._delete(self)
    @filldocs
    def add(self,
            paths,
            kinds=None,
            format='detect',
            regex=None,
            check=True,
            progress_viewer='terminal'):

        '''
        Add files to the selection.

        :param paths:
            Iterator yielding paths to files or directories to be added to the
            selection. Recurses into directories. If given a ``str``, it
            is treated as a single path to be added.
        :type paths:
            :py:class:`list` of :py:class:`str`

        :param kinds:
            Content types to be made available through the Squirrel selection.
            By default, all known content types are accepted.
        :type kinds:
            :py:class:`list` of :py:class:`str`

        :param format:
            File format identifier or ``'detect'`` to enable auto-detection
            (available: %(file_formats)s).
        :type format:
            str

        :param regex:
            If not ``None``, files are only included if their paths match the
            given regular expression pattern.
        :type regex:
            str

        :param check:
            If ``True``, all file modification times are checked to see if
            cached information has to be updated (slow). If ``False``, only
            previously unknown files are indexed and cached information is used
            for known files, regardless of file state (fast, corresponds to
            Squirrel's ``--optimistic`` mode). File deletions will go
            undetected in the latter case.
        :type check:
            bool

        :Complexity:
            O(log N)
        '''

        if isinstance(kinds, str):
            kinds = (kinds,)

        if isinstance(paths, str):
            paths = [paths]

        kind_mask = model.to_kind_mask(kinds)

        # Paths starting with 'virtual:' are passed through untouched by the
        # file selector; everything else is expanded/filtered on disk.
        with progress.view(progress_viewer):
            Selection.add(
                self, util.iter_select_files(
                    paths,
                    show_progress=False,
                    regex=regex,
                    pass_through=lambda path: path.startswith('virtual:')
                ), kind_mask, format)

            self._load(check)
            self._update_nuts()
    def reload(self):
        '''
        Check for modifications and reindex modified files.

        Based on file modification times.
        '''

        # Force all file states to be re-checked, reindex whatever changed,
        # then refresh the selection's nut aggregation.
        self._set_file_states_force_check()
        self._load(check=True)
        self._update_nuts()
    def add_virtual(self, nuts, virtual_paths=None):
        '''
        Add content which is not backed by files.

        :param nuts:
            Content pieces to be added.
        :type nuts:
            iterator yielding :py:class:`~pyrocko.squirrel.model.Nut` objects

        :param virtual_paths:
            List of virtual paths to prevent creating a temporary list of the
            nuts while aggregating the file paths for the selection.
        :type virtual_paths:
            :py:class:`list` of :py:class:`str`

        Stores to the main database and the selection.
        '''

        if isinstance(virtual_paths, str):
            virtual_paths = [virtual_paths]

        if virtual_paths is None:
            # No paths supplied: materialize the nut iterator so it can be
            # traversed twice (paths here, dig() below).
            if not isinstance(nuts, list):
                nuts = list(nuts)
            virtual_paths = set(nut.file_path for nut in nuts)

        Selection.add(self, virtual_paths)
        self.get_database().dig(nuts)
        self._update_nuts()
687 def add_volatile(self, nuts):
688 if not isinstance(nuts, list):
689 nuts = list(nuts)
691 paths = list(set(nut.file_path for nut in nuts))
692 io.backends.virtual.add_nuts(nuts)
693 self.add_virtual(nuts, paths)
694 self._volatile_paths.extend(paths)
696 def add_volatile_waveforms(self, traces):
697 '''
698 Add in-memory waveforms which will be removed when the app closes.
699 '''
701 name = model.random_name()
703 path = 'virtual:volatile:%s' % name
705 nuts = []
706 for itr, tr in enumerate(traces):
707 assert tr.tmin <= tr.tmax
708 tmin_seconds, tmin_offset = model.tsplit(tr.tmin)
709 tmax_seconds, tmax_offset = model.tsplit(tr.tmax)
710 nuts.append(model.Nut(
711 file_path=path,
712 file_format='virtual',
713 file_segment=itr,
714 file_element=0,
715 codes=separator.join(tr.codes),
716 tmin_seconds=tmin_seconds,
717 tmin_offset=tmin_offset,
718 tmax_seconds=tmax_seconds,
719 tmax_offset=tmax_offset,
720 deltat=tr.deltat,
721 kind_id=to_kind_id('waveform'),
722 content=tr))
724 self.add_volatile(nuts)
725 return path
727 def _load(self, check):
728 for _ in io.iload(
729 self,
730 content=[],
731 skip_unchanged=True,
732 check=check):
733 pass
    def _update_nuts(self):
        # Copy nut rows of all not-yet-known files in the selection from the
        # main database into the selection's private nut table, honouring
        # each file's kind mask.
        transaction = self.transaction()
        with make_task('Aggregating selection') as task, \
                transaction as cursor:

            # Report progress every 100000 SQLite VM instructions.
            self._conn.set_progress_handler(task.update, 100000)
            nrows = cursor.execute(self._sql(
                '''
                    INSERT INTO %(db)s.%(nuts)s
                    SELECT NULL,
                        nuts.file_id, nuts.file_segment, nuts.file_element,
                        nuts.kind_id, nuts.kind_codes_id,
                        nuts.tmin_seconds, nuts.tmin_offset,
                        nuts.tmax_seconds, nuts.tmax_offset,
                        nuts.kscale
                    FROM %(db)s.%(file_states)s
                    INNER JOIN nuts
                        ON %(db)s.%(file_states)s.file_id == nuts.file_id
                    INNER JOIN kind_codes
                        ON nuts.kind_codes_id ==
                           kind_codes.kind_codes_id
                    WHERE %(db)s.%(file_states)s.file_state != 2
                        AND (((1 << kind_codes.kind_id)
                            & %(db)s.%(file_states)s.kind_mask) != 0)
                ''')).rowcount

            task.update(nrows)
            self._set_file_states_known(transaction)
            self._conn.set_progress_handler(None, 0)
    def add_source(self, source, check=True, progress_viewer='terminal'):
        '''
        Add remote resource.

        :param source:
            Remote data access client instance.
        :type source:
            subclass of :py:class:`~pyrocko.squirrel.client.base.Source`

        :param check:
            Passed on to the source's ``setup`` method.
        :type check:
            bool
        '''

        self._sources.append(source)
        source.setup(self, check=check, progress_viewer=progress_viewer)
    def add_fdsn(self, *args, **kwargs):
        '''
        Add FDSN site for transparent remote data access.

        Arguments are passed to
        :py:class:`~pyrocko.squirrel.client.fdsn.FDSNSource`.
        '''

        # Thin convenience wrapper around add_source().
        self.add_source(fdsn.FDSNSource(*args, **kwargs))
    def add_catalog(self, *args, **kwargs):
        '''
        Add online catalog for transparent event data access.

        Arguments are passed to
        :py:class:`~pyrocko.squirrel.client.catalog.CatalogSource`.
        '''

        # Thin convenience wrapper around add_source().
        self.add_source(catalog.CatalogSource(*args, **kwargs))
798 def add_dataset(
799 self, ds, check=True, progress_viewer='terminal',
800 warn_persistent=True):
802 '''
803 Read dataset description from file and add its contents.
805 :param ds:
806 Path to dataset description file or dataset description object
807 . See :py:mod:`~pyrocko.squirrel.dataset`.
808 :type ds:
809 :py:class:`str` or :py:class:`~pyrocko.squirrel.dataset.Dataset`
811 :param check:
812 If ``True``, all file modification times are checked to see if
813 cached information has to be updated (slow). If ``False``, only
814 previously unknown files are indexed and cached information is used
815 for known files, regardless of file state (fast, corrresponds to
816 Squirrel's ``--optimistic`` mode). File deletions will go
817 undetected in the latter case.
818 :type check:
819 bool
820 '''
821 if isinstance(ds, str):
822 ds = dataset.read_dataset(ds)
823 path = ds
824 else:
825 path = None
827 if warn_persistent and ds.persistent and (
828 not self._persistent or (self._persistent != ds.persistent)):
830 logger.warning(
831 'Dataset `persistent` flag ignored. Can not be set on already '
832 'existing Squirrel instance.%s' % (
833 ' Dataset: %s' % path if path else ''))
835 ds.setup(self, check=check, progress_viewer=progress_viewer)
837 def _get_selection_args(
838 self, obj=None, tmin=None, tmax=None, time=None, codes=None):
840 if time is not None:
841 tmin = time
842 tmax = time
844 if obj is not None:
845 tmin = tmin if tmin is not None else obj.tmin
846 tmax = tmax if tmax is not None else obj.tmax
847 codes = codes if codes is not None else codes_inflate2(obj.codes)
849 if isinstance(codes, str):
850 codes = tuple(codes.split('.'))
852 return tmin, tmax, codes
854 def _selection_args_to_kwargs(
855 self, obj=None, tmin=None, tmax=None, time=None, codes=None):
857 return dict(obj=obj, tmin=tmin, tmax=tmax, time=time, codes=codes)
    def _timerange_sql(self, tmin, tmax, kind, cond, args, naiv):
        # Append SQL conditions (and their bind arguments) selecting nuts of
        # the given kind intersecting [tmin, tmax). With naiv=True a plain
        # tmin_seconds <= tmax test is used; otherwise the kscale index is
        # exploited: nuts are bucketed by time-span scale, so for each scale
        # only a bounded tmin_seconds window has to be scanned.

        tmin_seconds, tmin_offset = model.tsplit(tmin)
        tmax_seconds, tmax_offset = model.tsplit(tmax)
        if naiv:
            cond.append('%(db)s.%(nuts)s.tmin_seconds <= ?')
            args.append(tmax_seconds)
        else:
            tscale_edges = model.tscale_edges
            tmin_cond = []
            for kscale in range(tscale_edges.size + 1):
                if kscale != tscale_edges.size:
                    tscale = int(tscale_edges[kscale])
                    tmin_cond.append('''
                        (%(db)s.%(nuts)s.kind_id = ?
                         AND %(db)s.%(nuts)s.kscale == ?
                         AND %(db)s.%(nuts)s.tmin_seconds BETWEEN ? AND ?)
                    ''')
                    # +/-1 second padding guards against offset rounding at
                    # the window edges.
                    args.extend(
                        (to_kind_id(kind), kscale,
                         tmin_seconds - tscale - 1, tmax_seconds + 1))

                else:
                    # Last bucket: spans larger than the biggest scale edge
                    # have no lower bound on tmin_seconds.
                    tmin_cond.append('''
                        (%(db)s.%(nuts)s.kind_id == ?
                         AND %(db)s.%(nuts)s.kscale == ?
                         AND %(db)s.%(nuts)s.tmin_seconds <= ?)
                    ''')

                    args.extend(
                        (to_kind_id(kind), kscale, tmax_seconds + 1))
            if tmin_cond:
                cond.append(' ( ' + ' OR '.join(tmin_cond) + ' ) ')

        cond.append('%(db)s.%(nuts)s.tmax_seconds >= ?')
        args.append(tmin_seconds)
    def iter_nuts(
            self, kind=None, tmin=None, tmax=None, codes=None, naiv=False,
            kind_codes_ids=None, path=None):

        '''
        Iterate over content entities matching given constraints.

        :param kind:
            Content kind (or kinds) to extract.
        :type kind:
            :py:class:`str`, :py:class:`list` of :py:class:`str`

        :param tmin:
            Start time of query interval.
        :type tmin:
            timestamp

        :param tmax:
            End time of query interval.
        :type tmax:
            timestamp

        :param codes:
            Pattern of content codes to query.
        :type codes:
            :py:class:`tuple` of :py:class:`str`

        :param naiv:
            Bypass time span lookup through indices (slow, for testing).
        :type naiv:
            :py:class:`bool`

        :param kind_codes_ids:
            Kind-codes IDs of contents to be retrieved (internal use).
        :type kind_codes_ids:
            :py:class:`list` of :py:class:`int`

        :yields:
            :py:class:`~pyrocko.squirrel.model.Nut` objects representing the
            intersecting content.

        :complexity:
            O(log N) for the time selection part due to heavy use of database
            indices.

        Query time span is treated as a half-open interval ``[tmin, tmax)``.
        However, if ``tmin`` equals ``tmax``, the edge logics are modified to
        closed-interval so that content intersecting with the time instant ``t
        = tmin = tmax`` is returned (otherwise nothing would be returned as
        ``[t, t)`` never matches anything).

        Time spans of content entities to be matched are also treated as half
        open intervals, e.g. content span ``[0, 1)`` is matched by query span
        ``[0, 1)`` but not by ``[-1, 0)`` or ``[1, 2)``. Also here, logics are
        modified to closed-interval when the content time span is an empty
        interval, i.e. to indicate a time instant. E.g. time instant 0 is
        matched by ``[0, 1)`` but not by ``[-1, 0)`` or ``[1, 2)``.
        '''

        # Multiple kinds (or all kinds): recurse once per kind.
        if not isinstance(kind, str):
            if kind is None:
                kind = model.g_content_kinds
            for kind_ in kind:
                for nut in self.iter_nuts(kind_, tmin, tmax, codes):
                    yield nut

            return

        cond = []
        args = []
        if tmin is not None or tmax is not None:
            assert kind is not None
            # Open-ended queries are clamped to the selection's total span.
            if tmin is None:
                tmin = self.get_time_span()[0]
            if tmax is None:
                tmax = self.get_time_span()[1] + 1.0

            self._timerange_sql(tmin, tmax, kind, cond, args, naiv)

        elif kind is not None:
            cond.append('kind_codes.kind_id == ?')
            args.append(to_kind_id(kind))

        if codes is not None:
            pats = codes_patterns_for_kind(kind, codes)
            if pats:
                cond.append(
                    ' ( %s ) ' % ' OR '.join(
                        ('kind_codes.codes GLOB ?',) * len(pats)))
                args.extend(separator.join(pat) for pat in pats)

        if kind_codes_ids is not None:
            cond.append(
                ' ( kind_codes.kind_codes_id IN ( %s ) ) ' % ', '.join(
                    '?'*len(kind_codes_ids)))

            args.extend(kind_codes_ids)

        db = self.get_database()
        if path is not None:
            cond.append('files.path == ?')
            args.append(db.relpath(abspath(path)))

        sql = ('''
            SELECT
                files.path,
                files.format,
                files.mtime,
                files.size,
                %(db)s.%(nuts)s.file_segment,
                %(db)s.%(nuts)s.file_element,
                kind_codes.kind_id,
                kind_codes.codes,
                %(db)s.%(nuts)s.tmin_seconds,
                %(db)s.%(nuts)s.tmin_offset,
                %(db)s.%(nuts)s.tmax_seconds,
                %(db)s.%(nuts)s.tmax_offset,
                kind_codes.deltat
            FROM files
            INNER JOIN %(db)s.%(nuts)s
                ON files.file_id == %(db)s.%(nuts)s.file_id
            INNER JOIN kind_codes
                ON %(db)s.%(nuts)s.kind_codes_id == kind_codes.kind_codes_id
            ''')

        if cond:
            sql += ''' WHERE ''' + ' AND '.join(cond)

        sql = self._sql(sql)
        if tmin is None and tmax is None:
            for row in self._conn.execute(sql, args):
                row = (db.abspath(row[0]),) + row[1:]
                nut = model.Nut(values_nocheck=row)
                yield nut
        else:
            assert tmin is not None and tmax is not None
            # The SQL pre-selection is coarse (integer seconds); exact edge
            # logic per the docstring is applied here in Python.
            if tmin == tmax:
                for row in self._conn.execute(sql, args):
                    row = (db.abspath(row[0]),) + row[1:]
                    nut = model.Nut(values_nocheck=row)
                    if (nut.tmin <= tmin < nut.tmax) \
                            or (nut.tmin == nut.tmax and tmin == nut.tmin):

                        yield nut
            else:
                for row in self._conn.execute(sql, args):
                    row = (db.abspath(row[0]),) + row[1:]
                    nut = model.Nut(values_nocheck=row)
                    if (tmin < nut.tmax and nut.tmin < tmax) \
                            or (nut.tmin == nut.tmax
                                and tmin <= nut.tmin < tmax):

                        yield nut
    def get_nuts(self, *args, **kwargs):
        '''
        Get content entities matching given constraints.

        Like :py:meth:`iter_nuts` but returns results as a list. See there
        for the available arguments.
        '''

        return list(self.iter_nuts(*args, **kwargs))
    def _split_nuts(
            self, kind, tmin=None, tmax=None, codes=None, path=None):
        # Cut the section [tmin, tmax) out of all matching nuts: each
        # intersected nut is deleted and the remaining pieces (before tmin
        # and/or after tmax, if any) are re-inserted. Applied both to this
        # selection's nut table and to the main database's.

        tmin_seconds, tmin_offset = model.tsplit(tmin)
        tmax_seconds, tmax_offset = model.tsplit(tmax)

        names_main_nuts = dict(self._names)
        names_main_nuts.update(db='main', nuts='nuts')

        db = self.get_database()

        def main_nuts(s):
            # Substitute table names targeting the main database.
            return s % names_main_nuts

        with self.transaction() as cursor:
            # modify selection and main
            for sql_subst in [
                    self._sql, main_nuts]:

                cond = []
                args = []

                self._timerange_sql(tmin, tmax, kind, cond, args, False)

                if codes is not None:
                    pats = codes_patterns_for_kind(kind, codes)
                    if pats:
                        cond.append(
                            ' ( %s ) ' % ' OR '.join(
                                ('kind_codes.codes GLOB ?',) * len(pats)))
                        args.extend(separator.join(pat) for pat in pats)

                if path is not None:
                    cond.append('files.path == ?')
                    args.append(db.relpath(abspath(path)))

                sql = sql_subst('''
                    SELECT
                        %(db)s.%(nuts)s.nut_id,
                        %(db)s.%(nuts)s.tmin_seconds,
                        %(db)s.%(nuts)s.tmin_offset,
                        %(db)s.%(nuts)s.tmax_seconds,
                        %(db)s.%(nuts)s.tmax_offset,
                        kind_codes.deltat
                    FROM files
                    INNER JOIN %(db)s.%(nuts)s
                        ON files.file_id == %(db)s.%(nuts)s.file_id
                    INNER JOIN kind_codes
                        ON %(db)s.%(nuts)s.kind_codes_id == kind_codes.kind_codes_id
                    WHERE ''' + ' AND '.join(cond))  # noqa

                insert = []
                delete = []
                for row in cursor.execute(sql, args):
                    nut_id, nut_tmin_seconds, nut_tmin_offset, \
                        nut_tmax_seconds, nut_tmax_offset, nut_deltat = row

                    nut_tmin = model.tjoin(
                        nut_tmin_seconds, nut_tmin_offset)
                    nut_tmax = model.tjoin(
                        nut_tmax_seconds, nut_tmax_offset)

                    if nut_tmin < tmax and tmin < nut_tmax:
                        # Keep the leading piece [nut_tmin, tmin) ...
                        if nut_tmin < tmin:
                            insert.append((
                                nut_tmin_seconds, nut_tmin_offset,
                                tmin_seconds, tmin_offset,
                                model.tscale_to_kscale(
                                    tmin_seconds - nut_tmin_seconds),
                                nut_id))

                        # ... and the trailing piece [tmax, nut_tmax).
                        if tmax < nut_tmax:
                            insert.append((
                                tmax_seconds, tmax_offset,
                                nut_tmax_seconds, nut_tmax_offset,
                                model.tscale_to_kscale(
                                    nut_tmax_seconds - tmax_seconds),
                                nut_id))

                    delete.append((nut_id,))

                sql_add = '''
                    INSERT INTO %(db)s.%(nuts)s (
                            file_id, file_segment, file_element, kind_id,
                            kind_codes_id, tmin_seconds, tmin_offset,
                            tmax_seconds, tmax_offset, kscale )
                        SELECT
                            file_id, file_segment, file_element,
                            kind_id, kind_codes_id, ?, ?, ?, ?, ?
                        FROM %(db)s.%(nuts)s
                        WHERE nut_id == ?
                '''

                cursor.executemany(sql_subst(sql_add), insert)

                sql_delete = '''
                    DELETE FROM %(db)s.%(nuts)s WHERE nut_id == ?
                '''

                cursor.executemany(sql_subst(sql_delete), delete)
1158 def get_time_span(self, kinds=None):
1159 '''
1160 Get time interval over all content in selection.
1162 :complexity:
1163 O(1), independent of the number of nuts.
1165 :returns: (tmin, tmax)
1166 '''
1168 sql_min = self._sql('''
1169 SELECT MIN(tmin_seconds), MIN(tmin_offset)
1170 FROM %(db)s.%(nuts)s
1171 WHERE kind_id == ?
1172 AND tmin_seconds == (
1173 SELECT MIN(tmin_seconds)
1174 FROM %(db)s.%(nuts)s
1175 WHERE kind_id == ?)
1176 ''')
1178 sql_max = self._sql('''
1179 SELECT MAX(tmax_seconds), MAX(tmax_offset)
1180 FROM %(db)s.%(nuts)s
1181 WHERE kind_id == ?
1182 AND tmax_seconds == (
1183 SELECT MAX(tmax_seconds)
1184 FROM %(db)s.%(nuts)s
1185 WHERE kind_id == ?)
1186 ''')
1188 gtmin = None
1189 gtmax = None
1191 if isinstance(kinds, str):
1192 kinds = [kinds]
1194 if kinds is None:
1195 kind_ids = model.g_content_kind_ids
1196 else:
1197 kind_ids = model.to_kind_ids(kinds)
1199 for kind_id in kind_ids:
1200 for tmin_seconds, tmin_offset in self._conn.execute(
1201 sql_min, (kind_id, kind_id)):
1202 tmin = model.tjoin(tmin_seconds, tmin_offset)
1203 if tmin is not None and (gtmin is None or tmin < gtmin):
1204 gtmin = tmin
1206 for (tmax_seconds, tmax_offset) in self._conn.execute(
1207 sql_max, (kind_id, kind_id)):
1208 tmax = model.tjoin(tmax_seconds, tmax_offset)
1209 if tmax is not None and (gtmax is None or tmax > gtmax):
1210 gtmax = tmax
1212 return gtmin, gtmax
1214 def get_deltat_span(self, kind):
1215 '''
1216 Get min and max sampling interval of all content of given kind.
1218 :param kind:
1219 Content kind
1220 :type kind:
1221 str
1223 :returns: (deltat_min, deltat_max)
1224 '''
1226 deltats = [
1227 deltat for deltat in self.get_deltats(kind)
1228 if deltat is not None]
1230 if deltats:
1231 return min(deltats), max(deltats)
1232 else:
1233 return None, None
1235 def iter_kinds(self, codes=None):
1236 '''
1237 Iterate over content types available in selection.
1239 :param codes:
1240 If given, get kinds only for selected codes identifier.
1241 :type codes:
1242 :py:class:`tuple` of :py:class:`str`
1244 :yields:
1245 Available content kinds as :py:class:`str`.
1247 :complexity:
1248 O(1), independent of number of nuts.
1249 '''
1251 return self._database._iter_kinds(
1252 codes=codes,
1253 kind_codes_count='%(db)s.%(kind_codes_count)s' % self._names)
1255 def iter_deltats(self, kind=None):
1256 '''
1257 Iterate over sampling intervals available in selection.
1259 :param kind:
1260 If given, get sampling intervals only for a given content type.
1261 :type kind:
1262 str
1264 :yields:
1265 :py:class:`float` values.
1267 :complexity:
1268 O(1), independent of number of nuts.
1269 '''
1270 return self._database._iter_deltats(
1271 kind=kind,
1272 kind_codes_count='%(db)s.%(kind_codes_count)s' % self._names)
1274 def iter_codes(self, kind=None):
1275 '''
1276 Iterate over content identifier code sequences available in selection.
1278 :param kind:
1279 If given, get codes only for a given content type.
1280 :type kind:
1281 str
1283 :yields:
1284 :py:class:`tuple` of :py:class:`str`
1286 :complexity:
1287 O(1), independent of number of nuts.
1288 '''
1289 return self._database._iter_codes(
1290 kind=kind,
1291 kind_codes_count='%(db)s.%(kind_codes_count)s' % self._names)
1293 def iter_counts(self, kind=None):
1294 '''
1295 Iterate over number of occurrences of any (kind, codes) combination.
1297 :param kind:
1298 If given, get counts only for selected content type.
1299 :type kind:
1300 str
1302 :yields:
1303 Tuples of the form ``((kind, codes), count)``.
1305 :complexity:
1306 O(1), independent of number of nuts.
1307 '''
1308 return self._database._iter_counts(
1309 kind=kind,
1310 kind_codes_count='%(db)s.%(kind_codes_count)s' % self._names)
1312 def get_kinds(self, codes=None):
1313 '''
1314 Get content types available in selection.
1316 :param codes:
1317 If given, get kinds only for selected codes identifier.
1318 :type codes:
1319 :py:class:`tuple` of :py:class:`str`
1321 :returns:
1322 Sorted list of available content types.
1324 :complexity:
1325 O(1), independent of number of nuts.
1327 '''
1328 return sorted(list(self.iter_kinds(codes=codes)))
1330 def get_deltats(self, kind=None):
1331 '''
1332 Get sampling intervals available in selection.
1334 :param kind:
1335 If given, get codes only for selected content type.
1336 :type kind:
1337 str
1339 :complexity:
1340 O(1), independent of number of nuts.
1342 :returns: sorted list of available sampling intervals
1343 '''
1344 return sorted(list(self.iter_deltats(kind=kind)))
1346 def get_codes(self, kind=None):
1347 '''
1348 Get identifier code sequences available in selection.
1350 :param kind:
1351 If given, get codes only for selected content type.
1352 :type kind:
1353 str
1355 :complexity:
1356 O(1), independent of number of nuts.
1358 :returns: sorted list of available codes as tuples of strings
1359 '''
1360 return sorted(list(self.iter_codes(kind=kind)))
1362 def get_counts(self, kind=None):
1363 '''
1364 Get number of occurrences of any (kind, codes) combination.
1366 :param kind:
1367 If given, get codes only for selected content type.
1368 :type kind:
1369 str
1371 :complexity:
1372 O(1), independent of number of nuts.
1374 :returns: ``dict`` with ``counts[kind][codes]`` or ``counts[codes]``
1375 if kind is not ``None``
1376 '''
1377 d = {}
1378 for (k, codes, deltat), count in self.iter_counts():
1379 if k not in d:
1380 v = d[k] = {}
1381 else:
1382 v = d[k]
1384 if codes not in v:
1385 v[codes] = 0
1387 v[codes] += count
1389 if kind is not None:
1390 return d[kind]
1391 else:
1392 return d
1394 def glob_codes(self, kind, codes_list):
1395 '''
1396 Find codes matching given patterns.
1398 :param kind:
1399 Content kind to be queried.
1400 :type kind:
1401 str
1403 :param codes_list:
1404 List of code patterns to query. If not given or empty, an empty
1405 list is returned.
1406 :type codes_list:
1407 :py:class:`list` of :py:class:`tuple` of :py:class:`str`
1409 :returns:
1410 List of matches of the form ``[kind_codes_id, codes, deltat]``.
1411 '''
1413 args = [to_kind_id(kind)]
1414 pats = []
1415 for codes in codes_list:
1416 pats.extend(codes_patterns_for_kind(kind, codes))
1418 codes_cond = ' ( %s ) ' % ' OR '.join(
1419 ('kind_codes.codes GLOB ?',) * len(pats))
1421 args.extend(separator.join(pat) for pat in pats)
1423 sql = self._sql('''
1424 SELECT kind_codes_id, codes, deltat FROM kind_codes
1425 WHERE
1426 kind_id == ?
1427 AND ''' + codes_cond)
1429 return list(map(list, self._conn.execute(sql, args)))
1431 def update(self, constraint=None, **kwargs):
1432 '''
1433 Update or partially update channel and event inventories.
1435 :param constraint:
1436 Selection of times or areas to be brought up to date.
1437 :type constraint:
1438 :py:class:`~pyrocko.squirrel.client.Constraint`
1440 :param \\*\\*kwargs:
1441 Shortcut for setting ``constraint=Constraint(**kwargs)``.
1443 This function triggers all attached remote sources, to check for
1444 updates in the meta-data. The sources will only submit queries when
1445 their expiration date has passed, or if the selection spans into
1446 previously unseen times or areas.
1447 '''
1449 if constraint is None:
1450 constraint = client.Constraint(**kwargs)
1452 for source in self._sources:
1453 source.update_channel_inventory(self, constraint)
1454 source.update_event_inventory(self, constraint)
1456 def update_waveform_promises(self, constraint=None, **kwargs):
1457 '''
1458 Permit downloading of remote waveforms.
1460 :param constraint:
1461 Remote waveforms compatible with the given constraint are enabled
1462 for download.
1463 :type constraint:
1464 :py:class:`~pyrocko.squirrel.client.Constraint`
1466 :param \\*\\*kwargs:
1467 Shortcut for setting ``constraint=Constraint(**kwargs)``.
1469 Calling this method permits Squirrel to download waveforms from remote
1470 sources when processing subsequent waveform requests. This works by
1471 inserting so called waveform promises into the database. It will look
1472 into the available channels for each remote source and create a promise
1473 for each channel compatible with the given constraint. If the promise
1474 then matches in a waveform request, Squirrel tries to download the
1475 waveform. If the download is successful, the downloaded waveform is
1476 added to the Squirrel and the promise is deleted. If the download
1477 fails, the promise is kept if the reason of failure looks like being
1478 temporary, e.g. because of a network failure. If the cause of failure
1479 however seems to be permanent, the promise is deleted so that no
1480 further attempts are made to download a waveform which might not be
1481 available from that server at all. To force re-scheduling after a
1482 permanent failure, call :py:meth:`update_waveform_promises`
1483 yet another time.
1484 '''
1486 if constraint is None:
1487 constraint = client.Constraint(**kwargs)
1489 # TODO
1490 print('contraint ignored atm')
1492 for source in self._sources:
1493 source.update_waveform_promises(self)
1495 def update_responses(self, constraint=None, **kwargs):
1496 # TODO
1497 if constraint is None:
1498 constraint = client.Constraint(**kwargs)
1500 print('contraint ignored atm')
1501 for source in self._sources:
1502 source.update_response_inventory(self, constraint)
1504 def get_nfiles(self):
1505 '''
1506 Get number of files in selection.
1507 '''
1509 sql = self._sql('''SELECT COUNT(*) FROM %(db)s.%(file_states)s''')
1510 for row in self._conn.execute(sql):
1511 return row[0]
1513 def get_nnuts(self):
1514 '''
1515 Get number of nuts in selection.
1516 '''
1518 sql = self._sql('''SELECT COUNT(*) FROM %(db)s.%(nuts)s''')
1519 for row in self._conn.execute(sql):
1520 return row[0]
1522 def get_total_size(self):
1523 '''
1524 Get aggregated file size available in selection.
1525 '''
1527 sql = self._sql('''
1528 SELECT SUM(files.size) FROM %(db)s.%(file_states)s
1529 INNER JOIN files
1530 ON %(db)s.%(file_states)s.file_id = files.file_id
1531 ''')
1533 for row in self._conn.execute(sql):
1534 return row[0] or 0
1536 def get_stats(self):
1537 '''
1538 Get statistics on contents available through this selection.
1539 '''
1541 kinds = self.get_kinds()
1542 time_spans = {}
1543 for kind in kinds:
1544 time_spans[kind] = self.get_time_span([kind])
1546 return SquirrelStats(
1547 nfiles=self.get_nfiles(),
1548 nnuts=self.get_nnuts(),
1549 kinds=kinds,
1550 codes=self.get_codes(),
1551 total_size=self.get_total_size(),
1552 counts=self.get_counts(),
1553 time_spans=time_spans,
1554 sources=[s.describe() for s in self._sources],
1555 operators=[op.describe() for op in self._operators])
1557 def get_content(
1558 self,
1559 nut,
1560 cache_id='default',
1561 accessor_id='default',
1562 show_progress=False):
1564 '''
1565 Get and possibly load full content for a given index entry from file.
1567 Loads the actual content objects (channel, station, waveform, ...) from
1568 file. For efficiency sibling content (all stuff in the same file
1569 segment) will also be loaded as a side effect. The loaded contents are
1570 cached in the Squirrel object.
1571 '''
1573 content_cache = self._content_caches[cache_id]
1574 if not content_cache.has(nut):
1576 for nut_loaded in io.iload(
1577 nut.file_path,
1578 segment=nut.file_segment,
1579 format=nut.file_format,
1580 database=self._database,
1581 show_progress=show_progress):
1583 content_cache.put(nut_loaded)
1585 try:
1586 return content_cache.get(nut, accessor_id)
1587 except KeyError:
1588 raise error.NotAvailable(
1589 'Unable to retrieve content: %s, %s, %s, %s' % nut.key)
1591 def advance_accessor(self, accessor_id, cache_id=None):
1592 '''
1593 Notify memory caches about consumer moving to a new data batch.
1595 :param accessor_id:
1596 Name of accessing consumer to be advanced.
1597 :type accessor_id:
1598 str
1600 :param cache_id:
1601 Name of cache to for which the accessor should be advanced. By
1602 default the named accessor is advanced in all registered caches.
1603 By default, two caches named ``'default'`` and ``'waveforms'`` are
1604 available.
1605 :type cache_id:
1606 str
1608 See :py:class:`~pyrocko.squirrel.cache.ContentCache` for details on how
1609 Squirrel's memory caching works and can be tuned. Default behaviour is
1610 to release data when it has not been used in the latest data
1611 window/batch. If the accessor is never advanced, data is cached
1612 indefinitely - which is often desired e.g. for station meta-data.
1613 Methods for consecutive data traversal, like
1614 :py:meth:`chopper_waveforms` automatically advance and clear
1615 their accessor.
1616 '''
1617 for cache_ in (
1618 self._content_caches.keys()
1619 if cache_id is None
1620 else [cache_id]):
1622 self._content_caches[cache_].advance_accessor(accessor_id)
1624 def clear_accessor(self, accessor_id, cache_id=None):
1625 '''
1626 Notify memory caches about a consumer having finished.
1628 :param accessor_id:
1629 Name of accessor to be cleared.
1630 :type accessor_id:
1631 str
1633 :param cache_id:
1634 Name of cache to for which the accessor should be cleared. By
1635 default the named accessor is cleared from all registered caches.
1636 By default, two caches named ``'default'`` and ``'waveforms'`` are
1637 available.
1638 :type cache_id:
1639 str
1641 Calling this method clears all references to cache entries held by the
1642 named accessor. Cache entries are then freed if not referenced by any
1643 other accessor.
1644 '''
1646 for cache_ in (
1647 self._content_caches.keys()
1648 if cache_id is None
1649 else [cache_id]):
1651 self._content_caches[cache_].clear_accessor(accessor_id)
1653 def _check_duplicates(self, nuts):
1654 d = defaultdict(list)
1655 for nut in nuts:
1656 d[nut.codes].append(nut)
1658 for codes, group in d.items():
1659 if len(group) > 1:
1660 logger.warning(
1661 'Multiple entries matching codes: %s'
1662 % '.'.join(codes.split(separator)))
1664 @filldocs
1665 def get_stations(
1666 self, obj=None, tmin=None, tmax=None, time=None, codes=None,
1667 model='squirrel'):
1669 '''
1670 Get stations matching given constraints.
1672 %(query_args)s
1674 :param model:
1675 Select object model for returned values: ``'squirrel'`` to get
1676 Squirrel station objects or ``'pyrocko'`` to get Pyrocko station
1677 objects with channel information attached.
1678 :type model:
1679 str
1681 :returns:
1682 List of :py:class:`pyrocko.squirrel.Station
1683 <pyrocko.squirrel.model.Station>` objects by default or list of
1684 :py:class:`pyrocko.model.Station <pyrocko.model.station.Station>`
1685 objects if ``model='pyrocko'`` is requested.
1687 See :py:meth:`iter_nuts` for details on time span matching.
1688 '''
1690 if model == 'pyrocko':
1691 return self._get_pyrocko_stations(obj, tmin, tmax, time, codes)
1692 elif model == 'squirrel':
1693 args = self._get_selection_args(obj, tmin, tmax, time, codes)
1694 nuts = sorted(
1695 self.iter_nuts('station', *args), key=lambda nut: nut.dkey)
1696 self._check_duplicates(nuts)
1697 return [self.get_content(nut) for nut in nuts]
1698 else:
1699 raise ValueError('Invalid station model: %s' % model)
1701 @filldocs
1702 def get_channels(
1703 self, obj=None, tmin=None, tmax=None, time=None, codes=None):
1705 '''
1706 Get channels matching given constraints.
1708 %(query_args)s
1710 :returns:
1711 List of :py:class:`~pyrocko.squirrel.model.Channel` objects.
1713 See :py:meth:`iter_nuts` for details on time span matching.
1714 '''
1716 args = self._get_selection_args(obj, tmin, tmax, time, codes)
1717 nuts = sorted(
1718 self.iter_nuts('channel', *args), key=lambda nut: nut.dkey)
1719 self._check_duplicates(nuts)
1720 return [self.get_content(nut) for nut in nuts]
    @filldocs
    def get_sensors(
            self, obj=None, tmin=None, tmax=None, time=None, codes=None):

        '''
        Get sensors matching given constraints.

        %(query_args)s

        :returns:
            List of :py:class:`~pyrocko.squirrel.model.Sensor` objects.

        See :py:meth:`iter_nuts` for details on time span matching.
        '''

        tmin, tmax, codes = self._get_selection_args(
            obj, tmin, tmax, time, codes)

        if codes is not None:
            if isinstance(codes, str):
                codes = codes.split('.')
            # Expand to the full 6-element codes tuple.
            codes = tuple(codes_inflate(codes))
            if codes[4] != '*':
                # Widen the channel pattern: replace its last character
                # (presumably the component letter, e.g. 'Z' in 'BHZ') with
                # '?' so all components of a sensor match together -- TODO
                # confirm intent for single-character channel codes.
                codes = codes[:4] + (codes[4][:-1] + '?',) + codes[5:]

        # Sensors are grouped from the matching channel contents.
        nuts = sorted(
            self.iter_nuts(
                'channel', tmin, tmax, codes), key=lambda nut: nut.dkey)
        self._check_duplicates(nuts)
        return model.Sensor.from_channels(
            self.get_content(nut) for nut in nuts)
1754 @filldocs
1755 def get_responses(
1756 self, obj=None, tmin=None, tmax=None, time=None, codes=None):
1758 '''
1759 Get instrument responses matching given constraints.
1761 %(query_args)s
1763 :returns:
1764 List of :py:class:`~pyrocko.squirrel.model.Response` objects.
1766 See :py:meth:`iter_nuts` for details on time span matching.
1767 '''
1769 args = self._get_selection_args(obj, tmin, tmax, time, codes)
1770 nuts = sorted(
1771 self.iter_nuts('response', *args), key=lambda nut: nut.dkey)
1772 self._check_duplicates(nuts)
1773 return [self.get_content(nut) for nut in nuts]
1775 @filldocs
1776 def get_response(
1777 self, obj=None, tmin=None, tmax=None, time=None, codes=None):
1779 '''
1780 Get instrument response matching given constraints.
1782 %(query_args)s
1784 :returns:
1785 :py:class:`~pyrocko.squirrel.model.Response` object.
1787 Same as :py:meth:`get_responses` but returning exactly one response.
1788 Raises :py:exc:`~pyrocko.squirrel.error.NotAvailable` if zero or more
1789 than one is available.
1791 See :py:meth:`iter_nuts` for details on time span matching.
1792 '''
1794 responses = self.get_responses(obj, tmin, tmax, time, codes)
1795 if len(responses) == 0:
1796 raise error.NotAvailable(
1797 'No instrument response available.')
1798 elif len(responses) > 1:
1799 raise error.NotAvailable(
1800 'Multiple instrument responses matching given constraints.')
1802 return responses[0]
1804 @filldocs
1805 def get_events(
1806 self, obj=None, tmin=None, tmax=None, time=None, codes=None):
1808 '''
1809 Get events matching given constraints.
1811 %(query_args)s
1813 :returns:
1814 List of :py:class:`~pyrocko.model.event.Event` objects.
1816 See :py:meth:`iter_nuts` for details on time span matching.
1817 '''
1819 args = self._get_selection_args(obj, tmin, tmax, time, codes)
1820 nuts = sorted(
1821 self.iter_nuts('event', *args), key=lambda nut: nut.dkey)
1822 self._check_duplicates(nuts)
1823 return [self.get_content(nut) for nut in nuts]
1825 def _redeem_promises(self, *args):
1827 tmin, tmax, _ = args
1829 waveforms = list(self.iter_nuts('waveform', *args))
1830 promises = list(self.iter_nuts('waveform_promise', *args))
1832 codes_to_avail = defaultdict(list)
1833 for nut in waveforms:
1834 codes_to_avail[nut.codes].append((nut.tmin, nut.tmax+nut.deltat))
1836 def tts(x):
1837 if isinstance(x, tuple):
1838 return tuple(tts(e) for e in x)
1839 elif isinstance(x, list):
1840 return list(tts(e) for e in x)
1841 else:
1842 return util.time_to_str(x)
1844 orders = []
1845 for promise in promises:
1846 waveforms_avail = codes_to_avail[promise.codes]
1847 for block_tmin, block_tmax in blocks(
1848 max(tmin, promise.tmin),
1849 min(tmax, promise.tmax),
1850 promise.deltat):
1852 orders.append(
1853 WaveformOrder(
1854 source_id=promise.file_path,
1855 codes=tuple(promise.codes.split(separator)),
1856 tmin=block_tmin,
1857 tmax=block_tmax,
1858 deltat=promise.deltat,
1859 gaps=gaps(waveforms_avail, block_tmin, block_tmax)))
1861 orders_noop, orders = lpick(lambda order: order.gaps, orders)
1863 order_keys_noop = set(order_key(order) for order in orders_noop)
1864 if len(order_keys_noop) != 0 or len(orders_noop) != 0:
1865 logger.info(
1866 'Waveform orders already satisified with cached/local data: '
1867 '%i (%i)' % (len(order_keys_noop), len(orders_noop)))
1869 source_ids = []
1870 sources = {}
1871 for source in self._sources:
1872 if isinstance(source, fdsn.FDSNSource):
1873 source_ids.append(source._source_id)
1874 sources[source._source_id] = source
1876 source_priority = dict(
1877 (source_id, i) for (i, source_id) in enumerate(source_ids))
1879 order_groups = defaultdict(list)
1880 for order in orders:
1881 order_groups[order_key(order)].append(order)
1883 for k, order_group in order_groups.items():
1884 order_group.sort(
1885 key=lambda order: source_priority[order.source_id])
1887 n_order_groups = len(order_groups)
1889 if len(order_groups) != 0 or len(orders) != 0:
1890 logger.info(
1891 'Waveform orders standing for download: %i (%i)'
1892 % (len(order_groups), len(orders)))
1894 task = make_task('Waveform orders processed', n_order_groups)
1895 else:
1896 task = None
1898 def split_promise(order):
1899 self._split_nuts(
1900 'waveform_promise',
1901 order.tmin, order.tmax,
1902 codes=order.codes,
1903 path=order.source_id)
1905 def release_order_group(order):
1906 okey = order_key(order)
1907 for followup in order_groups[okey]:
1908 split_promise(followup)
1910 del order_groups[okey]
1912 if task:
1913 task.update(n_order_groups - len(order_groups))
1915 def noop(order):
1916 pass
1918 def success(order):
1919 release_order_group(order)
1920 split_promise(order)
1922 def batch_add(paths):
1923 self.add(paths)
1925 calls = queue.Queue()
1927 def enqueue(f):
1928 def wrapper(*args):
1929 calls.put((f, args))
1931 return wrapper
1933 for order in orders_noop:
1934 split_promise(order)
1936 while order_groups:
1938 orders_now = []
1939 empty = []
1940 for k, order_group in order_groups.items():
1941 try:
1942 orders_now.append(order_group.pop(0))
1943 except IndexError:
1944 empty.append(k)
1946 for k in empty:
1947 del order_groups[k]
1949 by_source_id = defaultdict(list)
1950 for order in orders_now:
1951 by_source_id[order.source_id].append(order)
1953 threads = []
1954 for source_id in by_source_id:
1955 def download():
1956 try:
1957 sources[source_id].download_waveforms(
1958 by_source_id[source_id],
1959 success=enqueue(success),
1960 error_permanent=enqueue(split_promise),
1961 error_temporary=noop,
1962 batch_add=enqueue(batch_add))
1964 finally:
1965 calls.put(None)
1967 thread = threading.Thread(target=download)
1968 thread.start()
1969 threads.append(thread)
1971 ndone = 0
1972 while ndone < len(threads):
1973 ret = calls.get()
1974 if ret is None:
1975 ndone += 1
1976 else:
1977 ret[0](*ret[1])
1979 for thread in threads:
1980 thread.join()
1982 if task:
1983 task.update(n_order_groups - len(order_groups))
1985 if task:
1986 task.done()
1988 @filldocs
1989 def get_waveform_nuts(
1990 self, obj=None, tmin=None, tmax=None, time=None, codes=None):
1992 '''
1993 Get waveform content entities matching given constraints.
1995 %(query_args)s
1997 Like :py:meth:`get_nuts` with ``kind='waveform'`` but additionally
1998 resolves matching waveform promises (downloads waveforms from remote
1999 sources).
2001 See :py:meth:`iter_nuts` for details on time span matching.
2002 '''
2004 args = self._get_selection_args(obj, tmin, tmax, time, codes)
2005 self._redeem_promises(*args)
2006 return sorted(
2007 self.iter_nuts('waveform', *args), key=lambda nut: nut.dkey)
    @filldocs
    def get_waveforms(
            self, obj=None, tmin=None, tmax=None, time=None, codes=None,
            uncut=False, want_incomplete=True, degap=True, maxgap=5,
            maxlap=None, snap=None, include_last=False, load_data=True,
            accessor_id='default', operator_params=None):

        '''
        Get waveforms matching given constraints.

        %(query_args)s

        :param uncut:
            Set to ``True``, to disable cutting traces to [``tmin``, ``tmax``]
            and to disable degapping/deoverlapping. Returns untouched traces as
            they are read from file segment. File segments are always read in
            their entirety.
        :type uncut:
            bool

        :param want_incomplete:
            If ``True``, gappy/incomplete traces are included in the result.
        :type want_incomplete:
            bool

        :param degap:
            If ``True``, connect traces and remove gaps and overlaps.
        :type degap:
            bool

        :param maxgap:
            Maximum gap size in samples which is filled with interpolated
            samples when ``degap`` is ``True``.
        :type maxgap:
            int

        :param maxlap:
            Maximum overlap size in samples which is removed when ``degap`` is
            ``True``
        :type maxlap:
            int

        :param snap:
            Rounding functions used when computing sample index from time
            instance, for trace start and trace end, respectively. By default,
            ``(round, round)`` is used.
        :type snap:
            tuple of 2 callables

        :param include_last:
            If ``True``, add one more sample to the returned traces (the sample
            which would be the first sample of a query with ``tmin`` set to the
            current value of ``tmax``).
        :type include_last:
            bool

        :param load_data:
            If ``True``, waveform data samples are read from files (or cache).
            If ``False``, meta-information-only traces are returned (dummy
            traces with no data samples).
        :type load_data:
            bool

        :param accessor_id:
            Name of consumer on who's behalf data is accessed. Used in cache
            management (see :py:mod:`~pyrocko.squirrel.cache`). Used as a key
            to distinguish different points of extraction for the decision of
            when to release cached waveform data. Should be used when data is
            alternately extracted from more than one region / selection.
        :type accessor_id:
            str

        See :py:meth:`iter_nuts` for details on time span matching.

        Loaded data is kept in memory (at least) until
        :py:meth:`clear_accessor` has been called or
        :py:meth:`advance_accessor` has been called two consecutive times
        without data being accessed between the two calls (by this accessor).
        Data may still be further kept in the memory cache if held alive by
        consumers with a different ``accessor_id``.
        '''

        tmin, tmax, codes = self._get_selection_args(
            obj, tmin, tmax, time, codes)

        self_tmin, self_tmax = self.get_time_span(
            ['waveform', 'waveform_promise'])

        if None in (self_tmin, self_tmax):
            logger.warning(
                'No waveforms available.')
            return []

        # Fall back to the selection's own span for open-ended queries.
        tmin = tmin if tmin is not None else self_tmin
        tmax = tmax if tmax is not None else self_tmax

        if codes is not None:
            # Operator-handled codes (derived/composed channels) are served
            # by the responsible operator instead of the plain nut lookup.
            operator = self.get_operator(codes)
            if operator is not None:
                return operator.get_waveforms(
                    self, codes,
                    tmin=tmin, tmax=tmax,
                    uncut=uncut, want_incomplete=want_incomplete, degap=degap,
                    maxgap=maxgap, maxlap=maxlap, snap=snap,
                    include_last=include_last, load_data=load_data,
                    accessor_id=accessor_id, params=operator_params)

        # Resolves waveform promises (may trigger downloads) and returns
        # the matching waveform nuts sorted by dkey.
        nuts = self.get_waveform_nuts(obj, tmin, tmax, time, codes)

        if load_data:
            # 'waveform' selects the waveform content cache (cache_id).
            traces = [
                self.get_content(nut, 'waveform', accessor_id) for nut in nuts]

        else:
            traces = [
                trace.Trace(**nut.trace_kwargs) for nut in nuts]

        if uncut:
            return traces

        if snap is None:
            snap = (round, round)

        chopped = []
        for tr in traces:
            if not load_data and tr.ydata is not None:
                # Cached trace may carry data although meta-only was asked
                # for; strip a copy, leaving the cached object untouched.
                tr = tr.copy(data=False)
                tr.ydata = None

            try:
                chopped.append(tr.chop(
                    tmin, tmax,
                    inplace=False,
                    snap=snap,
                    include_last=include_last))

            except trace.NoData:
                # Trace does not overlap the requested span after snapping.
                pass

        processed = self._process_chopped(
            chopped, degap, maxgap, maxlap, want_incomplete, tmin, tmax)

        return processed
2153 @filldocs
2154 def chopper_waveforms(
2155 self, obj=None, tmin=None, tmax=None, time=None, codes=None,
2156 tinc=None, tpad=0.,
2157 want_incomplete=True, snap_window=False,
2158 degap=True, maxgap=5, maxlap=None,
2159 snap=None, include_last=False, load_data=True,
2160 accessor_id=None, clear_accessor=True, operator_params=None):
2162 '''
2163 Iterate window-wise over waveform archive.
2165 %(query_args)s
2167 :param tinc:
2168 Time increment (window shift time) (default uses ``tmax-tmin``)
2169 :type tinc:
2170 timestamp
2172 :param tpad:
2173 Padding time appended on either side of the data window (window
2174 overlap is ``2*tpad``).
2175 :type tpad:
2176 timestamp
2178 :param want_incomplete:
2179 If ``True``, gappy/incomplete traces are included in the result.
2180 :type want_incomplete:
2181 bool
2183 :param snap_window:
2184 If ``True``, start time windows at multiples of tinc with respect
2185 to system time zero.
2187 :param degap:
2188 If ``True``, connect traces and remove gaps and overlaps.
2189 :type degap:
2190 bool
2192 :param maxgap:
2193 Maximum gap size in samples which is filled with interpolated
2194 samples when ``degap`` is ``True``.
2195 :type maxgap:
2196 int
2198 :param maxlap:
2199 Maximum overlap size in samples which is removed when ``degap`` is
2200 ``True``
2201 :type maxlap:
2202 int
2204 :param snap:
2205 Rounding functions used when computing sample index from time
2206 instance, for trace start and trace end, respectively. By default,
2207 ``(round, round)`` is used.
2208 :type snap:
2209 tuple of 2 callables
2211 :param include_last:
2212 If ``True``, add one more sample to the returned traces (the sample
2213 which would be the first sample of a query with ``tmin`` set to the
2214 current value of ``tmax``).
2215 :type include_last:
2216 bool
2218 :param load_data:
2219 If ``True``, waveform data samples are read from files (or cache).
2220 If ``False``, meta-information-only traces are returned (dummy
2221 traces with no data samples).
2222 :type load_data:
2223 bool
2225 :param accessor_id:
2226 Name of consumer on who's behalf data is accessed. Used in cache
2227 management (see :py:mod:`~pyrocko.squirrel.cache`). Used as a key
2228 to distinguish different points of extraction for the decision of
2229 when to release cached waveform data. Should be used when data is
2230 alternately extracted from more than one region / selection.
2231 :type accessor_id:
2232 str
2234 :param clear_accessor:
2235 If ``True`` (default), :py:meth:`clear_accessor` is called when the
2236 chopper finishes. Set to ``False`` to keep loaded waveforms in
2237 memory when the generator returns.
2239 :yields:
2240 A list of :py:class:`~pyrocko.trace.Trace` objects for every
2241 extracted time window.
2243 See :py:meth:`iter_nuts` for details on time span matching.
2244 '''
2246 tmin, tmax, codes = self._get_selection_args(
2247 obj, tmin, tmax, time, codes)
2249 self_tmin, self_tmax = self.get_time_span(
2250 ['waveform', 'waveform_promise'])
2252 if None in (self_tmin, self_tmax):
2253 logger.warning(
2254 'Content has undefined time span. No waveforms and no '
2255 'waveform promises?')
2256 return
2258 if snap_window and tinc is not None:
2259 tmin = tmin if tmin is not None else self_tmin
2260 tmax = tmax if tmax is not None else self_tmax
2261 tmin = math.floor(tmin / tinc) * tinc
2262 tmax = math.ceil(tmax / tinc) * tinc
2263 else:
2264 tmin = tmin if tmin is not None else self_tmin + tpad
2265 tmax = tmax if tmax is not None else self_tmax - tpad
2267 tinc = tinc if tinc is not None else tmax - tmin
2269 try:
2270 if accessor_id is None:
2271 accessor_id = 'chopper%i' % self._n_choppers_active
2273 self._n_choppers_active += 1
2275 eps = tinc * 1e-6
2276 if tinc != 0.0:
2277 nwin = int(((tmax - eps) - tmin) / tinc) + 1
2278 else:
2279 nwin = 1
2281 for iwin in range(nwin):
2282 wmin, wmax = tmin+iwin*tinc, min(tmin+(iwin+1)*tinc, tmax)
2283 chopped = []
2284 wmin, wmax = tmin+iwin*tinc, min(tmin+(iwin+1)*tinc, tmax)
2285 eps = tinc*1e-6
2286 if wmin >= tmax-eps:
2287 break
2289 chopped = self.get_waveforms(
2290 tmin=wmin-tpad,
2291 tmax=wmax+tpad,
2292 codes=codes,
2293 snap=snap,
2294 include_last=include_last,
2295 load_data=load_data,
2296 want_incomplete=want_incomplete,
2297 degap=degap,
2298 maxgap=maxgap,
2299 maxlap=maxlap,
2300 accessor_id=accessor_id,
2301 operator_params=operator_params)
2303 self.advance_accessor(accessor_id)
2305 yield Batch(
2306 tmin=wmin,
2307 tmax=wmax,
2308 i=iwin,
2309 n=nwin,
2310 traces=chopped)
2312 iwin += 1
2314 finally:
2315 self._n_choppers_active -= 1
2316 if clear_accessor:
2317 self.clear_accessor(accessor_id, 'waveform')
2319 def _process_chopped(
2320 self, chopped, degap, maxgap, maxlap, want_incomplete, tmin, tmax):
2322 chopped.sort(key=lambda a: a.full_id)
2323 if degap:
2324 chopped = trace.degapper(chopped, maxgap=maxgap, maxlap=maxlap)
2326 if not want_incomplete:
2327 chopped_weeded = []
2328 for tr in chopped:
2329 emin = tr.tmin - tmin
2330 emax = tr.tmax + tr.deltat - tmax
2331 if (abs(emin) <= 0.5*tr.deltat and abs(emax) <= 0.5*tr.deltat):
2332 chopped_weeded.append(tr)
2334 elif degap:
2335 if (0. < emin <= 5. * tr.deltat
2336 and -5. * tr.deltat <= emax < 0.):
2338 tr.extend(tmin, tmax-tr.deltat, fillmethod='repeat')
2339 chopped_weeded.append(tr)
2341 chopped = chopped_weeded
2343 return chopped
2345 def _get_pyrocko_stations(
2346 self, obj=None, tmin=None, tmax=None, time=None, codes=None):
2348 from pyrocko import model as pmodel
2350 by_nsl = defaultdict(lambda: (list(), list()))
2351 for station in self.get_stations(obj, tmin, tmax, time, codes):
2352 sargs = station._get_pyrocko_station_args()
2353 nsl = sargs[1:4]
2354 by_nsl[nsl][0].append(sargs)
2356 for channel in self.get_channels(obj, tmin, tmax, time, codes):
2357 sargs = channel._get_pyrocko_station_args()
2358 nsl = sargs[1:4]
2359 sargs_list, channels_list = by_nsl[nsl]
2360 sargs_list.append(sargs)
2361 channels_list.append(channel)
2363 pstations = []
2364 nsls = list(by_nsl.keys())
2365 nsls.sort()
2366 for nsl in nsls:
2367 sargs_list, channels_list = by_nsl[nsl]
2368 sargs = util.consistency_merge(sargs_list)
2370 by_c = defaultdict(list)
2371 for ch in channels_list:
2372 by_c[ch.channel].append(ch._get_pyrocko_channel_args())
2374 chas = list(by_c.keys())
2375 chas.sort()
2376 pchannels = []
2377 for cha in chas:
2378 list_of_cargs = by_c[cha]
2379 cargs = util.consistency_merge(list_of_cargs)
2380 pchannels.append(pmodel.Channel(
2381 name=cargs[0],
2382 azimuth=cargs[1],
2383 dip=cargs[2]))
2385 pstations.append(pmodel.Station(
2386 network=sargs[0],
2387 station=sargs[1],
2388 location=sargs[2],
2389 lat=sargs[3],
2390 lon=sargs[4],
2391 elevation=sargs[5],
2392 depth=sargs[6] or 0.0,
2393 channels=pchannels))
2395 return pstations
2397 @property
2398 def pile(self):
2400 '''
2401 Emulates the older :py:class:`pyrocko.pile.Pile` interface.
2403 This property exposes a :py:class:`pyrocko.squirrel.pile.Pile` object,
2404 which emulates most of the older :py:class:`pyrocko.pile.Pile` methods
2405 but uses the fluffy power of the Squirrel under the hood.
2407 This interface can be used as a drop-in replacement for piles which are
2408 used in existing scripts and programs for efficient waveform data
2409 access. The Squirrel-based pile scales better for large datasets. Newer
2410 scripts should use Squirrel's native methods to avoid the emulation
2411 overhead.
2412 '''
2413 from . import pile
2415 if self._pile is None:
2416 self._pile = pile.Pile(self)
2418 return self._pile
2420 def snuffle(self):
2421 '''
2422 Look at dataset in Snuffler.
2423 '''
2424 self.pile.snuffle()
2426 def _gather_codes_keys(self, kind, gather, selector):
2427 return set(
2428 gather(codes)
2429 for codes in self.iter_codes(kind)
2430 if selector is None or selector(codes))
2432 def __str__(self):
2433 return str(self.get_stats())
2435 def get_coverage(
2436 self, kind, tmin=None, tmax=None, codes_list=None, limit=None):
2438 '''
2439 Get coverage information.
2441 Get information about strips of gapless data coverage.
2443 :param kind:
2444 Content kind to be queried.
2445 :type kind:
2446 str
2448 :param tmin:
2449 Start time of query interval.
2450 :type tmin:
2451 timestamp
2453 :param tmax:
2454 End time of query interval.
2455 :type tmax:
2456 timestamp
2458 :param codes_list:
2459 List of code patterns to query. If not given or empty, an empty
2460 list is returned.
2461 :type codes_list:
2462 :py:class:`list` of :py:class:`tuple` of :py:class:`str`
2464 :param limit:
2465 Limit query to return only up to a given maximum number of entries
2466 per matching channel (without setting this option, very gappy data
2467 could cause the query to execute for a very long time).
2468 :type limit:
2469 int
2471 :returns:
2472 List of entries of the form ``(pattern, codes, deltat, tmin, tmax,
2473 data)`` where ``pattern`` is the request code pattern which
2474 yielded this entry, ``codes`` are the matching channel codes,
2475 ``tmin`` and ``tmax`` are the global min and max times for which
2476 data for this channel is available, regardless of any time
2477 restrictions in the query. ``data`` is a list with (up to
2478 ``limit``) change-points of the form ``(time, count)`` where a
2479 ``count`` of zero indicates a data gap, a value of 1 normal data
2480 coverage and higher values indicate duplicate/redundant data.
2481 '''
2483 tmin_seconds, tmin_offset = model.tsplit(tmin)
2484 tmax_seconds, tmax_offset = model.tsplit(tmax)
2486 kdata_all = []
2487 for pattern in codes_list:
2488 kdata = self.glob_codes(kind, [pattern])
2489 for row in kdata:
2490 row[0:0] = [pattern]
2492 kdata_all.extend(kdata)
2494 kind_codes_ids = [x[1] for x in kdata_all]
2496 counts_at_tmin = {}
2497 if tmin is not None:
2498 for nut in self.iter_nuts(
2499 kind, tmin, tmin, kind_codes_ids=kind_codes_ids):
2501 k = nut.codes, nut.deltat
2502 if k not in counts_at_tmin:
2503 counts_at_tmin[k] = 0
2505 counts_at_tmin[k] += 1
2507 coverage = []
2508 for pattern, kind_codes_id, codes, deltat in kdata_all:
2509 entry = [pattern, codes, deltat, None, None, []]
2510 for i, order in [(0, 'ASC'), (1, 'DESC')]:
2511 sql = self._sql('''
2512 SELECT
2513 time_seconds,
2514 time_offset
2515 FROM %(db)s.%(coverage)s
2516 WHERE
2517 kind_codes_id == ?
2518 ORDER BY
2519 kind_codes_id ''' + order + ''',
2520 time_seconds ''' + order + ''',
2521 time_offset ''' + order + '''
2522 LIMIT 1
2523 ''')
2525 for row in self._conn.execute(sql, [kind_codes_id]):
2526 entry[3+i] = model.tjoin(row[0], row[1])
2528 if None in entry[3:5]:
2529 continue
2531 args = [kind_codes_id]
2533 sql_time = ''
2534 if tmin is not None:
2535 # intentionally < because (== tmin) is queried from nuts
2536 sql_time += ' AND ( ? < time_seconds ' \
2537 'OR ( ? == time_seconds AND ? < time_offset ) ) '
2538 args.extend([tmin_seconds, tmin_seconds, tmin_offset])
2540 if tmax is not None:
2541 sql_time += ' AND ( time_seconds < ? ' \
2542 'OR ( ? == time_seconds AND time_offset <= ? ) ) '
2543 args.extend([tmax_seconds, tmax_seconds, tmax_offset])
2545 sql_limit = ''
2546 if limit is not None:
2547 sql_limit = ' LIMIT ?'
2548 args.append(limit)
2550 sql = self._sql('''
2551 SELECT
2552 time_seconds,
2553 time_offset,
2554 step
2555 FROM %(db)s.%(coverage)s
2556 WHERE
2557 kind_codes_id == ?
2558 ''' + sql_time + '''
2559 ORDER BY
2560 kind_codes_id,
2561 time_seconds,
2562 time_offset
2563 ''' + sql_limit)
2565 rows = list(self._conn.execute(sql, args))
2567 if limit is not None and len(rows) == limit:
2568 entry[-1] = None
2569 else:
2570 counts = counts_at_tmin.get((codes, deltat), 0)
2571 tlast = None
2572 if tmin is not None:
2573 entry[-1].append((tmin, counts))
2574 tlast = tmin
2576 for row in rows:
2577 t = model.tjoin(row[0], row[1])
2578 counts += row[2]
2579 entry[-1].append((t, counts))
2580 tlast = t
2582 if tmax is not None and (tlast is None or tlast != tmax):
2583 entry[-1].append((tmax, counts))
2585 coverage.append(entry)
2587 return coverage
    def add_operator(self, op):
        '''
        Attach an operator to this Squirrel instance.

        The operator becomes active after the next call to
        :py:meth:`update_operator_mappings`.
        '''
        self._operators.append(op)
2592 def update_operator_mappings(self):
2593 available = [
2594 separator.join(codes)
2595 for codes in self.get_codes(kind=('channel'))]
2597 for operator in self._operators:
2598 operator.update_mappings(available, self._operator_registry)
2600 def iter_operator_mappings(self):
2601 for operator in self._operators:
2602 for in_codes, out_codes in operator.iter_mappings():
2603 yield operator, in_codes, out_codes
    def get_operator_mappings(self):
        '''
        Get all operator mappings as a list.

        See :py:meth:`iter_operator_mappings` for the entry format.
        '''
        return list(self.iter_operator_mappings())
2608 def get_operator(self, codes):
2609 if isinstance(codes, tuple):
2610 codes = separator.join(codes)
2611 try:
2612 return self._operator_registry[codes][0]
2613 except KeyError:
2614 return None
2616 def get_operator_group(self, codes):
2617 if isinstance(codes, tuple):
2618 codes = separator.join(codes)
2619 try:
2620 return self._operator_registry[codes]
2621 except KeyError:
2622 return None, (None, None, None)
2624 def iter_operator_codes(self):
2625 for _, _, out_codes in self.iter_operator_mappings():
2626 for codes in out_codes:
2627 yield tuple(codes.split(separator))
    def get_operator_codes(self):
        '''
        Get all operator-produced channel code tuples as a list.

        See :py:meth:`iter_operator_codes`.
        '''
        return list(self.iter_operator_codes())
2632 def print_tables(self, table_names=None, stream=None):
2633 '''
2634 Dump raw database tables in textual form (for debugging purposes).
2636 :param table_names:
2637 Names of tables to be dumped or ``None`` to dump all.
2638 :type table_names:
2639 :py:class:`list` of :py:class:`str`
2641 :param stream:
2642 Open file or ``None`` to dump to standard output.
2643 '''
2645 if stream is None:
2646 stream = sys.stdout
2648 if isinstance(table_names, str):
2649 table_names = [table_names]
2651 if table_names is None:
2652 table_names = [
2653 'selection_file_states',
2654 'selection_nuts',
2655 'selection_kind_codes_count',
2656 'files', 'nuts', 'kind_codes', 'kind_codes_count']
2658 m = {
2659 'selection_file_states': '%(db)s.%(file_states)s',
2660 'selection_nuts': '%(db)s.%(nuts)s',
2661 'selection_kind_codes_count': '%(db)s.%(kind_codes_count)s',
2662 'files': 'files',
2663 'nuts': 'nuts',
2664 'kind_codes': 'kind_codes',
2665 'kind_codes_count': 'kind_codes_count'}
2667 for table_name in table_names:
2668 self._database.print_table(
2669 m[table_name] % self._names, stream=stream)
class SquirrelStats(Object):
    '''
    Container to hold statistics about contents available from a Squirrel.

    See also :py:meth:`Squirrel.get_stats`.
    '''

    nfiles = Int.T(
        help='Number of files in selection.')
    nnuts = Int.T(
        help='Number of index nuts in selection.')
    codes = List.T(
        Tuple.T(content_t=String.T()),
        help='Available code sequences in selection, e.g. '
             '(agency, network, station, location) for stations nuts.')
    kinds = List.T(
        String.T(),
        help='Available content types in selection.')
    total_size = Int.T(
        help='Aggregated file size of files in selection.')
    counts = Dict.T(
        String.T(), Dict.T(Tuple.T(content_t=String.T()), Int.T()),
        help='Breakdown of how many nuts of any content type and code '
             'sequence are available in selection, ``counts[kind][codes]``.')
    time_spans = Dict.T(
        String.T(), Tuple.T(content_t=Timestamp.T()),
        help='Time spans by content type.')
    sources = List.T(
        String.T(),
        help='Descriptions of attached sources.')
    operators = List.T(
        String.T(),
        help='Descriptions of attached operators.')

    def __str__(self):
        kind_counts = dict(
            (kind, sum(self.counts[kind].values())) for kind in self.kinds)

        scodes = model.codes_to_str_abbreviated(self.codes)

        ssources = '<none>' if not self.sources else '\n' + '\n'.join(
            ' ' + s for s in self.sources)

        soperators = '<none>' if not self.operators else '\n' + '\n'.join(
            ' ' + s for s in self.operators)

        def stime(t):
            # Sentinel min/max times are rendered as '<none>'.
            return util.tts(t) if t is not None and t not in (
                model.g_tmin, model.g_tmax) else '<none>'

        def stable(rows):
            # Simple column-aligned table formatting.
            ns = [max(len(w) for w in col) for col in zip(*rows)]
            return '\n'.join(
                ' '.join(w.ljust(n) for n, w in zip(ns, row))
                for row in rows)

        def indent(s):
            return '\n'.join(' '+line for line in s.splitlines())

        stspans = '<none>' if not self.kinds else '\n' + indent(stable([(
            kind + ':',
            str(kind_counts[kind]),
            stime(self.time_spans[kind][0]),
            '-',
            stime(self.time_spans[kind][1])) for kind in sorted(self.kinds)]))

        s = '''
Number of files: %i
Total size of known files: %s
Number of index nuts: %i
Available content kinds: %s
Available codes: %s
Sources: %s
Operators: %s''' % (
            self.nfiles,
            util.human_bytesize(self.total_size),
            self.nnuts,
            stspans, scodes, ssources, soperators)

        return s.lstrip()
# Public API of this module.
__all__ = [
    'Squirrel',
    'SquirrelStats',
]