1# http://pyrocko.org - GPLv3
2#
3# The Pyrocko Developers, 21st Century
4# ---|P------/S----------~Lg----------
6from __future__ import absolute_import, print_function
8import sys
9import os
11import math
12import logging
13import threading
14import queue
15from collections import defaultdict
17from pyrocko.guts import Object, Int, List, Tuple, String, Timestamp, Dict
18from pyrocko import util, trace
19from pyrocko.progress import progress
21from . import model, io, cache, dataset
23from .model import to_kind_id, WaveformOrder, to_kind, to_codes, \
24 STATION, CHANNEL, RESPONSE, EVENT, WAVEFORM
25from .client import fdsn, catalog
26from .selection import Selection, filldocs
27from .database import abspath
28from . import client, environment, error
30logger = logging.getLogger('psq.base')
32guts_prefix = 'squirrel'
35def make_task(*args):
36 return progress.task(*args, logger=logger)
39def lpick(condition, seq):
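    # Partition seq into two lists: the first collects the elements for which
    # condition is false, the second those for which it is true.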
40 ft = [], []
41 for ele in seq:
42 ft[int(bool(condition(ele)))].append(ele)
44 return ft
47# derived list class to enable detection of already preprocessed codes patterns
48class codes_patterns_list(list):
49 pass
52def codes_patterns_for_kind(kind_id, codes):
53 if isinstance(codes, codes_patterns_list):
54 return codes
56 if isinstance(codes, list):
57 lcodes = codes_patterns_list()
58 for sc in codes:
59 lcodes.extend(codes_patterns_for_kind(kind_id, sc))
61 return lcodes
63 codes = to_codes(kind_id, codes)
65 lcodes = codes_patterns_list()
66 lcodes.append(codes)
67 if kind_id == model.STATION:
        lcodes.append(codes.replace(location='[*]'))
70 return lcodes
73def blocks(tmin, tmax, deltat, nsamples_block=100000):
74 tblock = util.to_time_float(deltat * nsamples_block)
75 iblock_min = int(math.floor(tmin / tblock))
76 iblock_max = int(math.ceil(tmax / tblock))
77 for iblock in range(iblock_min, iblock_max):
78 yield iblock * tblock, (iblock+1) * tblock
81def gaps(avail, tmin, tmax):
82 assert tmin < tmax
84 data = [(tmax, 1), (tmin, -1)]
85 for (tmin_a, tmax_a) in avail:
86 assert tmin_a < tmax_a
87 data.append((tmin_a, 1))
88 data.append((tmax_a, -1))
90 data.sort()
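    # Sweep over the time-sorted edges: the query span contributes the edges
    # (tmin, -1) and (tmax, +1), each available span contributes (start, +1)
    # and (end, -1). The running sum s is zero exactly where the query span
    # is not covered by any available span; those stretches are the gaps.
    # Illustration: gaps([(2., 3.)], 1., 4.) == [(1., 2.), (3., 4.)]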
91 s = 1
92 gaps = []
93 tmin_g = None
94 for t, x in data:
95 if s == 1 and x == -1:
96 tmin_g = t
97 elif s == 0 and x == 1 and tmin_g is not None:
98 tmax_g = t
99 if tmin_g != tmax_g:
100 gaps.append((tmin_g, tmax_g))
102 s += x
104 return gaps
107def order_key(order):
108 return (order.codes, order.tmin, order.tmax)
111class Batch(object):
112 '''
113 Batch of waveforms from window-wise data extraction.
115 Encapsulates state and results yielded for each window in window-wise
116 waveform extraction with the :py:meth:`Squirrel.chopper_waveforms` method.
118 *Attributes:*
120 .. py:attribute:: tmin
122 Start of this time window.
124 .. py:attribute:: tmax
126 End of this time window.
128 .. py:attribute:: i
130 Index of this time window in sequence.
132 .. py:attribute:: n
134 Total number of time windows in sequence.
136 .. py:attribute:: traces
138 Extracted waveforms for this time window.
139 '''
141 def __init__(self, tmin, tmax, i, n, traces):
142 self.tmin = tmin
143 self.tmax = tmax
144 self.i = i
145 self.n = n
146 self.traces = traces
149class Squirrel(Selection):
150 '''
151 Prompt, lazy, indexing, caching, dynamic seismological dataset access.
153 :param env:
154 Squirrel environment instance or directory path to use as starting
155 point for its detection. By default, the current directory is used as
156 starting point. When searching for a usable environment the directory
157 ``'.squirrel'`` or ``'squirrel'`` in the current (or starting point)
158 directory is used if it exists, otherwise the parent directories are
        searched upwards for the existence of such a directory. If no such
160 directory is found, the user's global Squirrel environment
161 ``'$HOME/.pyrocko/squirrel'`` is used.
162 :type env:
163 :py:class:`~pyrocko.squirrel.environment.Environment` or
164 :py:class:`str`
166 :param database:
167 Database instance or path to database. By default the
168 database found in the detected Squirrel environment is used.
169 :type database:
170 :py:class:`~pyrocko.squirrel.database.Database` or :py:class:`str`
172 :param cache_path:
173 Directory path to use for data caching. By default, the ``'cache'``
174 directory in the detected Squirrel environment is used.
175 :type cache_path:
176 :py:class:`str`
178 :param persistent:
179 If given a name, create a persistent selection.
180 :type persistent:
181 :py:class:`str`
183 This is the central class of the Squirrel framework. It provides a unified
184 interface to query and access seismic waveforms, station meta-data and
185 event information from local file collections and remote data sources. For
    prompt responses, a sophisticated database setup is used under the hood.
    To speed up assembly of ad-hoc data selections, files are indexed on
    first use and the extracted meta-data is remembered in the database for
    subsequent accesses. Bulk data is lazily loaded from disk and remote
    sources, only when requested. Once loaded, data is cached in memory to
191 expedite typical access patterns. Files and data sources can be dynamically
192 added to and removed from the Squirrel selection at runtime.
194 Queries are restricted to the contents of the files currently added to the
195 Squirrel selection (usually a subset of the file meta-information
196 collection in the database). This list of files is referred to here as the
197 "selection". By default, temporary tables are created in the attached
198 database to hold the names of the files in the selection as well as various
199 indices and counters. These tables are only visible inside the application
200 which created them and are deleted when the database connection is closed
201 or the application exits. To create a selection which is not deleted at
202 exit, supply a name to the ``persistent`` argument of the Squirrel
203 constructor. Persistent selections are shared among applications using the
204 same database.
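    A minimal usage sketch (the file paths and the time span below are
    placeholders to be replaced with your own data)::

        from pyrocko import util
        from pyrocko.squirrel import Squirrel

        sq = Squirrel()
        sq.add(['data/waveforms', 'data/stations.xml'])  # index local files
        stations = sq.get_stations()
        traces = sq.get_waveforms(
            tmin=util.str_to_time('2023-01-01 00:00:00'),
            tmax=util.str_to_time('2023-01-01 01:00:00'))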
206 **Method summary**
208 Some of the methods are implemented in :py:class:`Squirrel`'s base class
209 :py:class:`~pyrocko.squirrel.selection.Selection`.
211 .. autosummary::
213 ~Squirrel.add
214 ~Squirrel.add_source
215 ~Squirrel.add_fdsn
216 ~Squirrel.add_catalog
217 ~Squirrel.add_dataset
218 ~Squirrel.add_virtual
219 ~Squirrel.update
220 ~Squirrel.update_waveform_promises
221 ~Squirrel.advance_accessor
222 ~Squirrel.clear_accessor
223 ~Squirrel.reload
224 ~pyrocko.squirrel.selection.Selection.iter_paths
225 ~Squirrel.iter_nuts
226 ~Squirrel.iter_kinds
227 ~Squirrel.iter_deltats
228 ~Squirrel.iter_codes
229 ~pyrocko.squirrel.selection.Selection.get_paths
230 ~Squirrel.get_nuts
231 ~Squirrel.get_kinds
232 ~Squirrel.get_deltats
233 ~Squirrel.get_codes
234 ~Squirrel.get_counts
235 ~Squirrel.get_time_span
236 ~Squirrel.get_deltat_span
237 ~Squirrel.get_nfiles
238 ~Squirrel.get_nnuts
239 ~Squirrel.get_total_size
240 ~Squirrel.get_stats
241 ~Squirrel.get_content
242 ~Squirrel.get_stations
243 ~Squirrel.get_channels
244 ~Squirrel.get_responses
245 ~Squirrel.get_events
246 ~Squirrel.get_waveform_nuts
247 ~Squirrel.get_waveforms
248 ~Squirrel.chopper_waveforms
249 ~Squirrel.get_coverage
250 ~Squirrel.pile
251 ~Squirrel.snuffle
252 ~Squirrel.glob_codes
253 ~pyrocko.squirrel.selection.Selection.get_database
254 ~Squirrel.print_tables
255 '''
257 def __init__(
258 self, env=None, database=None, cache_path=None, persistent=None):
260 if not isinstance(env, environment.Environment):
261 env = environment.get_environment(env)
263 if database is None:
264 database = env.expand_path(env.database_path)
266 if cache_path is None:
267 cache_path = env.expand_path(env.cache_path)
269 if persistent is None:
270 persistent = env.persistent
272 Selection.__init__(
273 self, database=database, persistent=persistent)
275 self.get_database().set_basepath(os.path.dirname(env.get_basepath()))
277 self._content_caches = {
278 'waveform': cache.ContentCache(),
279 'default': cache.ContentCache()}
281 self._cache_path = cache_path
283 self._sources = []
284 self._operators = []
285 self._operator_registry = {}
287 self._pile = None
288 self._n_choppers_active = 0
290 self._names.update({
291 'nuts': self.name + '_nuts',
292 'kind_codes_count': self.name + '_kind_codes_count',
293 'coverage': self.name + '_coverage'})
295 with self.transaction('create tables') as cursor:
296 self._create_tables_squirrel(cursor)
298 def _create_tables_squirrel(self, cursor):
300 cursor.execute(self._register_table(self._sql(
301 '''
302 CREATE TABLE IF NOT EXISTS %(db)s.%(nuts)s (
303 nut_id integer PRIMARY KEY,
304 file_id integer,
305 file_segment integer,
306 file_element integer,
307 kind_id integer,
308 kind_codes_id integer,
309 tmin_seconds integer,
310 tmin_offset integer,
311 tmax_seconds integer,
312 tmax_offset integer,
313 kscale integer)
314 ''')))
316 cursor.execute(self._register_table(self._sql(
317 '''
318 CREATE TABLE IF NOT EXISTS %(db)s.%(kind_codes_count)s (
319 kind_codes_id integer PRIMARY KEY,
320 count integer)
321 ''')))
323 cursor.execute(self._sql(
324 '''
325 CREATE UNIQUE INDEX IF NOT EXISTS %(db)s.%(nuts)s_file_element
326 ON %(nuts)s (file_id, file_segment, file_element)
327 '''))
329 cursor.execute(self._sql(
330 '''
331 CREATE INDEX IF NOT EXISTS %(db)s.%(nuts)s_index_file_id
332 ON %(nuts)s (file_id)
333 '''))
335 cursor.execute(self._sql(
336 '''
337 CREATE INDEX IF NOT EXISTS %(db)s.%(nuts)s_index_tmin_seconds
338 ON %(nuts)s (kind_id, tmin_seconds)
339 '''))
341 cursor.execute(self._sql(
342 '''
343 CREATE INDEX IF NOT EXISTS %(db)s.%(nuts)s_index_tmax_seconds
344 ON %(nuts)s (kind_id, tmax_seconds)
345 '''))
347 cursor.execute(self._sql(
348 '''
349 CREATE INDEX IF NOT EXISTS %(db)s.%(nuts)s_index_kscale
350 ON %(nuts)s (kind_id, kscale, tmin_seconds)
351 '''))
353 cursor.execute(self._sql(
354 '''
355 CREATE TRIGGER IF NOT EXISTS %(db)s.%(nuts)s_delete_nuts
356 BEFORE DELETE ON main.files FOR EACH ROW
357 BEGIN
358 DELETE FROM %(nuts)s WHERE file_id == old.file_id;
359 END
360 '''))
362 # trigger only on size to make silent update of mtime possible
363 cursor.execute(self._sql(
364 '''
365 CREATE TRIGGER IF NOT EXISTS %(db)s.%(nuts)s_delete_nuts2
366 BEFORE UPDATE OF size ON main.files FOR EACH ROW
367 BEGIN
368 DELETE FROM %(nuts)s WHERE file_id == old.file_id;
369 END
370 '''))
372 cursor.execute(self._sql(
373 '''
374 CREATE TRIGGER IF NOT EXISTS
375 %(db)s.%(file_states)s_delete_files
376 BEFORE DELETE ON %(db)s.%(file_states)s FOR EACH ROW
377 BEGIN
378 DELETE FROM %(nuts)s WHERE file_id == old.file_id;
379 END
380 '''))
382 cursor.execute(self._sql(
383 '''
384 CREATE TRIGGER IF NOT EXISTS %(db)s.%(nuts)s_inc_kind_codes
385 BEFORE INSERT ON %(nuts)s FOR EACH ROW
386 BEGIN
387 INSERT OR IGNORE INTO %(kind_codes_count)s VALUES
388 (new.kind_codes_id, 0);
389 UPDATE %(kind_codes_count)s
390 SET count = count + 1
391 WHERE new.kind_codes_id
392 == %(kind_codes_count)s.kind_codes_id;
393 END
394 '''))
396 cursor.execute(self._sql(
397 '''
398 CREATE TRIGGER IF NOT EXISTS %(db)s.%(nuts)s_dec_kind_codes
399 BEFORE DELETE ON %(nuts)s FOR EACH ROW
400 BEGIN
401 UPDATE %(kind_codes_count)s
402 SET count = count - 1
403 WHERE old.kind_codes_id
404 == %(kind_codes_count)s.kind_codes_id;
405 END
406 '''))
408 cursor.execute(self._register_table(self._sql(
409 '''
410 CREATE TABLE IF NOT EXISTS %(db)s.%(coverage)s (
411 kind_codes_id integer,
412 time_seconds integer,
413 time_offset integer,
414 step integer)
415 ''')))
417 cursor.execute(self._sql(
418 '''
419 CREATE UNIQUE INDEX IF NOT EXISTS %(db)s.%(coverage)s_time
420 ON %(coverage)s (kind_codes_id, time_seconds, time_offset)
421 '''))
423 cursor.execute(self._sql(
424 '''
425 CREATE TRIGGER IF NOT EXISTS %(db)s.%(nuts)s_add_coverage
426 AFTER INSERT ON %(nuts)s FOR EACH ROW
427 BEGIN
428 INSERT OR IGNORE INTO %(coverage)s VALUES
429 (new.kind_codes_id, new.tmin_seconds, new.tmin_offset, 0)
430 ;
431 UPDATE %(coverage)s
432 SET step = step + 1
433 WHERE new.kind_codes_id == %(coverage)s.kind_codes_id
434 AND new.tmin_seconds == %(coverage)s.time_seconds
435 AND new.tmin_offset == %(coverage)s.time_offset
436 ;
437 INSERT OR IGNORE INTO %(coverage)s VALUES
438 (new.kind_codes_id, new.tmax_seconds, new.tmax_offset, 0)
439 ;
440 UPDATE %(coverage)s
441 SET step = step - 1
442 WHERE new.kind_codes_id == %(coverage)s.kind_codes_id
443 AND new.tmax_seconds == %(coverage)s.time_seconds
444 AND new.tmax_offset == %(coverage)s.time_offset
445 ;
446 DELETE FROM %(coverage)s
447 WHERE new.kind_codes_id == %(coverage)s.kind_codes_id
448 AND new.tmin_seconds == %(coverage)s.time_seconds
449 AND new.tmin_offset == %(coverage)s.time_offset
450 AND step == 0
451 ;
452 DELETE FROM %(coverage)s
453 WHERE new.kind_codes_id == %(coverage)s.kind_codes_id
454 AND new.tmax_seconds == %(coverage)s.time_seconds
455 AND new.tmax_offset == %(coverage)s.time_offset
456 AND step == 0
457 ;
458 END
459 '''))
461 cursor.execute(self._sql(
462 '''
463 CREATE TRIGGER IF NOT EXISTS %(db)s.%(nuts)s_remove_coverage
464 BEFORE DELETE ON %(nuts)s FOR EACH ROW
465 BEGIN
466 INSERT OR IGNORE INTO %(coverage)s VALUES
467 (old.kind_codes_id, old.tmin_seconds, old.tmin_offset, 0)
468 ;
469 UPDATE %(coverage)s
470 SET step = step - 1
471 WHERE old.kind_codes_id == %(coverage)s.kind_codes_id
472 AND old.tmin_seconds == %(coverage)s.time_seconds
473 AND old.tmin_offset == %(coverage)s.time_offset
474 ;
475 INSERT OR IGNORE INTO %(coverage)s VALUES
476 (old.kind_codes_id, old.tmax_seconds, old.tmax_offset, 0)
477 ;
478 UPDATE %(coverage)s
479 SET step = step + 1
480 WHERE old.kind_codes_id == %(coverage)s.kind_codes_id
481 AND old.tmax_seconds == %(coverage)s.time_seconds
482 AND old.tmax_offset == %(coverage)s.time_offset
483 ;
484 DELETE FROM %(coverage)s
485 WHERE old.kind_codes_id == %(coverage)s.kind_codes_id
486 AND old.tmin_seconds == %(coverage)s.time_seconds
487 AND old.tmin_offset == %(coverage)s.time_offset
488 AND step == 0
489 ;
490 DELETE FROM %(coverage)s
491 WHERE old.kind_codes_id == %(coverage)s.kind_codes_id
492 AND old.tmax_seconds == %(coverage)s.time_seconds
493 AND old.tmax_offset == %(coverage)s.time_offset
494 AND step == 0
495 ;
496 END
497 '''))
499 def _delete(self):
500 '''Delete database tables associated with this Squirrel.'''
502 with self.transaction('delete tables') as cursor:
503 for s in '''
504 DROP TRIGGER %(db)s.%(nuts)s_delete_nuts;
505 DROP TRIGGER %(db)s.%(nuts)s_delete_nuts2;
506 DROP TRIGGER %(db)s.%(file_states)s_delete_files;
507 DROP TRIGGER %(db)s.%(nuts)s_inc_kind_codes;
508 DROP TRIGGER %(db)s.%(nuts)s_dec_kind_codes;
509 DROP TABLE %(db)s.%(nuts)s;
510 DROP TABLE %(db)s.%(kind_codes_count)s;
511 DROP TRIGGER IF EXISTS %(db)s.%(nuts)s_add_coverage;
512 DROP TRIGGER IF EXISTS %(db)s.%(nuts)s_remove_coverage;
513 DROP TABLE IF EXISTS %(db)s.%(coverage)s;
514 '''.strip().splitlines():
516 cursor.execute(self._sql(s))
518 Selection._delete(self)
520 @filldocs
521 def add(self,
522 paths,
523 kinds=None,
524 format='detect',
525 include=None,
526 exclude=None,
527 check=True):
529 '''
530 Add files to the selection.
532 :param paths:
533 Iterator yielding paths to files or directories to be added to the
534 selection. Recurses into directories. If given a ``str``, it
535 is treated as a single path to be added.
536 :type paths:
537 :py:class:`list` of :py:class:`str`
539 :param kinds:
540 Content types to be made available through the Squirrel selection.
541 By default, all known content types are accepted.
542 :type kinds:
543 :py:class:`list` of :py:class:`str`
545 :param format:
546 File format identifier or ``'detect'`` to enable auto-detection
547 (available: %(file_formats)s).
548 :type format:
549 str
551 :param include:
552 If not ``None``, files are only included if their paths match the
553 given regular expression pattern.
        :type include:
            str
557 :param exclude:
558 If not ``None``, files are only included if their paths do not
559 match the given regular expression pattern.
        :type exclude:
            str
563 :param check:
564 If ``True``, all file modification times are checked to see if
565 cached information has to be updated (slow). If ``False``, only
566 previously unknown files are indexed and cached information is used
            for known files, regardless of file state (fast, corresponds to
568 Squirrel's ``--optimistic`` mode). File deletions will go
569 undetected in the latter case.
570 :type check:
571 bool
573 :Complexity:
574 O(log N)
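
        Example (sketch; the directory name is a placeholder)::

            sq = Squirrel()
            sq.add('data/', kinds=['waveform', 'station'])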
575 '''
577 if isinstance(kinds, str):
578 kinds = (kinds,)
580 if isinstance(paths, str):
581 paths = [paths]
583 kind_mask = model.to_kind_mask(kinds)
585 with progress.view():
586 Selection.add(
587 self, util.iter_select_files(
588 paths,
589 show_progress=False,
590 include=include,
591 exclude=exclude,
592 pass_through=lambda path: path.startswith('virtual:')
593 ), kind_mask, format)
595 self._load(check)
596 self._update_nuts()
598 def reload(self):
599 '''
600 Check for modifications and reindex modified files.
602 Based on file modification times.
603 '''
605 self._set_file_states_force_check()
606 self._load(check=True)
607 self._update_nuts()
609 def add_virtual(self, nuts, virtual_paths=None):
610 '''
611 Add content which is not backed by files.
613 :param nuts:
614 Content pieces to be added.
615 :type nuts:
616 iterator yielding :py:class:`~pyrocko.squirrel.model.Nut` objects
618 :param virtual_paths:
            List of virtual paths to add. Providing these avoids building a
            temporary list of the nuts just to aggregate their file paths.
621 :type virtual_paths:
622 :py:class:`list` of :py:class:`str`
624 Stores to the main database and the selection.
625 '''
627 if isinstance(virtual_paths, str):
628 virtual_paths = [virtual_paths]
630 if virtual_paths is None:
631 if not isinstance(nuts, list):
632 nuts = list(nuts)
633 virtual_paths = set(nut.file_path for nut in nuts)
635 Selection.add(self, virtual_paths)
636 self.get_database().dig(nuts)
637 self._update_nuts()
639 def add_volatile(self, nuts):
640 if not isinstance(nuts, list):
641 nuts = list(nuts)
643 paths = list(set(nut.file_path for nut in nuts))
644 io.backends.virtual.add_nuts(nuts)
645 self.add_virtual(nuts, paths)
646 self._volatile_paths.extend(paths)
648 def add_volatile_waveforms(self, traces):
649 '''
650 Add in-memory waveforms which will be removed when the app closes.
651 '''
653 name = model.random_name()
655 path = 'virtual:volatile:%s' % name
657 nuts = []
658 for itr, tr in enumerate(traces):
659 assert tr.tmin <= tr.tmax
660 tmin_seconds, tmin_offset = model.tsplit(tr.tmin)
661 tmax_seconds, tmax_offset = model.tsplit(
662 tr.tmin + tr.data_len()*tr.deltat)
664 nuts.append(model.Nut(
665 file_path=path,
666 file_format='virtual',
667 file_segment=itr,
668 file_element=0,
669 file_mtime=0,
670 codes=tr.codes,
671 tmin_seconds=tmin_seconds,
672 tmin_offset=tmin_offset,
673 tmax_seconds=tmax_seconds,
674 tmax_offset=tmax_offset,
675 deltat=tr.deltat,
676 kind_id=to_kind_id('waveform'),
677 content=tr))
679 self.add_volatile(nuts)
680 return path
682 def _load(self, check):
683 for _ in io.iload(
684 self,
685 content=[],
686 skip_unchanged=True,
687 check=check):
688 pass
690 def _update_nuts(self, transaction=None):
691 transaction = transaction or self.transaction('update nuts')
692 with make_task('Aggregating selection') as task, \
693 transaction as cursor:
695 self._conn.set_progress_handler(task.update, 100000)
696 nrows = cursor.execute(self._sql(
697 '''
698 INSERT INTO %(db)s.%(nuts)s
699 SELECT NULL,
700 nuts.file_id, nuts.file_segment, nuts.file_element,
701 nuts.kind_id, nuts.kind_codes_id,
702 nuts.tmin_seconds, nuts.tmin_offset,
703 nuts.tmax_seconds, nuts.tmax_offset,
704 nuts.kscale
705 FROM %(db)s.%(file_states)s
706 INNER JOIN nuts
707 ON %(db)s.%(file_states)s.file_id == nuts.file_id
708 INNER JOIN kind_codes
709 ON nuts.kind_codes_id ==
710 kind_codes.kind_codes_id
711 WHERE %(db)s.%(file_states)s.file_state != 2
712 AND (((1 << kind_codes.kind_id)
713 & %(db)s.%(file_states)s.kind_mask) != 0)
714 ''')).rowcount
716 task.update(nrows)
717 self._set_file_states_known(transaction)
718 self._conn.set_progress_handler(None, 0)
720 def add_source(self, source, check=True):
721 '''
722 Add remote resource.
724 :param source:
725 Remote data access client instance.
726 :type source:
727 subclass of :py:class:`~pyrocko.squirrel.client.base.Source`
728 '''
730 self._sources.append(source)
731 source.setup(self, check=check)
733 def add_fdsn(self, *args, **kwargs):
734 '''
735 Add FDSN site for transparent remote data access.
737 Arguments are passed to
738 :py:class:`~pyrocko.squirrel.client.fdsn.FDSNSource`.
739 '''
741 self.add_source(fdsn.FDSNSource(*args, **kwargs))
743 def add_catalog(self, *args, **kwargs):
744 '''
745 Add online catalog for transparent event data access.
747 Arguments are passed to
748 :py:class:`~pyrocko.squirrel.client.catalog.CatalogSource`.
749 '''
751 self.add_source(catalog.CatalogSource(*args, **kwargs))
753 def add_dataset(self, ds, check=True, warn_persistent=True):
754 '''
755 Read dataset description from file and add its contents.
757 :param ds:
            Path to dataset description file or dataset description object.
            See :py:mod:`~pyrocko.squirrel.dataset`.
760 :type ds:
761 :py:class:`str` or :py:class:`~pyrocko.squirrel.dataset.Dataset`
763 :param check:
764 If ``True``, all file modification times are checked to see if
765 cached information has to be updated (slow). If ``False``, only
766 previously unknown files are indexed and cached information is used
            for known files, regardless of file state (fast, corresponds to
768 Squirrel's ``--optimistic`` mode). File deletions will go
769 undetected in the latter case.
770 :type check:
771 bool
772 '''
773 if isinstance(ds, str):
774 ds = dataset.read_dataset(ds)
775 path = ds
776 else:
777 path = None
779 if warn_persistent and ds.persistent and (
780 not self._persistent or (self._persistent != ds.persistent)):
782 logger.warning(
783 'Dataset `persistent` flag ignored. Can not be set on already '
784 'existing Squirrel instance.%s' % (
785 ' Dataset: %s' % path if path else ''))
787 ds.setup(self, check=check)
789 def _get_selection_args(
790 self, kind_id,
791 obj=None, tmin=None, tmax=None, time=None, codes=None):
793 if codes is not None:
794 codes = codes_patterns_for_kind(kind_id, codes)
796 if time is not None:
797 tmin = time
798 tmax = time
800 if obj is not None:
801 tmin = tmin if tmin is not None else obj.tmin
802 tmax = tmax if tmax is not None else obj.tmax
803 codes = codes if codes is not None else codes_patterns_for_kind(
804 kind_id, obj.codes)
806 return tmin, tmax, codes
808 def _get_selection_args_str(self, *args, **kwargs):
810 tmin, tmax, codes = self._get_selection_args(*args, **kwargs)
811 return 'tmin: %s, tmax: %s, codes: %s' % (
812 util.time_to_str(tmin) if tmin is not None else 'none',
            util.time_to_str(tmax) if tmax is not None else 'none',
814 ','.join(str(entry) for entry in codes))
816 def _selection_args_to_kwargs(
817 self, obj=None, tmin=None, tmax=None, time=None, codes=None):
819 return dict(obj=obj, tmin=tmin, tmax=tmax, time=time, codes=codes)
821 def _timerange_sql(self, tmin, tmax, kind, cond, args, naiv):
823 tmin_seconds, tmin_offset = model.tsplit(tmin)
824 tmax_seconds, tmax_offset = model.tsplit(tmax)
825 if naiv:
826 cond.append('%(db)s.%(nuts)s.tmin_seconds <= ?')
827 args.append(tmax_seconds)
828 else:
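            # Exploit the kscale binning: nuts are grouped by the order of
            # magnitude of their time span, so for each bin only a bounded
            # range of tmin_seconds values has to be scanned.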
829 tscale_edges = model.tscale_edges
830 tmin_cond = []
831 for kscale in range(tscale_edges.size + 1):
832 if kscale != tscale_edges.size:
833 tscale = int(tscale_edges[kscale])
834 tmin_cond.append('''
835 (%(db)s.%(nuts)s.kind_id = ?
836 AND %(db)s.%(nuts)s.kscale == ?
837 AND %(db)s.%(nuts)s.tmin_seconds BETWEEN ? AND ?)
838 ''')
839 args.extend(
840 (to_kind_id(kind), kscale,
841 tmin_seconds - tscale - 1, tmax_seconds + 1))
843 else:
844 tmin_cond.append('''
845 (%(db)s.%(nuts)s.kind_id == ?
846 AND %(db)s.%(nuts)s.kscale == ?
847 AND %(db)s.%(nuts)s.tmin_seconds <= ?)
848 ''')
850 args.extend(
851 (to_kind_id(kind), kscale, tmax_seconds + 1))
852 if tmin_cond:
853 cond.append(' ( ' + ' OR '.join(tmin_cond) + ' ) ')
855 cond.append('%(db)s.%(nuts)s.tmax_seconds >= ?')
856 args.append(tmin_seconds)
858 def iter_nuts(
859 self, kind=None, tmin=None, tmax=None, codes=None, naiv=False,
860 kind_codes_ids=None, path=None):
862 '''
863 Iterate over content entities matching given constraints.
865 :param kind:
866 Content kind (or kinds) to extract.
867 :type kind:
868 :py:class:`str`, :py:class:`list` of :py:class:`str`
870 :param tmin:
871 Start time of query interval.
872 :type tmin:
873 timestamp
875 :param tmax:
876 End time of query interval.
877 :type tmax:
878 timestamp
880 :param codes:
881 List of code patterns to query.
882 :type codes:
883 :py:class:`list` of :py:class:`~pyrocko.squirrel.model.Codes`
884 objects appropriate for the queried content type, or anything which
885 can be converted to such objects.
887 :param naiv:
888 Bypass time span lookup through indices (slow, for testing).
889 :type naiv:
890 :py:class:`bool`
892 :param kind_codes_ids:
893 Kind-codes IDs of contents to be retrieved (internal use).
894 :type kind_codes_ids:
895 :py:class:`list` of :py:class:`int`
897 :yields:
898 :py:class:`~pyrocko.squirrel.model.Nut` objects representing the
899 intersecting content.
901 :complexity:
902 O(log N) for the time selection part due to heavy use of database
903 indices.
905 Query time span is treated as a half-open interval ``[tmin, tmax)``.
        However, if ``tmin`` equals ``tmax``, the edge logic is changed to a
        closed interval, so that content intersecting with the time instant
        ``t = tmin = tmax`` is returned (otherwise nothing would be returned,
        as ``[t, t)`` never matches anything).

        Time spans of content entities to be matched are also treated as
        half-open intervals, e.g. content span ``[0, 1)`` is matched by query
        span ``[0, 1)`` but not by ``[-1, 0)`` or ``[1, 2)``. Here, too, the
        logic is changed to a closed interval when the content time span is
        empty, i.e. when it indicates a time instant. E.g. the time instant 0
        is matched by ``[0, 1)`` but not by ``[-1, 0)`` or ``[1, 2)``.
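
        Example (sketch, assuming ``sq`` is a :py:class:`Squirrel` instance
        holding waveform data for the queried hour)::

            tmin = util.str_to_time('2023-01-01 00:00:00')
            for nut in sq.iter_nuts('waveform', tmin=tmin, tmax=tmin+3600.):
                print(nut.codes, nut.tmin, nut.tmax)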
917 '''
919 if not isinstance(kind, str):
920 if kind is None:
921 kind = model.g_content_kinds
922 for kind_ in kind:
923 for nut in self.iter_nuts(kind_, tmin, tmax, codes):
924 yield nut
926 return
928 kind_id = to_kind_id(kind)
930 cond = []
931 args = []
932 if tmin is not None or tmax is not None:
933 assert kind is not None
934 if tmin is None:
935 tmin = self.get_time_span()[0]
936 if tmax is None:
937 tmax = self.get_time_span()[1] + 1.0
939 self._timerange_sql(tmin, tmax, kind, cond, args, naiv)
941 cond.append('kind_codes.kind_id == ?')
942 args.append(kind_id)
944 if codes is not None:
945 pats = codes_patterns_for_kind(kind_id, codes)
947 if pats:
948 # could optimize this by using IN for non-patterns
949 cond.append(
950 ' ( %s ) ' % ' OR '.join(
951 ('kind_codes.codes GLOB ?',) * len(pats)))
952 args.extend(pat.safe_str for pat in pats)
954 if kind_codes_ids is not None:
955 cond.append(
956 ' ( kind_codes.kind_codes_id IN ( %s ) ) ' % ', '.join(
957 '?'*len(kind_codes_ids)))
959 args.extend(kind_codes_ids)
961 db = self.get_database()
962 if path is not None:
963 cond.append('files.path == ?')
964 args.append(db.relpath(abspath(path)))
966 sql = ('''
967 SELECT
968 files.path,
969 files.format,
970 files.mtime,
971 files.size,
972 %(db)s.%(nuts)s.file_segment,
973 %(db)s.%(nuts)s.file_element,
974 kind_codes.kind_id,
975 kind_codes.codes,
976 %(db)s.%(nuts)s.tmin_seconds,
977 %(db)s.%(nuts)s.tmin_offset,
978 %(db)s.%(nuts)s.tmax_seconds,
979 %(db)s.%(nuts)s.tmax_offset,
980 kind_codes.deltat
981 FROM files
982 INNER JOIN %(db)s.%(nuts)s
983 ON files.file_id == %(db)s.%(nuts)s.file_id
984 INNER JOIN kind_codes
985 ON %(db)s.%(nuts)s.kind_codes_id == kind_codes.kind_codes_id
986 ''')
988 if cond:
989 sql += ''' WHERE ''' + ' AND '.join(cond)
991 sql = self._sql(sql)
992 if tmin is None and tmax is None:
993 for row in self._conn.execute(sql, args):
994 row = (db.abspath(row[0]),) + row[1:]
995 nut = model.Nut(values_nocheck=row)
996 yield nut
997 else:
998 assert tmin is not None and tmax is not None
999 if tmin == tmax:
1000 for row in self._conn.execute(sql, args):
1001 row = (db.abspath(row[0]),) + row[1:]
1002 nut = model.Nut(values_nocheck=row)
1003 if (nut.tmin <= tmin < nut.tmax) \
1004 or (nut.tmin == nut.tmax and tmin == nut.tmin):
1006 yield nut
1007 else:
1008 for row in self._conn.execute(sql, args):
1009 row = (db.abspath(row[0]),) + row[1:]
1010 nut = model.Nut(values_nocheck=row)
1011 if (tmin < nut.tmax and nut.tmin < tmax) \
1012 or (nut.tmin == nut.tmax
1013 and tmin <= nut.tmin < tmax):
1015 yield nut
1017 def get_nuts(self, *args, **kwargs):
1018 '''
1019 Get content entities matching given constraints.
1021 Like :py:meth:`iter_nuts` but returns results as a list.
1022 '''
1024 return list(self.iter_nuts(*args, **kwargs))
1026 def _split_nuts(
1027 self, kind, tmin=None, tmax=None, codes=None, path=None):
1029 kind_id = to_kind_id(kind)
1030 tmin_seconds, tmin_offset = model.tsplit(tmin)
1031 tmax_seconds, tmax_offset = model.tsplit(tmax)
1033 names_main_nuts = dict(self._names)
1034 names_main_nuts.update(db='main', nuts='nuts')
1036 db = self.get_database()
1038 def main_nuts(s):
1039 return s % names_main_nuts
1041 with self.transaction('split nuts') as cursor:
1042 # modify selection and main
1043 for sql_subst in [
1044 self._sql, main_nuts]:
1046 cond = []
1047 args = []
1049 self._timerange_sql(tmin, tmax, kind, cond, args, False)
1051 if codes is not None:
1052 pats = codes_patterns_for_kind(kind_id, codes)
1053 if pats:
1054 cond.append(
1055 ' ( %s ) ' % ' OR '.join(
1056 ('kind_codes.codes GLOB ?',) * len(pats)))
1057 args.extend(pat.safe_str for pat in pats)
1059 if path is not None:
1060 cond.append('files.path == ?')
1061 args.append(db.relpath(abspath(path)))
1063 sql = sql_subst('''
1064 SELECT
1065 %(db)s.%(nuts)s.nut_id,
1066 %(db)s.%(nuts)s.tmin_seconds,
1067 %(db)s.%(nuts)s.tmin_offset,
1068 %(db)s.%(nuts)s.tmax_seconds,
1069 %(db)s.%(nuts)s.tmax_offset,
1070 kind_codes.deltat
1071 FROM files
1072 INNER JOIN %(db)s.%(nuts)s
1073 ON files.file_id == %(db)s.%(nuts)s.file_id
1074 INNER JOIN kind_codes
1075 ON %(db)s.%(nuts)s.kind_codes_id == kind_codes.kind_codes_id
1076 WHERE ''' + ' AND '.join(cond)) # noqa
1078 insert = []
1079 delete = []
1080 for row in cursor.execute(sql, args):
1081 nut_id, nut_tmin_seconds, nut_tmin_offset, \
1082 nut_tmax_seconds, nut_tmax_offset, nut_deltat = row
1084 nut_tmin = model.tjoin(
1085 nut_tmin_seconds, nut_tmin_offset)
1086 nut_tmax = model.tjoin(
1087 nut_tmax_seconds, nut_tmax_offset)
1089 if nut_tmin < tmax and tmin < nut_tmax:
1090 if nut_tmin < tmin:
1091 insert.append((
1092 nut_tmin_seconds, nut_tmin_offset,
1093 tmin_seconds, tmin_offset,
1094 model.tscale_to_kscale(
1095 tmin_seconds - nut_tmin_seconds),
1096 nut_id))
1098 if tmax < nut_tmax:
1099 insert.append((
1100 tmax_seconds, tmax_offset,
1101 nut_tmax_seconds, nut_tmax_offset,
1102 model.tscale_to_kscale(
1103 nut_tmax_seconds - tmax_seconds),
1104 nut_id))
1106 delete.append((nut_id,))
1108 sql_add = '''
1109 INSERT INTO %(db)s.%(nuts)s (
1110 file_id, file_segment, file_element, kind_id,
1111 kind_codes_id, tmin_seconds, tmin_offset,
1112 tmax_seconds, tmax_offset, kscale )
1113 SELECT
1114 file_id, file_segment, file_element,
1115 kind_id, kind_codes_id, ?, ?, ?, ?, ?
1116 FROM %(db)s.%(nuts)s
1117 WHERE nut_id == ?
1118 '''
1119 cursor.executemany(sql_subst(sql_add), insert)
1121 sql_delete = '''
1122 DELETE FROM %(db)s.%(nuts)s WHERE nut_id == ?
1123 '''
1124 cursor.executemany(sql_subst(sql_delete), delete)
1126 def get_time_span(self, kinds=None):
1127 '''
1128 Get time interval over all content in selection.
1130 :param kinds:
1131 If not ``None``, restrict query to given content kinds.
        :type kinds:
            list of str
1135 :complexity:
1136 O(1), independent of the number of nuts.
1138 :returns:
1139 ``(tmin, tmax)``, combined time interval of queried content kinds.
1140 '''
1142 sql_min = self._sql('''
1143 SELECT MIN(tmin_seconds), MIN(tmin_offset)
1144 FROM %(db)s.%(nuts)s
1145 WHERE kind_id == ?
1146 AND tmin_seconds == (
1147 SELECT MIN(tmin_seconds)
1148 FROM %(db)s.%(nuts)s
1149 WHERE kind_id == ?)
1150 ''')
1152 sql_max = self._sql('''
1153 SELECT MAX(tmax_seconds), MAX(tmax_offset)
1154 FROM %(db)s.%(nuts)s
1155 WHERE kind_id == ?
1156 AND tmax_seconds == (
1157 SELECT MAX(tmax_seconds)
1158 FROM %(db)s.%(nuts)s
1159 WHERE kind_id == ?)
1160 ''')
1162 gtmin = None
1163 gtmax = None
1165 if isinstance(kinds, str):
1166 kinds = [kinds]
1168 if kinds is None:
1169 kind_ids = model.g_content_kind_ids
1170 else:
1171 kind_ids = model.to_kind_ids(kinds)
1173 for kind_id in kind_ids:
1174 for tmin_seconds, tmin_offset in self._conn.execute(
1175 sql_min, (kind_id, kind_id)):
1176 tmin = model.tjoin(tmin_seconds, tmin_offset)
1177 if tmin is not None and (gtmin is None or tmin < gtmin):
1178 gtmin = tmin
1180 for (tmax_seconds, tmax_offset) in self._conn.execute(
1181 sql_max, (kind_id, kind_id)):
1182 tmax = model.tjoin(tmax_seconds, tmax_offset)
1183 if tmax is not None and (gtmax is None or tmax > gtmax):
1184 gtmax = tmax
1186 return gtmin, gtmax
1188 def has(self, kinds):
1189 '''
1190 Check availability of given content kinds.
1192 :param kinds:
1193 Content kinds to query.
        :type kinds:
            list of str
1197 :returns:
1198 ``True`` if any of the queried content kinds is available
1199 in the selection.
1200 '''
1201 self_tmin, self_tmax = self.get_time_span(kinds)
1203 return None not in (self_tmin, self_tmax)
1205 def get_deltat_span(self, kind):
1206 '''
1207 Get min and max sampling interval of all content of given kind.
1209 :param kind:
1210 Content kind
1211 :type kind:
1212 str
1214 :returns: ``(deltat_min, deltat_max)``
1215 '''
1217 deltats = [
1218 deltat for deltat in self.get_deltats(kind)
1219 if deltat is not None]
1221 if deltats:
1222 return min(deltats), max(deltats)
1223 else:
1224 return None, None
1226 def iter_kinds(self, codes=None):
1227 '''
1228 Iterate over content types available in selection.
1230 :param codes:
1231 If given, get kinds only for selected codes identifier.
1232 Only a single identifier may be given here and no pattern matching
1233 is done, currently.
1234 :type codes:
1235 :py:class:`~pyrocko.squirrel.model.Codes`
1237 :yields:
1238 Available content kinds as :py:class:`str`.
1240 :complexity:
1241 O(1), independent of number of nuts.
1242 '''
1244 return self._database._iter_kinds(
1245 codes=codes,
1246 kind_codes_count='%(db)s.%(kind_codes_count)s' % self._names)
1248 def iter_deltats(self, kind=None):
1249 '''
1250 Iterate over sampling intervals available in selection.
1252 :param kind:
1253 If given, get sampling intervals only for a given content type.
1254 :type kind:
1255 str
1257 :yields:
1258 :py:class:`float` values.
1260 :complexity:
1261 O(1), independent of number of nuts.
1262 '''
1263 return self._database._iter_deltats(
1264 kind=kind,
1265 kind_codes_count='%(db)s.%(kind_codes_count)s' % self._names)
1267 def iter_codes(self, kind=None):
1268 '''
1269 Iterate over content identifier code sequences available in selection.
1271 :param kind:
1272 If given, get codes only for a given content type.
1273 :type kind:
1274 str
1276 :yields:
1277 :py:class:`tuple` of :py:class:`str`
1279 :complexity:
1280 O(1), independent of number of nuts.
1281 '''
1282 return self._database._iter_codes(
1283 kind=kind,
1284 kind_codes_count='%(db)s.%(kind_codes_count)s' % self._names)
1286 def _iter_codes_info(self, kind=None):
1287 '''
1288 Iterate over number of occurrences of any (kind, codes) combination.
1290 :param kind:
1291 If given, get counts only for selected content type.
1292 :type kind:
1293 str
1295 :yields:
1296 Tuples of the form ``(kind, codes, deltat, kind_codes_id, count)``.
1298 :complexity:
1299 O(1), independent of number of nuts.
1300 '''
1301 return self._database._iter_codes_info(
1302 kind=kind,
1303 kind_codes_count='%(db)s.%(kind_codes_count)s' % self._names)
1305 def get_kinds(self, codes=None):
1306 '''
1307 Get content types available in selection.
1309 :param codes:
1310 If given, get kinds only for selected codes identifier.
1311 Only a single identifier may be given here and no pattern matching
1312 is done, currently.
1313 :type codes:
1314 :py:class:`~pyrocko.squirrel.model.Codes`
1316 :returns:
1317 Sorted list of available content types.
1318 :rtype:
1319 py:class:`list` of :py:class:`str`
1321 :complexity:
1322 O(1), independent of number of nuts.
1324 '''
1325 return sorted(list(self.iter_kinds(codes=codes)))
1327 def get_deltats(self, kind=None):
1328 '''
1329 Get sampling intervals available in selection.
1331 :param kind:
1332 If given, get sampling intervals only for selected content type.
1333 :type kind:
1334 str
1336 :complexity:
1337 O(1), independent of number of nuts.
1339 :returns: Sorted list of available sampling intervals.
1340 '''
1341 return sorted(list(self.iter_deltats(kind=kind)))
1343 def get_codes(self, kind=None):
1344 '''
1345 Get identifier code sequences available in selection.
1347 :param kind:
1348 If given, get codes only for selected content type.
1349 :type kind:
1350 str
1352 :complexity:
1353 O(1), independent of number of nuts.
1355 :returns: Sorted list of available codes as tuples of strings.
1356 '''
1357 return sorted(list(self.iter_codes(kind=kind)))
1359 def get_counts(self, kind=None):
1360 '''
1361 Get number of occurrences of any (kind, codes) combination.
1363 :param kind:
1364 If given, get codes only for selected content type.
1365 :type kind:
1366 str
1368 :complexity:
1369 O(1), independent of number of nuts.
1371 :returns: ``dict`` with ``counts[kind][codes]`` or ``counts[codes]``
1372 if kind is not ``None``
1373 '''
1374 d = {}
1375 for kind_id, codes, _, _, count in self._iter_codes_info(kind=kind):
1376 if kind_id not in d:
1377 v = d[kind_id] = {}
1378 else:
1379 v = d[kind_id]
1381 if codes not in v:
1382 v[codes] = 0
1384 v[codes] += count
1386 if kind is not None:
1387 return d[to_kind_id(kind)]
1388 else:
1389 return dict((to_kind(kind_id), v) for (kind_id, v) in d.items())
1391 def glob_codes(self, kind, codes):
1392 '''
1393 Find codes matching given patterns.
1395 :param kind:
1396 Content kind to be queried.
1397 :type kind:
1398 str
1400 :param codes:
1401 List of code patterns to query.
1402 :type codes:
1403 :py:class:`list` of :py:class:`~pyrocko.squirrel.model.Codes`
1404 objects appropriate for the queried content type, or anything which
1405 can be converted to such objects.
1407 :returns:
1408 List of matches of the form ``[kind_codes_id, codes, deltat]``.
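
        Example (sketch, assuming ``sq`` holds channel meta-data)::

            matches = sq.glob_codes('channel', ['GE.*.*.BH?'])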
1409 '''
1411 kind_id = to_kind_id(kind)
1412 args = [kind_id]
1413 pats = codes_patterns_for_kind(kind_id, codes)
1415 if pats:
1416 codes_cond = 'AND ( %s ) ' % ' OR '.join(
1417 ('kind_codes.codes GLOB ?',) * len(pats))
1419 args.extend(pat.safe_str for pat in pats)
1420 else:
1421 codes_cond = ''
1423 sql = self._sql('''
1424 SELECT kind_codes_id, codes, deltat FROM kind_codes
1425 WHERE
1426 kind_id == ? ''' + codes_cond)
1428 return list(map(list, self._conn.execute(sql, args)))
1430 def update(self, constraint=None, **kwargs):
1431 '''
1432 Update or partially update channel and event inventories.
1434 :param constraint:
1435 Selection of times or areas to be brought up to date.
1436 :type constraint:
1437 :py:class:`~pyrocko.squirrel.client.base.Constraint`
1439 :param \\*\\*kwargs:
1440 Shortcut for setting ``constraint=Constraint(**kwargs)``.
        This function triggers all attached remote sources to check for
1443 updates in the meta-data. The sources will only submit queries when
1444 their expiration date has passed, or if the selection spans into
1445 previously unseen times or areas.
1446 '''
1448 if constraint is None:
1449 constraint = client.Constraint(**kwargs)
1451 for source in self._sources:
1452 source.update_channel_inventory(self, constraint)
1453 source.update_event_inventory(self, constraint)
1455 def update_waveform_promises(self, constraint=None, **kwargs):
1456 '''
1457 Permit downloading of remote waveforms.
1459 :param constraint:
1460 Remote waveforms compatible with the given constraint are enabled
1461 for download.
1462 :type constraint:
1463 :py:class:`~pyrocko.squirrel.client.base.Constraint`
1465 :param \\*\\*kwargs:
1466 Shortcut for setting ``constraint=Constraint(**kwargs)``.
1468 Calling this method permits Squirrel to download waveforms from remote
1469 sources when processing subsequent waveform requests. This works by
        inserting so-called waveform promises into the database. It will look
1471 into the available channels for each remote source and create a promise
1472 for each channel compatible with the given constraint. If the promise
1473 then matches in a waveform request, Squirrel tries to download the
1474 waveform. If the download is successful, the downloaded waveform is
1475 added to the Squirrel and the promise is deleted. If the download
        fails, the promise is kept if the cause of failure appears to be
        temporary, e.g. a network outage. If the failure appears to be
        permanent, however, the promise is deleted so that no
1479 further attempts are made to download a waveform which might not be
1480 available from that server at all. To force re-scheduling after a
1481 permanent failure, call :py:meth:`update_waveform_promises`
1482 yet another time.
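
        Example (sketch; ``tmin`` and ``tmax`` are assumed to be timestamps
        bounding the period of interest)::

            sq.add_fdsn('geofon')
            sq.update(tmin=tmin, tmax=tmax)
            sq.update_waveform_promises(tmin=tmin, tmax=tmax)
            traces = sq.get_waveforms(tmin=tmin, tmax=tmax)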
1483 '''
1485 if constraint is None:
1486 constraint = client.Constraint(**kwargs)
1488 for source in self._sources:
1489 source.update_waveform_promises(self, constraint)
1491 def remove_waveform_promises(self, from_database='selection'):
1492 '''
1493 Remove waveform promises from live selection or global database.
1495 Calling this function removes all waveform promises provided by the
1496 attached sources.
1498 :param from_database:
1499 Remove from live selection ``'selection'`` or global database
1500 ``'global'``.
1501 '''
1502 for source in self._sources:
1503 source.remove_waveform_promises(self, from_database=from_database)
1505 def update_responses(self, constraint=None, **kwargs):
1506 if constraint is None:
1507 constraint = client.Constraint(**kwargs)
1509 for source in self._sources:
1510 source.update_response_inventory(self, constraint)
1512 def get_nfiles(self):
1513 '''
1514 Get number of files in selection.
1515 '''
1517 sql = self._sql('''SELECT COUNT(*) FROM %(db)s.%(file_states)s''')
1518 for row in self._conn.execute(sql):
1519 return row[0]
1521 def get_nnuts(self):
1522 '''
1523 Get number of nuts in selection.
1524 '''
1526 sql = self._sql('''SELECT COUNT(*) FROM %(db)s.%(nuts)s''')
1527 for row in self._conn.execute(sql):
1528 return row[0]
1530 def get_total_size(self):
1531 '''
1532 Get aggregated file size available in selection.
1533 '''
1535 sql = self._sql('''
1536 SELECT SUM(files.size) FROM %(db)s.%(file_states)s
1537 INNER JOIN files
1538 ON %(db)s.%(file_states)s.file_id = files.file_id
1539 ''')
1541 for row in self._conn.execute(sql):
1542 return row[0] or 0
1544 def get_stats(self):
1545 '''
1546 Get statistics on contents available through this selection.
1547 '''
1549 kinds = self.get_kinds()
1550 time_spans = {}
1551 for kind in kinds:
1552 time_spans[kind] = self.get_time_span([kind])
1554 return SquirrelStats(
1555 nfiles=self.get_nfiles(),
1556 nnuts=self.get_nnuts(),
1557 kinds=kinds,
1558 codes=self.get_codes(),
1559 total_size=self.get_total_size(),
1560 counts=self.get_counts(),
1561 time_spans=time_spans,
1562 sources=[s.describe() for s in self._sources],
1563 operators=[op.describe() for op in self._operators])
1565 def get_content(
1566 self,
1567 nut,
1568 cache_id='default',
1569 accessor_id='default',
1570 show_progress=False,
1571 model='squirrel'):
1573 '''
1574 Get and possibly load full content for a given index entry from file.
1576 Loads the actual content objects (channel, station, waveform, ...) from
        file. For efficiency, sibling content (everything in the same file
        segment) is also loaded as a side effect. The loaded contents are
1579 cached in the Squirrel object.
1580 '''
1582 content_cache = self._content_caches[cache_id]
1583 if not content_cache.has(nut):
1585 for nut_loaded in io.iload(
1586 nut.file_path,
1587 segment=nut.file_segment,
1588 format=nut.file_format,
1589 database=self._database,
1590 update_selection=self,
1591 show_progress=show_progress):
1593 content_cache.put(nut_loaded)
1595 try:
1596 return content_cache.get(nut, accessor_id, model)
1597 except KeyError:
1598 raise error.NotAvailable(
1599 'Unable to retrieve content: %s, %s, %s, %s' % nut.key)
1601 def advance_accessor(self, accessor_id='default', cache_id=None):
1602 '''
1603 Notify memory caches about consumer moving to a new data batch.
1605 :param accessor_id:
1606 Name of accessing consumer to be advanced.
1607 :type accessor_id:
1608 str
1610 :param cache_id:
            Name of cache for which the accessor should be advanced. By
1612 default the named accessor is advanced in all registered caches.
1613 By default, two caches named ``'default'`` and ``'waveform'`` are
1614 available.
1615 :type cache_id:
1616 str
1618 See :py:class:`~pyrocko.squirrel.cache.ContentCache` for details on how
1619 Squirrel's memory caching works and can be tuned. Default behaviour is
1620 to release data when it has not been used in the latest data
1621 window/batch. If the accessor is never advanced, data is cached
1622 indefinitely - which is often desired e.g. for station meta-data.
1623 Methods for consecutive data traversal, like
1624 :py:meth:`chopper_waveforms` automatically advance and clear
1625 their accessor.
1626 '''
1627 for cache_ in (
1628 self._content_caches.keys()
1629 if cache_id is None
1630 else [cache_id]):
1632 self._content_caches[cache_].advance_accessor(accessor_id)
1634 def clear_accessor(self, accessor_id, cache_id=None):
1635 '''
1636 Notify memory caches about a consumer having finished.
1638 :param accessor_id:
1639 Name of accessor to be cleared.
1640 :type accessor_id:
1641 str
1643 :param cache_id:
1644 Name of cache for which the accessor should be cleared. By default
1645 the named accessor is cleared from all registered caches. By
1646 default, two caches named ``'default'`` and ``'waveform'`` are
1647 available.
1648 :type cache_id:
1649 str
1651 Calling this method clears all references to cache entries held by the
1652 named accessor. Cache entries are then freed if not referenced by any
1653 other accessor.
1654 '''
1656 for cache_ in (
1657 self._content_caches.keys()
1658 if cache_id is None
1659 else [cache_id]):
1661 self._content_caches[cache_].clear_accessor(accessor_id)
1663 def get_cache_stats(self, cache_id):
1664 return self._content_caches[cache_id].get_stats()
1666 def _check_duplicates(self, nuts):
1667 d = defaultdict(list)
1668 for nut in nuts:
1669 d[nut.codes].append(nut)
1671 for codes, group in d.items():
1672 if len(group) > 1:
1673 logger.warning(
1674 'Multiple entries matching codes: %s' % str(codes))
1676 @filldocs
1677 def get_stations(
1678 self, obj=None, tmin=None, tmax=None, time=None, codes=None,
1679 model='squirrel'):
1681 '''
1682 Get stations matching given constraints.
1684 %(query_args)s
1686 :param model:
1687 Select object model for returned values: ``'squirrel'`` to get
1688 Squirrel station objects or ``'pyrocko'`` to get Pyrocko station
1689 objects with channel information attached.
1690 :type model:
1691 str
1693 :returns:
1694 List of :py:class:`pyrocko.squirrel.Station
1695 <pyrocko.squirrel.model.Station>` objects by default or list of
1696 :py:class:`pyrocko.model.Station <pyrocko.model.station.Station>`
1697 objects if ``model='pyrocko'`` is requested.
1699 See :py:meth:`iter_nuts` for details on time span matching.
1700 '''
1702 if model == 'pyrocko':
1703 return self._get_pyrocko_stations(obj, tmin, tmax, time, codes)
1704 elif model in ('squirrel', 'stationxml'):
1705 args = self._get_selection_args(
1706 STATION, obj, tmin, tmax, time, codes)
1708 nuts = sorted(
1709 self.iter_nuts('station', *args), key=lambda nut: nut.dkey)
1710 self._check_duplicates(nuts)
1711 return [self.get_content(nut, model=model) for nut in nuts]
1712 else:
1713 raise ValueError('Invalid station model: %s' % model)
1715 @filldocs
1716 def get_channels(
1717 self, obj=None, tmin=None, tmax=None, time=None, codes=None,
1718 model='squirrel'):
1720 '''
1721 Get channels matching given constraints.
1723 %(query_args)s
1725 :returns:
1726 List of :py:class:`~pyrocko.squirrel.model.Channel` objects.
1728 See :py:meth:`iter_nuts` for details on time span matching.
1729 '''
1731 args = self._get_selection_args(
1732 CHANNEL, obj, tmin, tmax, time, codes)
1734 nuts = sorted(
1735 self.iter_nuts('channel', *args), key=lambda nut: nut.dkey)
1736 self._check_duplicates(nuts)
1737 return [self.get_content(nut, model=model) for nut in nuts]
1739 @filldocs
1740 def get_sensors(
1741 self, obj=None, tmin=None, tmax=None, time=None, codes=None):
1743 '''
1744 Get sensors matching given constraints.
1746 %(query_args)s
1748 :returns:
1749 List of :py:class:`~pyrocko.squirrel.model.Sensor` objects.
1751 See :py:meth:`iter_nuts` for details on time span matching.
1752 '''
1754 tmin, tmax, codes = self._get_selection_args(
1755 CHANNEL, obj, tmin, tmax, time, codes)
1757 if codes is not None:
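            # Translate channel code patterns into sensor patterns by
            # replacing the last (component) letter of the channel code with
            # a wildcard, e.g. 'BHZ' -> 'BH?'.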
1758 codes = codes_patterns_list(
1759 (entry.replace(channel=entry.channel[:-1] + '?')
1760 if entry != '*' else entry)
1761 for entry in codes)
1763 nuts = sorted(
1764 self.iter_nuts(
1765 'channel', tmin, tmax, codes), key=lambda nut: nut.dkey)
1766 self._check_duplicates(nuts)
1767 return model.Sensor.from_channels(
1768 self.get_content(nut) for nut in nuts)
1770 @filldocs
1771 def get_responses(
1772 self, obj=None, tmin=None, tmax=None, time=None, codes=None,
1773 model='squirrel'):
1775 '''
1776 Get instrument responses matching given constraints.
1778 %(query_args)s
1780 :returns:
1781 List of :py:class:`~pyrocko.squirrel.model.Response` objects.
1783 See :py:meth:`iter_nuts` for details on time span matching.
1784 '''
1786 args = self._get_selection_args(
1787 RESPONSE, obj, tmin, tmax, time, codes)
1789 nuts = sorted(
1790 self.iter_nuts('response', *args), key=lambda nut: nut.dkey)
1791 self._check_duplicates(nuts)
1792 return [self.get_content(nut, model=model) for nut in nuts]
1794 @filldocs
1795 def get_response(
1796 self, obj=None, tmin=None, tmax=None, time=None, codes=None,
1797 model='squirrel'):
1799 '''
1800 Get instrument response matching given constraints.
1802 %(query_args)s
1804 :returns:
1805 :py:class:`~pyrocko.squirrel.model.Response` object.
1807 Same as :py:meth:`get_responses` but returning exactly one response.
1808 Raises :py:exc:`~pyrocko.squirrel.error.NotAvailable` if zero or more
1809 than one is available.
1811 See :py:meth:`iter_nuts` for details on time span matching.
1812 '''
1814 responses = self.get_responses(
1815 obj, tmin, tmax, time, codes, model=model)
1816 if len(responses) == 0:
1817 raise error.NotAvailable(
1818 'No instrument response available (%s).'
1819 % self._get_selection_args_str(
1820 RESPONSE, obj, tmin, tmax, time, codes))
1822 elif len(responses) > 1:
1823 if model == 'squirrel':
1824 rinfo = ':\n' + '\n'.join(
1825 ' ' + resp.summary for resp in responses)
1826 else:
1827 rinfo = '.'
1829 raise error.NotAvailable(
1830 'Multiple instrument responses matching given constraints '
1831 '(%s)%s' % (
1832 self._get_selection_args_str(
1833 RESPONSE, obj, tmin, tmax, time, codes), rinfo))
1835 return responses[0]
1837 @filldocs
1838 def get_events(
1839 self, obj=None, tmin=None, tmax=None, time=None, codes=None):
1841 '''
1842 Get events matching given constraints.
1844 %(query_args)s
1846 :returns:
1847 List of :py:class:`~pyrocko.model.event.Event` objects.
1849 See :py:meth:`iter_nuts` for details on time span matching.
1850 '''
1852 args = self._get_selection_args(EVENT, obj, tmin, tmax, time, codes)
1853 nuts = sorted(
1854 self.iter_nuts('event', *args), key=lambda nut: nut.dkey)
1855 self._check_duplicates(nuts)
1856 return [self.get_content(nut) for nut in nuts]
1858 def _redeem_promises(self, *args):
1860 tmin, tmax, _ = args
1862 waveforms = list(self.iter_nuts('waveform', *args))
1863 promises = list(self.iter_nuts('waveform_promise', *args))
1865 codes_to_avail = defaultdict(list)
1866 for nut in waveforms:
1867 codes_to_avail[nut.codes].append((nut.tmin, nut.tmax))
1869 def tts(x):
1870 if isinstance(x, tuple):
1871 return tuple(tts(e) for e in x)
1872 elif isinstance(x, list):
1873 return list(tts(e) for e in x)
1874 else:
1875 return util.time_to_str(x)
1877 orders = []
1878 for promise in promises:
1879 waveforms_avail = codes_to_avail[promise.codes]
1880 for block_tmin, block_tmax in blocks(
1881 max(tmin, promise.tmin),
1882 min(tmax, promise.tmax),
1883 promise.deltat):
1885 orders.append(
1886 WaveformOrder(
1887 source_id=promise.file_path,
1888 codes=promise.codes,
1889 tmin=block_tmin,
1890 tmax=block_tmax,
1891 deltat=promise.deltat,
1892 gaps=gaps(waveforms_avail, block_tmin, block_tmax)))
1894 orders_noop, orders = lpick(lambda order: order.gaps, orders)
1896 order_keys_noop = set(order_key(order) for order in orders_noop)
1897 if len(order_keys_noop) != 0 or len(orders_noop) != 0:
1898 logger.info(
                'Waveform orders already satisfied with cached/local data: '
1900 '%i (%i)' % (len(order_keys_noop), len(orders_noop)))
1902 source_ids = []
1903 sources = {}
1904 for source in self._sources:
1905 if isinstance(source, fdsn.FDSNSource):
1906 source_ids.append(source._source_id)
1907 sources[source._source_id] = source
1909 source_priority = dict(
1910 (source_id, i) for (i, source_id) in enumerate(source_ids))
1912 order_groups = defaultdict(list)
1913 for order in orders:
1914 order_groups[order_key(order)].append(order)
1916 for k, order_group in order_groups.items():
1917 order_group.sort(
1918 key=lambda order: source_priority[order.source_id])
1920 n_order_groups = len(order_groups)
1922 if len(order_groups) != 0 or len(orders) != 0:
1923 logger.info(
1924 'Waveform orders standing for download: %i (%i)'
1925 % (len(order_groups), len(orders)))
1927 task = make_task('Waveform orders processed', n_order_groups)
1928 else:
1929 task = None
1931 def split_promise(order):
1932 self._split_nuts(
1933 'waveform_promise',
1934 order.tmin, order.tmax,
1935 codes=order.codes,
1936 path=order.source_id)
1938 def release_order_group(order):
1939 okey = order_key(order)
1940 for followup in order_groups[okey]:
1941 split_promise(followup)
1943 del order_groups[okey]
1945 if task:
1946 task.update(n_order_groups - len(order_groups))
1948 def noop(order):
1949 pass
1951 def success(order):
1952 release_order_group(order)
1953 split_promise(order)
1955 def batch_add(paths):
1956 self.add(paths)
1958 calls = queue.Queue()
1960 def enqueue(f):
1961 def wrapper(*args):
1962 calls.put((f, args))
1964 return wrapper
1966 for order in orders_noop:
1967 split_promise(order)
1969 while order_groups:
1971 orders_now = []
1972 empty = []
1973 for k, order_group in order_groups.items():
1974 try:
1975 orders_now.append(order_group.pop(0))
1976 except IndexError:
1977 empty.append(k)
1979 for k in empty:
1980 del order_groups[k]
1982 by_source_id = defaultdict(list)
1983 for order in orders_now:
1984 by_source_id[order.source_id].append(order)
1986 threads = []
            for source_id in by_source_id:
                # Bind the loop variable as a default argument so that each
                # worker thread downloads the orders of its own source.
                def download(source_id=source_id):
1989 try:
1990 sources[source_id].download_waveforms(
1991 by_source_id[source_id],
1992 success=enqueue(success),
1993 error_permanent=enqueue(split_promise),
1994 error_temporary=noop,
1995 batch_add=enqueue(batch_add))
1997 finally:
1998 calls.put(None)
2000 thread = threading.Thread(target=download)
2001 thread.start()
2002 threads.append(thread)
2004 ndone = 0
2005 while ndone < len(threads):
2006 ret = calls.get()
2007 if ret is None:
2008 ndone += 1
2009 else:
2010 ret[0](*ret[1])
2012 for thread in threads:
2013 thread.join()
2015 if task:
2016 task.update(n_order_groups - len(order_groups))
2018 if task:
2019 task.done()
2021 @filldocs
2022 def get_waveform_nuts(
2023 self, obj=None, tmin=None, tmax=None, time=None, codes=None):
2025 '''
2026 Get waveform content entities matching given constraints.
2028 %(query_args)s
2030 Like :py:meth:`get_nuts` with ``kind='waveform'`` but additionally
2031 resolves matching waveform promises (downloads waveforms from remote
2032 sources).
2034 See :py:meth:`iter_nuts` for details on time span matching.
2035 '''
2037 args = self._get_selection_args(WAVEFORM, obj, tmin, tmax, time, codes)
2038 self._redeem_promises(*args)
2039 return sorted(
2040 self.iter_nuts('waveform', *args), key=lambda nut: nut.dkey)
2042 @filldocs
2043 def get_waveforms(
2044 self, obj=None, tmin=None, tmax=None, time=None, codes=None,
2045 uncut=False, want_incomplete=True, degap=True, maxgap=5,
2046 maxlap=None, snap=None, include_last=False, load_data=True,
2047 accessor_id='default', operator_params=None):
2049 '''
2050 Get waveforms matching given constraints.
2052 %(query_args)s
2054 :param uncut:
2055 Set to ``True`` to disable cutting traces to [``tmin``, ``tmax``]
2056 and to disable degapping/deoverlapping. Returns untouched traces as
2057 they are read from the file segments. File segments are always read in
2058 their entirety.
2059 :type uncut:
2060 bool
2062 :param want_incomplete:
2063 If ``True``, gappy/incomplete traces are included in the result.
2064 :type want_incomplete:
2065 bool
2067 :param degap:
2068 If ``True``, connect traces and remove gaps and overlaps.
2069 :type degap:
2070 bool
2072 :param maxgap:
2073 Maximum gap size in samples which is filled with interpolated
2074 samples when ``degap`` is ``True``.
2075 :type maxgap:
2076 int
2078 :param maxlap:
2079 Maximum overlap size in samples which is removed when ``degap`` is
2080 ``True``.
2081 :type maxlap:
2082 int
2084 :param snap:
2085 Rounding functions used when computing the sample index from a time
2086 instant, for trace start and trace end, respectively. By default,
2087 ``(round, round)`` is used.
2088 :type snap:
2089 tuple of 2 callables
2091 :param include_last:
2092 If ``True``, add one more sample to the returned traces (the sample
2093 which would be the first sample of a query with ``tmin`` set to the
2094 current value of ``tmax``).
2095 :type include_last:
2096 bool
2098 :param load_data:
2099 If ``True``, waveform data samples are read from files (or cache).
2100 If ``False``, meta-information-only traces are returned (dummy
2101 traces with no data samples).
2102 :type load_data:
2103 bool
2105 :param accessor_id:
2106 Name of consumer on whose behalf data is accessed. Used in cache
2107 management (see :py:mod:`~pyrocko.squirrel.cache`). Used as a key
2108 to distinguish different points of extraction for the decision of
2109 when to release cached waveform data. Should be used when data is
2110 alternately extracted from more than one region / selection.
2111 :type accessor_id:
2112 str
2114 See :py:meth:`iter_nuts` for details on time span matching.
2116 Loaded data is kept in memory (at least) until
2117 :py:meth:`clear_accessor` has been called or
2118 :py:meth:`advance_accessor` has been called two consecutive times
2119 without data being accessed between the two calls (by this accessor).
2120 Data may still be further kept in the memory cache if held alive by
2121 consumers with a different ``accessor_id``.
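
Example (minimal sketch; ``sq`` stands for a :py:class:`Squirrel`
instance to which matching waveform data or waveform promises have been
added; the codes pattern is illustrative)::

    from pyrocko import util

    trs = sq.get_waveforms(
        codes='*.STA01.*.HHZ',
        tmin=util.str_to_time('2021-01-01 00:00:00'),
        tmax=util.str_to_time('2021-01-01 01:00:00'))

    for tr in trs:
        print(tr)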
2122 '''
2124 tmin, tmax, codes = self._get_selection_args(
2125 WAVEFORM, obj, tmin, tmax, time, codes)
2127 self_tmin, self_tmax = self.get_time_span(
2128 ['waveform', 'waveform_promise'])
2130 if None in (self_tmin, self_tmax):
2131 logger.warning(
2132 'No waveforms available.')
2133 return []
2135 tmin = tmin if tmin is not None else self_tmin
2136 tmax = tmax if tmax is not None else self_tmax
2138 if codes is not None and len(codes) == 1:
2139 # TODO: fix for multiple / mixed codes
2140 operator = self.get_operator(codes[0])
2141 if operator is not None:
2142 return operator.get_waveforms(
2143 self, codes[0],
2144 tmin=tmin, tmax=tmax,
2145 uncut=uncut, want_incomplete=want_incomplete, degap=degap,
2146 maxgap=maxgap, maxlap=maxlap, snap=snap,
2147 include_last=include_last, load_data=load_data,
2148 accessor_id=accessor_id, params=operator_params)
2150 nuts = self.get_waveform_nuts(obj, tmin, tmax, time, codes)
2152 if load_data:
2153 traces = [
2154 self.get_content(nut, 'waveform', accessor_id) for nut in nuts]
2156 else:
2157 traces = [
2158 trace.Trace(**nut.trace_kwargs) for nut in nuts]
2160 if uncut:
2161 return traces
2163 if snap is None:
2164 snap = (round, round)
2166 chopped = []
2167 for tr in traces:
2168 if not load_data and tr.ydata is not None:
2169 tr = tr.copy(data=False)
2170 tr.ydata = None
2172 try:
2173 chopped.append(tr.chop(
2174 tmin, tmax,
2175 inplace=False,
2176 snap=snap,
2177 include_last=include_last))
2179 except trace.NoData:
2180 pass
2182 processed = self._process_chopped(
2183 chopped, degap, maxgap, maxlap, want_incomplete, tmin, tmax)
2185 return processed
2187 @filldocs
2188 def chopper_waveforms(
2189 self, obj=None, tmin=None, tmax=None, time=None, codes=None,
2190 tinc=None, tpad=0.,
2191 want_incomplete=True, snap_window=False,
2192 degap=True, maxgap=5, maxlap=None,
2193 snap=None, include_last=False, load_data=True,
2194 accessor_id=None, clear_accessor=True, operator_params=None):
2196 '''
2197 Iterate window-wise over waveform archive.
2199 %(query_args)s
2201 :param tinc:
2202 Time increment (window shift time). Defaults to ``tmax - tmin``.
2203 :type tinc:
2204 timestamp
2206 :param tpad:
2207 Padding time appended on either side of the data window (window
2208 overlap is ``2*tpad``).
2209 :type tpad:
2210 timestamp
2212 :param want_incomplete:
2213 If ``True``, gappy/incomplete traces are included in the result.
2214 :type want_incomplete:
2215 bool
2217 :param snap_window:
2218 If ``True``, start time windows at multiples of ``tinc`` with respect
2219 to system time zero.
2220 :type snap_window:
2221 bool
2223 :param degap:
2224 If ``True``, connect traces and remove gaps and overlaps.
2225 :type degap:
2226 bool
2228 :param maxgap:
2229 Maximum gap size in samples which is filled with interpolated
2230 samples when ``degap`` is ``True``.
2231 :type maxgap:
2232 int
2234 :param maxlap:
2235 Maximum overlap size in samples which is removed when ``degap`` is
2236 ``True``.
2237 :type maxlap:
2238 int
2240 :param snap:
2241 Rounding functions used when computing the sample index from a time
2242 instant, for trace start and trace end, respectively. By default,
2243 ``(round, round)`` is used.
2244 :type snap:
2245 tuple of 2 callables
2247 :param include_last:
2248 If ``True``, add one more sample to the returned traces (the sample
2249 which would be the first sample of a query with ``tmin`` set to the
2250 current value of ``tmax``).
2251 :type include_last:
2252 bool
2254 :param load_data:
2255 If ``True``, waveform data samples are read from files (or cache).
2256 If ``False``, meta-information-only traces are returned (dummy
2257 traces with no data samples).
2258 :type load_data:
2259 bool
2261 :param accessor_id:
2262 Name of consumer on whose behalf data is accessed. Used in cache
2263 management (see :py:mod:`~pyrocko.squirrel.cache`). Used as a key
2264 to distinguish different points of extraction for the decision of
2265 when to release cached waveform data. Should be used when data is
2266 alternately extracted from more than one region / selection.
2267 :type accessor_id:
2268 str
2270 :param clear_accessor:
2271 If ``True`` (default), :py:meth:`clear_accessor` is called when the
2272 chopper finishes. Set to ``False`` to keep loaded waveforms in
2273 memory when the generator returns.
2274 :type clear_accessor:
2275 bool
2277 :yields:
2278 For each extracted time window, a :py:class:`Batch` object holding
2279 the window's :py:class:`~pyrocko.trace.Trace` objects.
2281 See :py:meth:`iter_nuts` for details on time span matching.
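
Example (minimal sketch; ``sq`` stands for a :py:class:`Squirrel`
instance holding waveform data for the requested time span)::

    from pyrocko import util

    tmin = util.str_to_time('2021-01-01 00:00:00')
    tmax = util.str_to_time('2021-01-02 00:00:00')

    for batch in sq.chopper_waveforms(
            tmin=tmin, tmax=tmax, tinc=3600., tpad=10.):

        print('window %i of %i' % (batch.i + 1, batch.n))
        for tr in batch.traces:
            pass  # process one hour of (padded) waveforms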
2282 '''
2284 tmin, tmax, codes = self._get_selection_args(
2285 WAVEFORM, obj, tmin, tmax, time, codes)
2287 self_tmin, self_tmax = self.get_time_span(
2288 ['waveform', 'waveform_promise'])
2290 if None in (self_tmin, self_tmax):
2291 logger.warning(
2292 'Content has undefined time span. No waveforms and no '
2293 'waveform promises?')
2294 return
2296 if snap_window and tinc is not None:
2297 tmin = tmin if tmin is not None else self_tmin
2298 tmax = tmax if tmax is not None else self_tmax
2299 tmin = math.floor(tmin / tinc) * tinc
2300 tmax = math.ceil(tmax / tinc) * tinc
2301 else:
2302 tmin = tmin if tmin is not None else self_tmin + tpad
2303 tmax = tmax if tmax is not None else self_tmax - tpad
2305 tinc = tinc if tinc is not None else tmax - tmin
2307 try:
2308 if accessor_id is None:
2309 accessor_id = 'chopper%i' % self._n_choppers_active
2311 self._n_choppers_active += 1
2313 eps = tinc * 1e-6
2314 if tinc != 0.0:
2315 nwin = int(((tmax - eps) - tmin) / tinc) + 1
2316 else:
2317 nwin = 1
2319 for iwin in range(nwin):
2320 wmin, wmax = tmin+iwin*tinc, min(tmin+(iwin+1)*tinc, tmax)
2322 chopped = self.get_waveforms(
2323 tmin=wmin-tpad,
2324 tmax=wmax+tpad,
2325 codes=codes,
2326 snap=snap,
2327 include_last=include_last,
2328 load_data=load_data,
2329 want_incomplete=want_incomplete,
2330 degap=degap,
2331 maxgap=maxgap,
2332 maxlap=maxlap,
2333 accessor_id=accessor_id,
2334 operator_params=operator_params)
2336 self.advance_accessor(accessor_id)
2338 yield Batch(
2339 tmin=wmin,
2340 tmax=wmax,
2341 i=iwin,
2342 n=nwin,
2343 traces=chopped)
2345 iwin += 1
2347 finally:
2348 self._n_choppers_active -= 1
2349 if clear_accessor:
2350 self.clear_accessor(accessor_id, 'waveform')
2352 def _process_chopped(
2353 self, chopped, degap, maxgap, maxlap, want_incomplete, tmin, tmax):
2355 chopped.sort(key=lambda a: a.full_id)
2356 if degap:
2357 chopped = trace.degapper(chopped, maxgap=maxgap, maxlap=maxlap)
2359 if not want_incomplete:
2360 chopped_weeded = []
2361 for tr in chopped:
2362 emin = tr.tmin - tmin
2363 emax = tr.tmax + tr.deltat - tmax
2364 if (abs(emin) <= 0.5*tr.deltat and abs(emax) <= 0.5*tr.deltat):
2365 chopped_weeded.append(tr)
2367 elif degap:
2368 if (0. < emin <= 5. * tr.deltat
2369 and -5. * tr.deltat <= emax < 0.):
2371 tr.extend(tmin, tmax-tr.deltat, fillmethod='repeat')
2372 chopped_weeded.append(tr)
2374 chopped = chopped_weeded
2376 return chopped
2378 def _get_pyrocko_stations(
2379 self, obj=None, tmin=None, tmax=None, time=None, codes=None):
2381 from pyrocko import model as pmodel
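# Group pyrocko station constructor arguments by (network, station,
# location), collected both from station and from channel entries, and
# merge them consistently further below.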
2383 by_nsl = defaultdict(lambda: (list(), list()))
2384 for station in self.get_stations(obj, tmin, tmax, time, codes):
2385 sargs = station._get_pyrocko_station_args()
2386 by_nsl[station.codes.nsl][0].append(sargs)
2388 for channel in self.get_channels(obj, tmin, tmax, time, codes):
2389 sargs = channel._get_pyrocko_station_args()
2390 sargs_list, channels_list = by_nsl[channel.codes.nsl]
2391 sargs_list.append(sargs)
2392 channels_list.append(channel)
2394 pstations = []
2395 nsls = list(by_nsl.keys())
2396 nsls.sort()
2397 for nsl in nsls:
2398 sargs_list, channels_list = by_nsl[nsl]
2399 sargs = util.consistency_merge(
2400 [('',) + x for x in sargs_list])
2402 by_c = defaultdict(list)
2403 for ch in channels_list:
2404 by_c[ch.codes.channel].append(ch._get_pyrocko_channel_args())
2406 chas = list(by_c.keys())
2407 chas.sort()
2408 pchannels = []
2409 for cha in chas:
2410 list_of_cargs = by_c[cha]
2411 cargs = util.consistency_merge(
2412 [('',) + x for x in list_of_cargs])
2413 pchannels.append(pmodel.Channel(*cargs))
2415 pstations.append(
2416 pmodel.Station(*sargs, channels=pchannels))
2418 return pstations
2420 @property
2421 def pile(self):
2423 '''
2424 Emulates the older :py:class:`pyrocko.pile.Pile` interface.
2426 This property exposes a :py:class:`pyrocko.squirrel.pile.Pile` object,
2427 which emulates most of the older :py:class:`pyrocko.pile.Pile` methods
2428 but uses the fluffy power of the Squirrel under the hood.
2430 This interface can be used as a drop-in replacement for piles which are
2431 used in existing scripts and programs for efficient waveform data
2432 access. The Squirrel-based pile scales better for large datasets. Newer
2433 scripts should use Squirrel's native methods to avoid the emulation
2434 overhead.
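
Example (minimal sketch; iterates with the classic
:py:meth:`pyrocko.pile.Pile.chopper` style call, emulated by the
Squirrel-based pile)::

    from pyrocko import util

    tmin = util.str_to_time('2021-01-01 00:00:00')
    tmax = util.str_to_time('2021-01-02 00:00:00')

    for trs in sq.pile.chopper(tmin=tmin, tmax=tmax, tinc=3600.):
        pass  # trs: list of traces per window, as with the classic pile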
2435 '''
2436 from . import pile
2438 if self._pile is None:
2439 self._pile = pile.Pile(self)
2441 return self._pile
2443 def snuffle(self):
2444 '''
2445 Look at dataset in Snuffler.
2446 '''
2447 self.pile.snuffle()
2449 def _gather_codes_keys(self, kind, gather, selector):
2450 return set(
2451 gather(codes)
2452 for codes in self.iter_codes(kind)
2453 if selector is None or selector(codes))
2455 def __str__(self):
2456 return str(self.get_stats())
2458 def get_coverage(
2459 self, kind, tmin=None, tmax=None, codes=None, limit=None):
2461 '''
2462 Get coverage information.
2464 Get information about strips of gapless data coverage.
2466 :param kind:
2467 Content kind to be queried.
2468 :type kind:
2469 str
2471 :param tmin:
2472 Start time of query interval.
2473 :type tmin:
2474 timestamp
2476 :param tmax:
2477 End time of query interval.
2478 :type tmax:
2479 timestamp
2481 :param codes:
2482 If given, restrict query to given content codes patterns.
2483 :type codes:
2484 :py:class:`list` of :py:class:`~pyrocko.squirrel.model.Codes`
2485 objects appropriate for the queried content type, or anything which
2486 can be converted to such objects.
2488 :param limit:
2489 Limit query to return only up to a given maximum number of entries
2490 per matching time series (without setting this option, very gappy
2491 data could cause the query to execute for a very long time).
2492 :type limit:
2493 int
2495 :returns:
2496 Information about time spans covered by the requested time series
2497 data.
2498 :rtype:
2499 :py:class:`list` of :py:class:`Coverage` objects
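
Example (minimal sketch; ``sq`` stands for a :py:class:`Squirrel`
instance with indexed waveform content)::

    for coverage in sq.get_coverage('waveform'):
        print(coverage)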
2500 '''
2502 tmin_seconds, tmin_offset = model.tsplit(tmin)
2503 tmax_seconds, tmax_offset = model.tsplit(tmax)
2504 kind_id = to_kind_id(kind)
2506 codes_info = list(self._iter_codes_info(kind=kind))
2508 kdata_all = []
2509 if codes is None:
2510 for _, codes_entry, deltat, kind_codes_id, _ in codes_info:
2511 kdata_all.append(
2512 (codes_entry, kind_codes_id, codes_entry, deltat))
2514 else:
2515 for codes_entry in codes:
2516 pattern = to_codes(kind_id, codes_entry)
2517 for _, codes_entry, deltat, kind_codes_id, _ in codes_info:
2518 if model.match_codes(pattern, codes_entry):
2519 kdata_all.append(
2520 (pattern, kind_codes_id, codes_entry, deltat))
2522 kind_codes_ids = [x[1] for x in kdata_all]
2524 counts_at_tmin = {}
2525 if tmin is not None:
2526 for nut in self.iter_nuts(
2527 kind, tmin, tmin, kind_codes_ids=kind_codes_ids):
2529 k = nut.codes, nut.deltat
2530 if k not in counts_at_tmin:
2531 counts_at_tmin[k] = 0
2533 counts_at_tmin[k] += 1
2535 coverages = []
2536 for pattern, kind_codes_id, codes_entry, deltat in kdata_all:
2537 entry = [pattern, codes_entry, deltat, None, None, []]
2538 for i, order in [(0, 'ASC'), (1, 'DESC')]:
2539 sql = self._sql('''
2540 SELECT
2541 time_seconds,
2542 time_offset
2543 FROM %(db)s.%(coverage)s
2544 WHERE
2545 kind_codes_id == ?
2546 ORDER BY
2547 kind_codes_id ''' + order + ''',
2548 time_seconds ''' + order + ''',
2549 time_offset ''' + order + '''
2550 LIMIT 1
2551 ''')
2553 for row in self._conn.execute(sql, [kind_codes_id]):
2554 entry[3+i] = model.tjoin(row[0], row[1])
2556 if None in entry[3:5]:
2557 continue
2559 args = [kind_codes_id]
2561 sql_time = ''
2562 if tmin is not None:
2563 # intentionally < because (== tmin) is queried from nuts
2564 sql_time += ' AND ( ? < time_seconds ' \
2565 'OR ( ? == time_seconds AND ? < time_offset ) ) '
2566 args.extend([tmin_seconds, tmin_seconds, tmin_offset])
2568 if tmax is not None:
2569 sql_time += ' AND ( time_seconds < ? ' \
2570 'OR ( ? == time_seconds AND time_offset <= ? ) ) '
2571 args.extend([tmax_seconds, tmax_seconds, tmax_offset])
2573 sql_limit = ''
2574 if limit is not None:
2575 sql_limit = ' LIMIT ?'
2576 args.append(limit)
2578 sql = self._sql('''
2579 SELECT
2580 time_seconds,
2581 time_offset,
2582 step
2583 FROM %(db)s.%(coverage)s
2584 WHERE
2585 kind_codes_id == ?
2586 ''' + sql_time + '''
2587 ORDER BY
2588 kind_codes_id,
2589 time_seconds,
2590 time_offset
2591 ''' + sql_limit)
2593 rows = list(self._conn.execute(sql, args))
2595 if limit is not None and len(rows) == limit:
2596 entry[-1] = None
2597 else:
2598 counts = counts_at_tmin.get((codes_entry, deltat), 0)
2599 tlast = None
2600 if tmin is not None:
2601 entry[-1].append((tmin, counts))
2602 tlast = tmin
2604 for row in rows:
2605 t = model.tjoin(row[0], row[1])
2606 counts += row[2]
2607 entry[-1].append((t, counts))
2608 tlast = t
2610 if tmax is not None and (tlast is None or tlast != tmax):
2611 entry[-1].append((tmax, counts))
2613 coverages.append(model.Coverage.from_values(entry + [kind_id]))
2615 return coverages
2617 def add_operator(self, op):
2618 self._operators.append(op)
2620 def update_operator_mappings(self):
2621 available = self.get_codes(kind='channel')
2623 for operator in self._operators:
2624 operator.update_mappings(available, self._operator_registry)
2626 def iter_operator_mappings(self):
2627 for operator in self._operators:
2628 for in_codes, out_codes in operator.iter_mappings():
2629 yield operator, in_codes, out_codes
2631 def get_operator_mappings(self):
2632 return list(self.iter_operator_mappings())
2634 def get_operator(self, codes):
2635 try:
2636 return self._operator_registry[codes][0]
2637 except KeyError:
2638 return None
2640 def get_operator_group(self, codes):
2641 try:
2642 return self._operator_registry[codes]
2643 except KeyError:
2644 return None, (None, None, None)
2646 def iter_operator_codes(self):
2647 for _, _, out_codes in self.iter_operator_mappings():
2648 for codes in out_codes:
2649 yield codes
2651 def get_operator_codes(self):
2652 return list(self.iter_operator_codes())
2654 def print_tables(self, table_names=None, stream=None):
2655 '''
2656 Dump raw database tables in textual form (for debugging purposes).
2658 :param table_names:
2659 Names of tables to be dumped or ``None`` to dump all.
2660 :type table_names:
2661 :py:class:`list` of :py:class:`str`
2663 :param stream:
2664 Open file or ``None`` to dump to standard output.
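
Example (dump the raw file and nut index tables to standard output)::

    sq.print_tables(['files', 'nuts'])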
2665 '''
2667 if stream is None:
2668 stream = sys.stdout
2670 if isinstance(table_names, str):
2671 table_names = [table_names]
2673 if table_names is None:
2674 table_names = [
2675 'selection_file_states',
2676 'selection_nuts',
2677 'selection_kind_codes_count',
2678 'files', 'nuts', 'kind_codes', 'kind_codes_count']
2680 m = {
2681 'selection_file_states': '%(db)s.%(file_states)s',
2682 'selection_nuts': '%(db)s.%(nuts)s',
2683 'selection_kind_codes_count': '%(db)s.%(kind_codes_count)s',
2684 'files': 'files',
2685 'nuts': 'nuts',
2686 'kind_codes': 'kind_codes',
2687 'kind_codes_count': 'kind_codes_count'}
2689 for table_name in table_names:
2690 self._database.print_table(
2691 m[table_name] % self._names, stream=stream)
2694class SquirrelStats(Object):
2695 '''
2696 Container to hold statistics about contents available from a Squirrel.
2698 See also :py:meth:`Squirrel.get_stats`.
2699 '''
2701 nfiles = Int.T(
2702 help='Number of files in selection.')
2703 nnuts = Int.T(
2704 help='Number of index nuts in selection.')
2705 codes = List.T(
2706 Tuple.T(content_t=String.T()),
2707 help='Available code sequences in selection, e.g. '
2708 '(agency, network, station, location) for station nuts.')
2709 kinds = List.T(
2710 String.T(),
2711 help='Available content types in selection.')
2712 total_size = Int.T(
2713 help='Aggregated size of files in selection.')
2714 counts = Dict.T(
2715 String.T(), Dict.T(Tuple.T(content_t=String.T()), Int.T()),
2716 help='Breakdown of how many nuts of any content type and code '
2717 'sequence are available in selection, ``counts[kind][codes]``.')
2718 time_spans = Dict.T(
2719 String.T(), Tuple.T(content_t=Timestamp.T()),
2720 help='Time spans by content type.')
2721 sources = List.T(
2722 String.T(),
2723 help='Descriptions of attached sources.')
2724 operators = List.T(
2725 String.T(),
2726 help='Descriptions of attached operators.')
2728 def __str__(self):
2729 kind_counts = dict(
2730 (kind, sum(self.counts[kind].values())) for kind in self.kinds)
2732 scodes = model.codes_to_str_abbreviated(self.codes)
2734 ssources = '<none>' if not self.sources else '\n' + '\n'.join(
2735 ' ' + s for s in self.sources)
2737 soperators = '<none>' if not self.operators else '\n' + '\n'.join(
2738 ' ' + s for s in self.operators)
2740 def stime(t):
2741 return util.tts(t) if t is not None and t not in (
2742 model.g_tmin, model.g_tmax) else '<none>'
2744 def stable(rows):
2745 ns = [max(len(w) for w in col) for col in zip(*rows)]
2746 return '\n'.join(
2747 ' '.join(w.ljust(n) for n, w in zip(ns, row))
2748 for row in rows)
2750 def indent(s):
2751 return '\n'.join(' '+line for line in s.splitlines())
2753 stspans = '<none>' if not self.kinds else '\n' + indent(stable([(
2754 kind + ':',
2755 str(kind_counts[kind]),
2756 stime(self.time_spans[kind][0]),
2757 '-',
2758 stime(self.time_spans[kind][1])) for kind in sorted(self.kinds)]))
2760 s = '''
2761Number of files: %i
2762Total size of known files: %s
2763Number of index nuts: %i
2764Available content kinds: %s
2765Available codes: %s
2766Sources: %s
2767Operators: %s''' % (
2768 self.nfiles,
2769 util.human_bytesize(self.total_size),
2770 self.nnuts,
2771 stspans, scodes, ssources, soperators)
2773 return s.lstrip()
2776__all__ = [
2777 'Squirrel',
2778 'SquirrelStats',
2779]