# http://pyrocko.org - GPLv3 # # The Pyrocko Developers, 21st Century # ---|P------/S----------~Lg----------
return codes[:n] + ('*',) * (n-len(codes))
'station': 4, 'channel': 5, 'response': 5, 'waveform': 6, 'event': 1, 'waveform_promise': 6, 'undefined': 1}
return []
cfill2 = list(cfill) cfill2[3] = '[*]' return [cfill, cfill2]
groups = defaultdict(list) for channel in channels: codes = channel.codes gcodes = codes[:-1] + (codes[-1][:-1],) groups[gcodes].append(channel)
return groups
list_of_args = [channel._get_pyrocko_station_args() for channel in group] args = util.consistency_merge(list_of_args + extra_args) from pyrocko import model as pmodel return pmodel.Station( network=args[0], station=args[1], location=args[2], lat=args[3], lon=args[4], elevation=args[5], depth=args[6], channels=[ch.get_pyrocko_channel() for ch in group])
assert tmin_a < tmax_a data.append((tmin_a, 1)) data.append((tmax_a, -1))
''' Prompt, lazy, indexing, caching, dynamic seismological dataset access.
:param env: Squirrel environment instance or directory path to use as starting point for its detection. By default, the current directory is used as starting point. When searching for a usable environment the directory ``'.squirrel'`` or ``'squirrel'`` in the current (or starting point) directory is used if it exists, otherwise the parent directories are searched upwards for the existence of such a directory. If no such directory is found, the user's global Squirrel environment ``'$HOME/.pyrocko/squirrel'`` is used. :type env: :py:class:`~pyrocko.squirrel.environment.SquirrelEnvironment` or :py:class:`str`
:param database: Database instance or path to database. By default the database found in the detected Squirrel environment is used. :type database: :py:class:`~pyrocko.squirrel.database.Database` or :py:class:`str`
:param cache_path: Directory path to use for data caching. By default, the ``'cache'`` directory in the detected Squirrel environment is used. :type cache_path: :py:class:`str`
:param persistent: If given a name, create a persistent selection. :type persistent: :py:class:`str`
This is the central class of the Squirrel framework. It provides a unified interface to query and access seismic waveforms, station meta-data and event information from local file collections and remote data sources. For prompt responses, a profound database setup is used under the hood. To speed up assemblage of ad-hoc data selections, files are indexed on first use and the extracted meta-data is remembered in the database for subsequent accesses. Bulk data is lazily loaded from disk and remote sources, just when requested. Once loaded, data is cached in memory to expedite typical access patterns. Files and data sources can be dynamically added to and removed from the Squirrel selection at runtime.
Queries are restricted to the contents of the files currently added to the Squirrel selection (usually a subset of the file meta-information collection in the database). This list of files is referred to here as the "selection". By default, temporary tables are created in the attached database to hold the names of the files in the selection as well as various indices and counters. These tables are only visible inside the application which created them and are deleted when the database connection is closed or the application exits. To create a selection which is not deleted at exit, supply a name to the ``persistent`` argument of the Squirrel constructor. Persistent selections are shared among applications using the same database.
**Method summary**
Some of the methods are implemented in :py:class:`Squirrel`'s base class :py:class:`~pyrocko.squirrel.selection.Selection`.
.. autosummary::
~Squirrel.add ~Squirrel.add_source ~Squirrel.add_fdsn ~Squirrel.add_catalog ~Squirrel.add_dataset ~Squirrel.add_virtual ~Squirrel.update ~Squirrel.update_waveform_promises ~Squirrel.advance_accessor ~Squirrel.clear_accessor ~Squirrel.reload ~pyrocko.squirrel.selection.Selection.iter_paths ~Squirrel.iter_nuts ~Squirrel.iter_kinds ~Squirrel.iter_deltats ~Squirrel.iter_codes ~Squirrel.iter_counts ~pyrocko.squirrel.selection.Selection.get_paths ~Squirrel.get_nuts ~Squirrel.get_kinds ~Squirrel.get_deltats ~Squirrel.get_codes ~Squirrel.get_counts ~Squirrel.get_time_span ~Squirrel.get_deltat_span ~Squirrel.get_nfiles ~Squirrel.get_nnuts ~Squirrel.get_total_size ~Squirrel.get_stats ~Squirrel.get_content ~Squirrel.get_stations ~Squirrel.get_channels ~Squirrel.get_responses ~Squirrel.get_events ~Squirrel.get_waveform_nuts ~Squirrel.get_waveforms ~Squirrel.chopper_waveforms ~Squirrel.get_coverage ~Squirrel.pile ~Squirrel.snuffle ~Squirrel.glob_codes ~pyrocko.squirrel.selection.Selection.get_database ~Squirrel.print_tables '''
self, env=None, database=None, cache_path=None, persistent=None):
'waveform': cache.ContentCache(), 'default': cache.ContentCache()}
'nuts': self.name + '_nuts', 'kind_codes_count': self.name + '_kind_codes_count', 'coverage': self.name + '_coverage'})
''' CREATE TABLE IF NOT EXISTS %(db)s.%(nuts)s ( nut_id integer PRIMARY KEY, file_id integer, file_segment integer, file_element integer, kind_id integer, kind_codes_id integer, tmin_seconds integer, tmin_offset integer, tmax_seconds integer, tmax_offset integer, kscale integer) ''')))
''' CREATE TABLE IF NOT EXISTS %(db)s.%(kind_codes_count)s ( kind_codes_id integer PRIMARY KEY, count integer) ''')))
''' CREATE UNIQUE INDEX IF NOT EXISTS %(db)s.%(nuts)s_file_element ON %(nuts)s (file_id, file_segment, file_element) '''))
''' CREATE INDEX IF NOT EXISTS %(db)s.%(nuts)s_index_file_id ON %(nuts)s (file_id) '''))
''' CREATE INDEX IF NOT EXISTS %(db)s.%(nuts)s_index_tmin_seconds ON %(nuts)s (kind_id, tmin_seconds) '''))
''' CREATE INDEX IF NOT EXISTS %(db)s.%(nuts)s_index_tmax_seconds ON %(nuts)s (kind_id, tmax_seconds) '''))
''' CREATE INDEX IF NOT EXISTS %(db)s.%(nuts)s_index_kscale ON %(nuts)s (kind_id, kscale, tmin_seconds) '''))
''' CREATE TRIGGER IF NOT EXISTS %(db)s.%(nuts)s_delete_nuts BEFORE DELETE ON main.files FOR EACH ROW BEGIN DELETE FROM %(nuts)s WHERE file_id == old.file_id; END '''))
# trigger only on size to make silent update of mtime possible ''' CREATE TRIGGER IF NOT EXISTS %(db)s.%(nuts)s_delete_nuts2 BEFORE UPDATE OF size ON main.files FOR EACH ROW BEGIN DELETE FROM %(nuts)s WHERE file_id == old.file_id; END '''))
''' CREATE TRIGGER IF NOT EXISTS %(db)s.%(file_states)s_delete_files BEFORE DELETE ON %(db)s.%(file_states)s FOR EACH ROW BEGIN DELETE FROM %(nuts)s WHERE file_id == old.file_id; END '''))
''' CREATE TRIGGER IF NOT EXISTS %(db)s.%(nuts)s_inc_kind_codes BEFORE INSERT ON %(nuts)s FOR EACH ROW BEGIN INSERT OR IGNORE INTO %(kind_codes_count)s VALUES (new.kind_codes_id, 0); UPDATE %(kind_codes_count)s SET count = count + 1 WHERE new.kind_codes_id == %(kind_codes_count)s.kind_codes_id; END '''))
''' CREATE TRIGGER IF NOT EXISTS %(db)s.%(nuts)s_dec_kind_codes BEFORE DELETE ON %(nuts)s FOR EACH ROW BEGIN UPDATE %(kind_codes_count)s SET count = count - 1 WHERE old.kind_codes_id == %(kind_codes_count)s.kind_codes_id; END '''))
''' CREATE TABLE IF NOT EXISTS %(db)s.%(coverage)s ( kind_codes_id integer, time_seconds integer, time_offset integer, step integer) ''')))
''' CREATE UNIQUE INDEX IF NOT EXISTS %(db)s.%(coverage)s_time ON %(coverage)s (kind_codes_id, time_seconds, time_offset) '''))
''' CREATE TRIGGER IF NOT EXISTS %(db)s.%(nuts)s_add_coverage AFTER INSERT ON %(nuts)s FOR EACH ROW BEGIN INSERT OR IGNORE INTO %(coverage)s VALUES (new.kind_codes_id, new.tmin_seconds, new.tmin_offset, 0) ; UPDATE %(coverage)s SET step = step + 1 WHERE new.kind_codes_id == %(coverage)s.kind_codes_id AND new.tmin_seconds == %(coverage)s.time_seconds AND new.tmin_offset == %(coverage)s.time_offset ; INSERT OR IGNORE INTO %(coverage)s VALUES (new.kind_codes_id, new.tmax_seconds, new.tmax_offset, 0) ; UPDATE %(coverage)s SET step = step - 1 WHERE new.kind_codes_id == %(coverage)s.kind_codes_id AND new.tmax_seconds == %(coverage)s.time_seconds AND new.tmax_offset == %(coverage)s.time_offset ; DELETE FROM %(coverage)s WHERE new.kind_codes_id == %(coverage)s.kind_codes_id AND new.tmin_seconds == %(coverage)s.time_seconds AND new.tmin_offset == %(coverage)s.time_offset AND step == 0 ; DELETE FROM %(coverage)s WHERE new.kind_codes_id == %(coverage)s.kind_codes_id AND new.tmax_seconds == %(coverage)s.time_seconds AND new.tmax_offset == %(coverage)s.time_offset AND step == 0 ; END '''))
''' CREATE TRIGGER IF NOT EXISTS %(db)s.%(nuts)s_remove_coverage BEFORE DELETE ON %(nuts)s FOR EACH ROW BEGIN INSERT OR IGNORE INTO %(coverage)s VALUES (old.kind_codes_id, old.tmin_seconds, old.tmin_offset, 0) ; UPDATE %(coverage)s SET step = step - 1 WHERE old.kind_codes_id == %(coverage)s.kind_codes_id AND old.tmin_seconds == %(coverage)s.time_seconds AND old.tmin_offset == %(coverage)s.time_offset ; INSERT OR IGNORE INTO %(coverage)s VALUES (old.kind_codes_id, old.tmax_seconds, old.tmax_offset, 0) ; UPDATE %(coverage)s SET step = step + 1 WHERE old.kind_codes_id == %(coverage)s.kind_codes_id AND old.tmax_seconds == %(coverage)s.time_seconds AND old.tmax_offset == %(coverage)s.time_offset ; DELETE FROM %(coverage)s WHERE old.kind_codes_id == %(coverage)s.kind_codes_id AND old.tmin_seconds == %(coverage)s.time_seconds AND old.tmin_offset == %(coverage)s.time_offset AND step == 0 ; DELETE FROM %(coverage)s WHERE old.kind_codes_id == %(coverage)s.kind_codes_id AND old.tmax_seconds == %(coverage)s.time_seconds AND old.tmax_offset == %(coverage)s.time_offset AND step == 0 ; END '''))
'''Delete database tables associated with this Squirrel.'''
DROP TRIGGER %(db)s.%(nuts)s_delete_nuts; DROP TRIGGER %(db)s.%(nuts)s_delete_nuts2; DROP TRIGGER %(db)s.%(file_states)s_delete_files; DROP TRIGGER %(db)s.%(nuts)s_inc_kind_codes; DROP TRIGGER %(db)s.%(nuts)s_dec_kind_codes; DROP TABLE %(db)s.%(nuts)s; DROP TABLE %(db)s.%(kind_codes_count)s; DROP TRIGGER IF EXISTS %(db)s.%(nuts)s_add_coverage; DROP TRIGGER IF EXISTS %(db)s.%(nuts)s_remove_coverage; DROP TABLE IF EXISTS %(db)s.%(coverage)s; '''.strip().splitlines():
paths, kinds=None, format='detect', check=True, progress_viewer='terminal'):
''' Add files to the selection.
:param paths: Iterator yielding paths to files or directories to be added to the selection. Recurses into directories. If given a ``str``, it is treated as a single path to be added. :type paths: :py:class:`list` of :py:class:`str`
:param kinds: Content types to be made available through the Squirrel selection. By default, all known content types are accepted. :type kinds: :py:class:`list` of :py:class:`str`
:param format: File format identifier or ``'detect'`` to enable auto-detection (available: %(file_formats)s). :type format: str
:param check: If ``True``, all file modification times are checked to see if cached information has to be updated (slow). If ``False``, only previously unknown files are indexed and cached information is used for known files, regardless of file state (fast, corresponds to Squirrel's ``--optimistic`` mode). File deletions will go undetected in the latter case. :type check: bool
:Complexity: O(log N) '''
self, util.iter_select_files( paths, show_progress=False, pass_through=lambda path: path.startswith('virtual:') ), kind_mask, format)
''' Check for modifications and reindex modified files.
Based on file modification times. '''
''' Add content which is not backed by files.
:param nuts: Content pieces to be added. :type nuts: iterator yielding :py:class:`~pyrocko.squirrel.model.Nut` objects
:param virtual_paths: List of virtual paths to prevent creating a temporary list of the nuts while aggregating the file paths for the selection. :type virtual_paths: :py:class:`list` of :py:class:`str`
Stores to the main database and the selection. '''
virtual_paths = [virtual_paths]
nuts_add = [] virtual_paths = set() for nut in nuts: virtual_paths.add(nut.file_path) nuts_add.append(nut) else:
self, content=[], skip_unchanged=True, check=check):
''' INSERT INTO %(db)s.%(nuts)s SELECT NULL, nuts.file_id, nuts.file_segment, nuts.file_element, nuts.kind_id, nuts.kind_codes_id, nuts.tmin_seconds, nuts.tmin_offset, nuts.tmax_seconds, nuts.tmax_offset, nuts.kscale FROM %(db)s.%(file_states)s INNER JOIN nuts ON %(db)s.%(file_states)s.file_id == nuts.file_id INNER JOIN kind_codes ON nuts.kind_codes_id == kind_codes.kind_codes_id WHERE %(db)s.%(file_states)s.file_state != 2 AND (((1 << kind_codes.kind_id) & %(db)s.%(file_states)s.kind_mask) != 0) ''')).rowcount
''' Add remote resource.
:param source: Remote data access client instance. :type source: subclass of :py:class:`~pyrocko.squirrel.client.base.Source` '''
''' Add FDSN site for transparent remote data access.
Arguments are passed to :py:class:`~pyrocko.squirrel.client.fdsn.FDSNSource`. '''
''' Add online catalog for transparent event data access.
Arguments are passed to :py:class:`~pyrocko.squirrel.client.catalog.CatalogSource`. '''
''' Read dataset description from file and add its contents.
:param path: Path to dataset description file. See :py:mod:`~pyrocko.squirrel.dataset`. :type path: str
:param check: If ``True``, all file modification times are checked to see if cached information has to be updated (slow). If ``False``, only previously unknown files are indexed and cached information is used for known files, regardless of file state (fast, corresponds to Squirrel's ``--optimistic`` mode). File deletions will go undetected in the latter case. :type check: bool ''' ds = dataset.read_dataset(path) ds.setup(self, check=check, progress_viewer=progress_viewer)
self, obj=None, tmin=None, tmax=None, time=None, codes=None):
tmin = time tmax = time
tmin = tmin if tmin is not None else obj.tmin tmax = tmax if tmax is not None else obj.tmax codes = codes if codes is not None else obj.codes
codes = tuple(codes.split('.'))
self, obj=None, tmin=None, tmax=None, time=None, codes=None):
return dict(obj=obj, tmin=tmin, tmax=tmax, time=time, codes=codes)
self, kind=None, tmin=None, tmax=None, codes=None, naiv=False, kind_codes_ids=None):
''' Iterate over content entities matching given constraints.
:param kind: Content kind (or kinds) to extract. :type kind: :py:class:`str`, :py:class:`list` of :py:class:`str`
:param tmin: Start time of query interval. :type tmin: timestamp
:param tmax: End time of query interval. :type tmax: timestamp
:param codes: Pattern of content codes to query. :type codes: :py:class:`tuple` of :py:class:`str`
:param naiv: Bypass time span lookup through indices (slow, for testing). :type naiv: :py:class:`bool`
:param kind_codes_ids: Kind-codes IDs of contents to be retrieved (internal use). :type kind_codes_ids: :py:class:`list` of :py:class:`str`
:yields: :py:class:`~pyrocko.squirrel.model.Nut` objects representing the intersecting content.
:complexity: O(log N) for the time selection part due to heavy use of database indices.
Query time span is treated as a half-open interval ``[tmin, tmax)``. However, if ``tmin`` equals ``tmax``, the edge logic is modified to closed-interval so that content intersecting with the time instant ``t = tmin = tmax`` is returned (otherwise nothing would be returned as ``[t, t)`` never matches anything).
Time spans of content entities to be matched are also treated as half open intervals, e.g. content span ``[0, 1)`` is matched by query span ``[0, 1)`` but not by ``[-1, 0)`` or ``[1, 2)``. Also here, the logic is modified to closed-interval when the content time span is an empty interval, i.e. to indicate a time instant. E.g. time instant 0 is matched by ``[0, 1)`` but not by ``[-1, 0)`` or ``[1, 2)``. '''
tmin = self.get_time_span()[0] tmax = self.get_time_span()[1] + 1.0
else:
(%(db)s.%(nuts)s.kind_id = ? AND %(db)s.%(nuts)s.kscale == ? AND %(db)s.%(nuts)s.tmin_seconds BETWEEN ? AND ?) ''') (to_kind_id(kind), kscale, tmin_seconds - tscale - 1, tmax_seconds + 1))
else: (%(db)s.%(nuts)s.kind_id == ? AND %(db)s.%(nuts)s.kscale == ? AND %(db)s.%(nuts)s.tmin_seconds <= ?) ''')
(to_kind_id(kind), kscale, tmax_seconds + 1))
pats = codes_patterns_for_kind(kind, codes) if pats: extra_cond.append( ' ( %s ) ' % ' OR '.join( ('kind_codes.codes GLOB ?',) * len(pats))) args.extend(separator.join(pat) for pat in pats)
' ( kind_codes.kind_codes_id IN ( %s ) ) ' % ', '.join( '?'*len(kind_codes_ids)))
SELECT files.path, files.format, files.mtime, files.size, %(db)s.%(nuts)s.file_segment, %(db)s.%(nuts)s.file_element, kind_codes.kind_id, kind_codes.codes, %(db)s.%(nuts)s.tmin_seconds, %(db)s.%(nuts)s.tmin_offset, %(db)s.%(nuts)s.tmax_seconds, %(db)s.%(nuts)s.tmax_offset, kind_codes.deltat FROM files INNER JOIN %(db)s.%(nuts)s ON files.file_id == %(db)s.%(nuts)s.file_id INNER JOIN kind_codes ON %(db)s.%(nuts)s.kind_codes_id == kind_codes.kind_codes_id ''')
else: or (nut.tmin == nut.tmax and tmin == nut.tmin):
else: or (nut.tmin == nut.tmax and tmin <= nut.tmin < tmax):
''' Get content entities matching given constraints.
Like :py:meth:`iter_nuts` but returns results as a list. '''
self, kind, tmin=None, tmax=None, codes=None, path=None):
(%(db)s.%(nuts)s.kind_id = ? AND %(db)s.%(nuts)s.kscale == ? AND %(db)s.%(nuts)s.tmin_seconds BETWEEN ? AND ?) ''') (to_kind_id(kind), kscale, tmin_seconds - tscale - 1, tmax_seconds + 1))
else: (%(db)s.%(nuts)s.kind_id == ? AND %(db)s.%(nuts)s.kscale == ? AND %(db)s.%(nuts)s.tmin_seconds <= ?) ''')
(to_kind_id(kind), kscale, tmax_seconds + 1))
' ( %s ) ' % ' OR '.join( ('kind_codes.codes GLOB ?',) * len(pats)))
SELECT %(db)s.%(nuts)s.nut_id, %(db)s.%(nuts)s.tmin_seconds, %(db)s.%(nuts)s.tmin_offset, %(db)s.%(nuts)s.tmax_seconds, %(db)s.%(nuts)s.tmax_offset, kind_codes.deltat FROM files INNER JOIN %(db)s.%(nuts)s ON files.file_id == %(db)s.%(nuts)s.file_id INNER JOIN kind_codes ON %(db)s.%(nuts)s.kind_codes_id == kind_codes.kind_codes_id WHERE ( ''' + ' OR '.join(tmin_cond) + ''' ) AND ''' + ' AND '.join(extra_cond))
nut_tmax_seconds, nut_tmax_offset, nut_deltat = row
nut_tmin_seconds, nut_tmin_offset, nut_deltat) nut_tmax_seconds, nut_tmax_offset, nut_deltat)
nut_tmin_seconds, nut_tmin_offset, tmin_seconds, tmin_offset, model.tscale_to_kscale( tmin_seconds - nut_tmin_seconds), nut_id))
tmax_seconds, tmax_offset, nut_tmax_seconds, nut_tmax_offset, model.tscale_to_kscale( nut_tmax_seconds - tmax_seconds), nut_id))
INSERT INTO %(db)s.%(nuts)s ( file_id, file_segment, file_element, kind_id, kind_codes_id, tmin_seconds, tmin_offset, tmax_seconds, tmax_offset, kscale ) SELECT file_id, file_segment, file_element, kind_id, kind_codes_id, ?, ?, ?, ?, ? FROM %(db)s.%(nuts)s WHERE nut_id == ? '''
''' Get time interval over all content in selection.
:complexity: O(1), independent of the number of nuts.
:returns: (tmin, tmax) '''
SELECT MIN(tmin_seconds), MIN(tmin_offset) FROM %(db)s.%(nuts)s WHERE kind_id == ? AND tmin_seconds == ( SELECT MIN(tmin_seconds) FROM %(db)s.%(nuts)s WHERE kind_id == ?) ''')
SELECT MAX(tmax_seconds), MAX(tmax_offset) FROM %(db)s.%(nuts)s WHERE kind_id == ? AND tmax_seconds == ( SELECT MAX(tmax_seconds) FROM %(db)s.%(nuts)s WHERE kind_id == ?) ''')
else:
sql_min, (kind_id, kind_id)):
sql_max, (kind_id, kind_id)):
''' Get min and max sampling interval of all content of given kind.
:param kind: Content kind :type kind: str
:returns: (deltat_min, deltat_max) '''
deltat for deltat in self.get_deltats(kind) if deltat is not None]
else: return None, None
''' Iterate over content types available in selection.
:param codes: If given, get kinds only for selected codes identifier. :type codes: :py:class:`tuple` of :py:class:`str`
:yields: Available content kinds as :py:class:`str`.
:complexity: O(1), independent of number of nuts. '''
codes=codes, kind_codes_count='%(db)s.%(kind_codes_count)s' % self._names)
''' Iterate over sampling intervals available in selection.
:param kind: If given, get sampling intervals only for a given content type. :type kind: str
:yields: :py:class:`float` values.
:complexity: O(1), independent of number of nuts. ''' kind=kind, kind_codes_count='%(db)s.%(kind_codes_count)s' % self._names)
''' Iterate over content identifier code sequences available in selection.
:param kind: If given, get codes only for a given content type. :type kind: str
:yields: :py:class:`tuple` of :py:class:`str`
:complexity: O(1), independent of number of nuts. ''' kind=kind, kind_codes_count='%(db)s.%(kind_codes_count)s' % self._names)
''' Iterate over number of occurrences of any (kind, codes) combination.
:param kind: If given, get counts only for selected content type. :type kind: str
:yields: Tuples of the form ``((kind, codes), count)``.
:complexity: O(1), independent of number of nuts. ''' kind=kind, kind_codes_count='%(db)s.%(kind_codes_count)s' % self._names)
''' Get content types available in selection.
:param codes: If given, get kinds only for selected codes identifier. :type codes: :py:class:`tuple` of :py:class:`str`
:returns: Sorted list of available content types.
:complexity: O(1), independent of number of nuts.
'''
''' Get sampling intervals available in selection.
:param kind: If given, get codes only for selected content type. :type kind: str
:complexity: O(1), independent of number of nuts.
:returns: sorted list of available sampling intervals '''
''' Get identifier code sequences available in selection.
:param kind: If given, get codes only for selected content type. :type kind: str
:complexity: O(1), independent of number of nuts.
:returns: sorted list of available codes as tuples of strings '''
''' Get number of occurrences of any (kind, codes) combination.
:param kind: If given, get codes only for selected content type. :type kind: str
:complexity: O(1), independent of number of nuts.
:returns: ``dict`` with ``counts[kind][codes]`` or ``counts[codes]`` if kind is not ``None`` ''' else:
else:
''' Find codes matching given patterns.
:param kind: Content kind to be queried. :type kind: str
:param codes_list: List of code patterns to query. If not given or empty, an empty list is returned. :type codes_list: :py:class:`list` of :py:class:`tuple` of :py:class:`str`
:returns: List of matches of the form ``[kind_codes_id, codes, deltat]``. '''
('kind_codes.codes GLOB ?',) * len(pats))
SELECT kind_codes_id, codes, deltat FROM kind_codes WHERE kind_id == ? AND ''' + codes_cond)
''' Update or partially update channel and event inventories.
:param constraint: Selection of times or areas to be brought up to date. :type constraint: :py:class:`~pyrocko.squirrel.client.Constraint`
:param \\*\\*kwargs: Shortcut for setting ``constraint=Constraint(**kwargs)``.
This function triggers all attached remote sources, to check for updates in the meta-data. The sources will only submit queries when their expiration date has passed, or if the selection spans into previously unseen times or areas. '''
''' Permit downloading of remote waveforms.
:param constraint: Remote waveforms compatible with the given constraint are enabled for download. :type constraint: :py:class:`~pyrocko.squirrel.client.Constraint`
:param \\*\\*kwargs: Shortcut for setting ``constraint=Constraint(**kwargs)``.
Calling this method permits Squirrel to download waveforms from remote sources when processing subsequent waveform requests. This works by inserting so called waveform promises into the database. It will look into the available channels for each remote source and create a promise for each channel compatible with the given constraint. If the promise then matches in a waveform request, Squirrel tries to download the waveform. If the download is successful, the downloaded waveform is added to the Squirrel and the promise is deleted. If the download fails, the promise is kept if the reason of failure looks like being temporary, e.g. because of a network failure. If the cause of failure however seems to be permanent, the promise is deleted so that no further attempts are made to download a waveform which might not be available from that server at all. To force re-scheduling after a permanent failure, call :py:meth:`update_waveform_promises` yet another time. '''
''' Get number of files in selection. '''
''' Get number of nuts in selection. '''
''' Get aggregated file size available in selection. '''
SELECT SUM(files.size) FROM %(db)s.%(file_states)s INNER JOIN files ON %(db)s.%(file_states)s.file_id = files.file_id ''')
''' Get statistics on contents available through this selection. '''
nfiles=self.get_nfiles(), nnuts=self.get_nnuts(), kinds=self.get_kinds(), codes=self.get_codes(), total_size=self.get_total_size(), counts=self.get_counts(), tmin=tmin, tmax=tmax)
''' Get and possibly load full content for a given index entry from file.
Loads the actual content objects (channel, station, waveform, ...) from file. For efficiency sibling content (all stuff in the same file segment) will also be loaded as a side effect. The loaded contents are cached in the Squirrel object. '''
nut.file_path, segment=nut.file_segment, format=nut.file_format, database=self._database):
except KeyError: raise error.NotAvailable( 'Unable to retrieve content: %s, %s, %s, %s' % nut.key)
''' Notify memory caches about consumer moving to a new data batch.
:param accessor_id: Name of accessing consumer to be advanced. :type accessor_id: str
:param cache_id: Name of cache for which the accessor should be advanced. By default the named accessor is advanced in all registered caches. By default, two caches named ``'default'`` and ``'waveforms'`` are available. :type cache_id: str
See :py:class:`~pyrocko.squirrel.cache.ContentCache` for details on how Squirrel's memory caching works and can be tuned. Default behaviour is to release data when it has not been used in the latest data window/batch. If the accessor is never advanced, data is cached indefinitely - which is often desired e.g. for station meta-data. Methods for consecutive data traversal, like :py:meth:`chopper_waveforms` automatically advance and clear their accessor. ''' self._content_caches.keys() if cache_id is None else [cache_id]):
''' Notify memory caches about a consumer having finished.
:param accessor_id: Name of accessor to be cleared. :type accessor_id: str
:param cache_id: Name of cache for which the accessor should be cleared. By default the named accessor is cleared from all registered caches. By default, two caches named ``'default'`` and ``'waveforms'`` are available. :type cache_id: str
Calling this method clears all references to cache entries held by the named accessor. Cache entries are then freed if not referenced by any other accessor. '''
self._content_caches.keys() if cache_id is None else [cache_id]):
'Multiple entries matching codes %s' % '.'.join(codes.split(separator)))
self, obj=None, tmin=None, tmax=None, time=None, codes=None, model='squirrel'):
''' Get stations matching given constraints.
%(query_args)s
:param model: Select object model for returned values: ``'squirrel'`` to get Squirrel station objects or ``'pyrocko'`` to get Pyrocko station objects with channel information attached. :type model: str
:returns: List of :py:class:`pyrocko.squirrel.Station <pyrocko.squirrel.model.Station>` objects by default or list of :py:class:`pyrocko.model.Station <pyrocko.model.station.Station>` objects if ``model='pyrocko'`` is requested.
See :py:meth:`iter_nuts` for details on time span matching. '''
self.iter_nuts('station', *args), key=lambda nut: nut.dkey) else: raise ValueError('Invalid station model: %s' % model)
self, obj=None, tmin=None, tmax=None, time=None, codes=None):
''' Get channels matching given constraints.
%(query_args)s
:returns: List of :py:class:`~pyrocko.squirrel.model.Channel` objects.
See :py:meth:`iter_nuts` for details on time span matching. '''
self.iter_nuts('channel', *args), key=lambda nut: nut.dkey)
self, obj=None, tmin=None, tmax=None, time=None, codes=None):
''' Get instrument responses matching given constraints.
%(query_args)s
:returns: List of :py:class:`~pyrocko.squirrel.model.Response` objects.
See :py:meth:`iter_nuts` for details on time span matching. '''
args = self._get_selection_args(obj, tmin, tmax, time, codes) nuts = sorted( self.iter_nuts('response', *args), key=lambda nut: nut.dkey) self._check_duplicates(nuts) return [self.get_content(nut) for nut in nuts]
self, obj=None, tmin=None, tmax=None, time=None, codes=None):
''' Get events matching given constraints.
%(query_args)s
:returns: List of :py:class:`~pyrocko.model.event.Event` objects.
See :py:meth:`iter_nuts` for details on time span matching. '''
self.iter_nuts('event', *args), key=lambda nut: nut.dkey)
if isinstance(x, tuple): return tuple(tts(e) for e in x) elif isinstance(x, list): return list(tts(e) for e in x) else: return util.time_to_str(x)
max(tmin, promise.tmin), min(tmax, promise.tmax), promise.deltat):
WaveformOrder( source_id=promise.file_path, codes=tuple(promise.codes.split(separator)), tmin=block_tmin, tmax=block_tmax, deltat=promise.deltat, gaps=gaps(waveforms_avail, block_tmin, block_tmax)))
logger.info( 'Waveform orders already satisified with cached/local data: ' '%i (%i)' % (len(order_keys_noop), len(orders_noop)))
(source_id, i) for (i, source_id) in enumerate(source_ids))
key=lambda order: source_priority[order.source_id])
'Waveform orders standing for download: %i (%i)' % (len(order_groups), len(orders)))
'waveform_promise', order.tmin, order.tmax, codes=order.codes, path=order.source_id)
pass
split_promise(order)
# TODO: parallelize this loop self, by_source_id[source_id], success=success, error_permanent=split_promise, error_temporary=noop)
self, obj=None, tmin=None, tmax=None, time=None, codes=None):
''' Get waveform content entities matching given constraints.
%(query_args)s
Like :py:meth:`get_nuts` with ``kind='waveform'`` but additionally resolves matching waveform promises (downloads waveforms from remote sources).
See :py:meth:`iter_nuts` for details on time span matching. '''
self.iter_nuts('waveform', *args), key=lambda nut: nut.dkey)
self, obj=None, tmin=None, tmax=None, time=None, codes=None, uncut=False, want_incomplete=True, degap=True, maxgap=5, maxlap=None, snap=None, include_last=False, load_data=True, accessor_id='default'):
''' Get waveforms matching given constraints.
%(query_args)s
:param uncut: Set to ``True``, to disable cutting traces to [``tmin``, ``tmax``] and to disable degapping/deoverlapping. Returns untouched traces as they are read from file segment. File segments are always read in their entirety. :type uncut: bool
:param want_incomplete: If ``True``, gappy/incomplete traces are included in the result. :type want_incomplete: bool
:param degap: If ``True``, connect traces and remove gaps and overlaps. :type degap: bool
:param maxgap: Maximum gap size in samples which is filled with interpolated samples when ``degap`` is ``True``. :type maxgap: int
:param maxlap: Maximum overlap size in samples which is removed when ``degap`` is ``True`` :type maxlap: int
:param snap: Rounding functions used when computing sample index from time instance, for trace start and trace end, respectively. By default, ``(round, round)`` is used. :type snap: tuple of 2 callables
:param include_last: If ``True``, add one more sample to the returned traces (the sample which would be the first sample of a query with ``tmin`` set to the current value of ``tmax``). :type include_last: bool
:param load_data: If ``True``, waveform data samples are read from files (or cache). If ``False``, meta-information-only traces are returned (dummy traces with no data samples). :type load_data: bool
:param accessor_id: Name of consumer on whose behalf data is accessed. Used in cache management (see :py:mod:`~pyrocko.squirrel.cache`). Used as a key to distinguish different points of extraction for the decision of when to release cached waveform data. Should be used when data is alternately extracted from more than one region / selection. :type accessor_id: str
See :py:meth:`iter_nuts` for details on time span matching.
Loaded data is kept in memory (at least) until :py:meth:`clear_accessor` has been called or :py:meth:`advance_accessor` has been called two consecutive times without data being accessed between the two calls (by this accessor). Data may still be further kept in the memory cache if held alive by consumers with a different ``accessor_id``. '''
self.get_content(nut, 'waveform', accessor_id) for nut in nuts]
else: traces = [ trace.Trace(**nut.trace_kwargs) for nut in nuts]
tr = tr.copy(data=False) tr.ydata = None
tmin, tmax, inplace=False, snap=snap, include_last=include_last))
except trace.NoData: pass
chopped, degap, maxgap, maxlap, want_incomplete, tmin, tmax)
self, obj=None, tmin=None, tmax=None, time=None, codes=None, tinc=None, tpad=0., want_incomplete=True, degap=True, maxgap=5, maxlap=None, snap=None, include_last=False, load_data=True, accessor_id=None, clear_accessor=True):
''' Iterate window-wise over waveform archive.
%(query_args)s
:param tinc: Time increment (window shift time) (default uses ``tmax-tmin``) :type tinc: timestamp
:param tpad: Padding time appended on either side of the data window (window overlap is ``2*tpad``). :type tpad: timestamp
:param want_incomplete: If ``True``, gappy/incomplete traces are included in the result. :type want_incomplete: bool
:param degap: If ``True``, connect traces and remove gaps and overlaps. :type degap: bool
:param maxgap: Maximum gap size in samples which is filled with interpolated samples when ``degap`` is ``True``. :type maxgap: int
:param maxlap: Maximum overlap size in samples which is removed when ``degap`` is ``True`` :type maxlap: int
:param snap: Rounding functions used when computing sample index from time instance, for trace start and trace end, respectively. By default, ``(round, round)`` is used. :type snap: tuple of 2 callables
:param include_last: If ``True``, add one more sample to the returned traces (the sample which would be the first sample of a query with ``tmin`` set to the current value of ``tmax``). :type include_last: bool
:param load_data: If ``True``, waveform data samples are read from files (or cache). If ``False``, meta-information-only traces are returned (dummy traces with no data samples). :type load_data: bool
:param accessor_id: Name of consumer on whose behalf data is accessed. Used in cache management (see :py:mod:`~pyrocko.squirrel.cache`). Used as a key to distinguish different points of extraction for the decision of when to release cached waveform data. Should be used when data is alternately extracted from more than one region / selection. :type accessor_id: str
:param clear_accessor: If ``True`` (default), :py:meth:`clear_accessor` is called when the chopper finishes. Set to ``False`` to keep loaded waveforms in memory when the generator returns.
:yields: A list of :py:class:`~pyrocko.trace.Trace` objects for every extracted time window.
See :py:meth:`iter_nuts` for details on time span matching. '''
obj, tmin, tmax, time, codes)
['waveform', 'waveform_promise'])
logger.warning('Content has undefined time span. No waveforms?') return
tmin=wmin-tpad, tmax=wmax+tpad, codes=codes, snap=snap, include_last=include_last, load_data=load_data, want_incomplete=want_incomplete, degap=degap, maxgap=maxgap, maxlap=maxlap, accessor_id=accessor_id)
finally:
self, chopped, degap, maxgap, maxlap, want_incomplete, tmin, tmax):
chopped_weeded = [] for tr in chopped: emin = tr.tmin - tmin emax = tr.tmax + tr.deltat - tmax if (abs(emin) <= 0.5*tr.deltat and abs(emax) <= 0.5*tr.deltat): chopped_weeded.append(tr)
elif degap: if (0. < emin <= 5. * tr.deltat and -5. * tr.deltat <= emax < 0.):
tr.extend(tmin, tmax-tr.deltat, fillmethod='repeat') chopped_weeded.append(tr)
chopped = chopped_weeded
self, obj=None, tmin=None, tmax=None, time=None, codes=None):
name=cargs[0], azimuth=cargs[1], dip=cargs[2]))
network=sargs[0], station=sargs[1], location=sargs[2], lat=sargs[3], lon=sargs[4], elevation=sargs[5], depth=sargs[6] or 0.0, channels=pchannels))
def pile(self):
''' Emulates the older :py:class:`pyrocko.pile.Pile` interface.
This property exposes a :py:class:`pyrocko.squirrel.pile.Pile` object, which emulates most of the older :py:class:`pyrocko.pile.Pile` methods but uses the fluffy power of the Squirrel under the hood.
This interface can be used as a drop-in replacement for piles which are used in existing scripts and programs for efficient waveform data access. The Squirrel-based pile scales better for large datasets. Newer scripts should use Squirrel's native methods to avoid the emulation overhead. '''
''' Look at dataset in Snuffler. ''' self.pile.snuffle()
gather(codes) for codes in self.iter_codes(kind) if selector is None or selector(codes))
def __str__(self):
    '''
    Human-readable summary of the dataset contents.

    Delegates to :py:meth:`get_stats` and stringifies the result.
    '''
    stats = self.get_stats()
    return str(stats)
self, kind, tmin=None, tmax=None, codes_list=None, limit=None):
''' Get coverage information.
Get information about strips of gapless data coverage.
:param kind: Content kind to be queried. :type kind: str
:param tmin: Start time of query interval. :type tmin: timestamp
:param tmax: End time of query interval. :type tmax: timestamp
:param codes_list: List of code patterns to query. If not given or empty, an empty list is returned. :type codes_list: :py:class:`list` of :py:class:`tuple` of :py:class:`str`
:param limit: Limit query to return only up to a given maximum number of entries per matching channel (without setting this option, very gappy data could cause the query to execute for a very long time). :type limit: int
:returns: List of entries of the form ``(pattern, codes, deltat, tmin, tmax, data)`` where ``pattern`` is the request code pattern which yielded this entry, ``codes`` are the matching channel codes, ``tmin`` and ``tmax`` are the global min and max times for which data for this channel is available, regardless of any time restrictions in the query. ``data`` is a list with (up to ``limit``) change-points of the form ``(time, count)`` where a ``count`` of zero indicates a data gap, a value of 1 normal data coverage and higher values indicate duplicate/redundant data. '''
kind, tmin, tmin, kind_codes_ids=kind_codes_ids):
SELECT time_seconds, time_offset FROM %(db)s.%(coverage)s WHERE kind_codes_id == ? ORDER BY kind_codes_id ''' + order + ''', time_seconds ''' + order + ''', time_offset ''' + order + ''' LIMIT 1 ''')
continue
# intentionally < because (== tmin) is queried from nuts 'OR ( ? == time_seconds AND ? < time_offset ) ) '
'OR ( ? == time_seconds AND time_offset <= ? ) ) '
sql_limit = ' LIMIT ?' args.append(limit)
SELECT time_seconds, time_offset, step FROM %(db)s.%(coverage)s WHERE kind_codes_id == ? ''' + sql_time + ''' ORDER BY kind_codes_id, time_seconds, time_offset ''' + sql_limit)
entry[-1] = None else:
''' Dump raw database tables in textual form (for debugging purposes).
:param table_names: Names of tables to be dumped or ``None`` to dump all. :type table_names: :py:class:`list` of :py:class:`str`
:param stream: Open file or ``None`` to dump to standard output. '''
stream = sys.stdout
table_names = [table_names]
'selection_file_states', 'selection_nuts', 'selection_kind_codes_count', 'files', 'nuts', 'kind_codes', 'kind_codes_count']
'selection_file_states': '%(db)s.%(file_states)s', 'selection_nuts': '%(db)s.%(nuts)s', 'selection_kind_codes_count': '%(db)s.%(kind_codes_count)s', 'files': 'files', 'nuts': 'nuts', 'kind_codes': 'kind_codes', 'kind_codes_count': 'kind_codes_count'}
m[table_name] % self._names, stream=stream)
''' Container to hold statistics about contents available from a Squirrel.
See also :py:meth:`Squirrel.get_stats`. '''
help='Number of files in selection.') help='Number of index nuts in selection.') Tuple.T(content_t=String.T()), help='Available code sequences in selection, e.g. ' '(agency, network, station, location) for stations nuts.') String.T(), help='Available content types in selection.') help='Aggregated file size of files is selection.') String.T(), Dict.T(Tuple.T(content_t=String.T()), Int.T()), help='Breakdown of how many nuts of any content type and code ' 'sequence are available in selection, ``counts[kind][codes]``.') optional=True, help='Earliest start time of all nuts in selection.') optional=True, help='Latest end time of all nuts in selection.')
# Build a multi-line, human-readable report of the selection statistics.
# NOTE(review): the source lines below appear fused by text extraction;
# code tokens are kept verbatim, only comments were added.
def __str__(self): kind_counts = dict( (kind, sum(self.counts[kind].values())) for kind in self.kinds)
# Join each code tuple (e.g. agency, network, station, location) into a
# dotted string for display.
codes = ['.'.join(x) for x in self.codes]
# Abbreviate long listings: show only the first and last 10 code strings
# plus a '[N more]' marker; with 20 or fewer codes show them all, or
# '<none>' when the selection has no codes at all.
if len(codes) > 20: scodes = '\n' + util.ewrap(codes[:10], indent='  ') \ + '\n [%i more]\n' % (len(codes) - 20) \ + util.ewrap(codes[-10:], indent='  ') else: scodes = '\n' + util.ewrap(codes, indent='  ') \ if codes else '<none>'
# Format the global time span of the indexed contents; either bound may be
# unknown (None) and is then shown as '<none>'.
stmin = util.tts(self.tmin) if self.tmin is not None else '<none>' stmax = util.tts(self.tmax) if self.tmax is not None else '<none>'
# Assemble the final report. kind_counts holds, per content kind, the nut
# count summed over all code sequences of that kind.
s = ''' Available codes: %s Number of files: %i Total size of known files: %s Number of index nuts: %i Available content kinds: %s Time span of indexed contents: %s - %s''' % ( scodes, self.nfiles, util.human_bytesize(self.total_size), self.nnuts, ', '.join('%s: %i' % ( kind, kind_counts[kind]) for kind in sorted(self.kinds)), stmin, stmax)
return s
'Squirrel', 'SquirrelStats', ]