# http://pyrocko.org - GPLv3 # # The Pyrocko Developers, 21st Century # ---|P------/S----------~Lg----------
global g_icount
return codes[:n] + ('*',) * (n-len(codes))
'station': 4, 'channel': 5, 'response': 5, 'waveform': 6, 'event': 1, 'waveform_promise': 6, 'undefined': 1}
return []
cfill2 = list(cfill) cfill2[3] = '[*]' return [cfill, cfill2]
groups = defaultdict(list) for channel in channels: codes = channel.codes gcodes = codes[:-1] + (codes[-1][:-1],) groups[gcodes].append(channel)
return groups
list_of_args = [channel._get_pyrocko_station_args() for channel in group] args = util.consistency_merge(list_of_args + extra_args) from pyrocko import model as pmodel return pmodel.Station( network=args[0], station=args[1], location=args[2], lat=args[3], lon=args[4], elevation=args[5], depth=args[6], channels=[ch.get_pyrocko_channel() for ch in group])
assert tmin_a < tmax_a data.append((tmin_a, 1)) data.append((tmax_a, -1))
''' Database backed file selection (base class for :py:class:`Squirrel`).
:param database: Database instance or file path to database. :type database: :py:class:`Database` or :py:class:`str` :param persistent: If given a name, create a persistent selection. :type persistent: :py:class:`str`
In the Squirrel framework, a selection is conceptually a list of files to be made available in the application. Instead of using :py:class:`Selection` directly, user applications should usually use its subclass :py:class:`Squirrel` which adds content indices to the selection and provides high level data querying.
By default, a temporary table in the database is created to hold the names of the files in the selection. This table is only visible inside the application which created it. If a name is given to ``persistent``, a named selection is created, which is visible also in other applications using the same database.
Besides the filename references, desired content kind masks and file format indications are stored in the selection's database table to make the user choice regarding these options persistent on a per-file basis. Book-keeping on whether files are unknown, known or if modification checks are forced is also handled in the selection's file-state table.
Paths of files can be added to the selection using the :py:meth:`add` method and removed with :py:meth:`remove`. :py:meth:`undig_grouped` can be used to iterate over all content known to the selection. '''
raise error.SquirrelError( 'invalid persistent selection name: %s' % persistent)
else:
'db': 'main' if self._persistent else 'temp', 'file_states': self.name + '_file_states', 'bulkinsert': self.name + '_bulkinsert'}
''' CREATE TABLE IF NOT EXISTS %(db)s.%(file_states)s ( file_id integer PRIMARY KEY, file_state integer, kind_mask integer, format text) ''')))
''' CREATE INDEX IF NOT EXISTS %(db)s.%(file_states)s_index_file_state ON %(file_states)s (file_state) '''))
else:
''' Get the database to which this selection belongs.
:returns: :py:class:`Database` object '''
''' Destroy the tables associated with this selection. ''' 'DROP TABLE %(db)s.%(file_states)s'))
self, paths, kind_mask=model.g_kind_mask_all, format='detect'):
''' Add files to the selection.
:param paths: Paths to files to be added to the selection. :type paths: iterator yielding :py:class:`str` objects '''
paths = [paths]
# short non-iterator paths: can do without temp table
''' INSERT OR IGNORE INTO files VALUES (NULL, ?, NULL, NULL, NULL) ''', ((x,) for x in paths))
''' DELETE FROM %(db)s.%(file_states)s WHERE file_id IN ( SELECT files.file_id FROM files WHERE files.path == ? ) AND kind_mask != ? OR format != ? '''), ( (path, kind_mask, format) for path in paths))
''' INSERT OR IGNORE INTO %(db)s.%(file_states)s SELECT files.file_id, 0, ?, ? FROM files WHERE files.path = ? '''), ((kind_mask, format, path) for path in paths))
''' UPDATE %(db)s.%(file_states)s SET file_state = 1 WHERE file_id IN ( SELECT files.file_id FROM files WHERE files.path == ? ) AND file_state != 0 '''), ((path,) for path in paths))
else:
''' CREATE TEMP TABLE temp.%(bulkinsert)s (path text) '''))
'INSERT INTO temp.%(bulkinsert)s VALUES (?)'), ((x,) for x in paths))
''' INSERT OR IGNORE INTO files SELECT NULL, path, NULL, NULL, NULL FROM temp.%(bulkinsert)s '''))
''' DELETE FROM %(db)s.%(file_states)s WHERE file_id IN ( SELECT files.file_id FROM temp.%(bulkinsert)s INNER JOIN files ON temp.%(bulkinsert)s.path == files.path) AND kind_mask != ? OR format != ? '''), (kind_mask, format))
''' INSERT OR IGNORE INTO %(db)s.%(file_states)s SELECT files.file_id, 0, ?, ? FROM temp.%(bulkinsert)s INNER JOIN files ON temp.%(bulkinsert)s.path == files.path '''), (kind_mask, format))
''' UPDATE %(db)s.%(file_states)s SET file_state = 1 WHERE file_id IN ( SELECT files.file_id FROM temp.%(bulkinsert)s INNER JOIN files ON temp.%(bulkinsert)s.path == files.path) AND file_state != 0 '''))
'DROP TABLE temp.%(bulkinsert)s'))
''' Remove files from the selection.
:param paths: Paths to files to be removed from the selection. :type paths: :py:class:`list` of :py:class:`str` ''' paths = [paths]
''' DELETE FROM %(db)s.%(file_states)s WHERE %(db)s.%(file_states)s.file_id IN (SELECT files.file_id FROM files WHERE files.path == ?) '''), ((path,) for path in paths))
''' Iterate over all file paths currently belonging to the selection.
:returns: Iterator yielding file paths. '''
sql = self._sql(''' SELECT files.path FROM %(db)s.%(file_states)s INNER JOIN files ON files.file_id = %(db)s.%(file_states)s.file_id ORDER BY %(db)s.%(file_states)s.file_id ''')
for values in self._conn.execute(sql): yield values[0]
''' Get all file paths currently belonging to the selection.
:returns: List of file paths. ''' return list(self.iter_paths())
''' Set file states to "known" (2). ''' ''' UPDATE %(db)s.%(file_states)s SET file_state = 2 WHERE file_state < 2 '''))
''' Set file states to "request force check" (1). ''' ''' UPDATE %(db)s.%(file_states)s SET file_state = 1 '''))
''' Get inventory of cached content for all files in the selection.
:param skip_unchanged: if ``True`` only inventory of modified files is yielded (:py:meth:`flag_modified` must be called beforehand).
This generator yields tuples ``((format, path), nuts)`` where ``path`` is the path to the file, ``format`` is the format assignation or ``'detect'`` and ``nuts`` is a list of :py:class:`~pyrocko.squirrel.Nut` objects representing the contents of the file. '''
WHERE %(db)s.%(file_states)s.file_state == 0 ''' else:
SELECT COUNT() FROM %(db)s.%(file_states)s ''' + where), ())[0]
SELECT %(db)s.%(file_states)s.format, files.path, files.format, files.mtime, files.size, nuts.file_segment, nuts.file_element, kind_codes.kind_id, kind_codes.codes, nuts.tmin_seconds, nuts.tmin_offset, nuts.tmax_seconds, nuts.tmax_offset, kind_codes.deltat FROM %(db)s.%(file_states)s LEFT OUTER JOIN files ON %(db)s.%(file_states)s.file_id = files.file_id LEFT OUTER JOIN nuts ON files.file_id = nuts.file_id LEFT OUTER JOIN kind_codes ON nuts.kind_codes_id == kind_codes.kind_codes_id ''' + where + ''' ORDER BY %(db)s.%(file_states)s.file_id ''')
''' Mark files which have been modified.
:param check: If ``True`` query modification times of known files on disk. If ``False``, only flag unknown files.
Assumes file state is 0 for newly added files, 1 for files added again to the selection (forces check), or 2 for all others (no checking is done for those).
Sets file state to 0 for unknown or modified files, 2 for known and not modified files. '''
UPDATE %(db)s.%(file_states)s SET file_state = 0 WHERE ( SELECT mtime FROM files WHERE files.file_id == %(db)s.%(file_states)s.file_id) IS NULL AND file_state == 1 ''')
UPDATE %(db)s.%(file_states)s SET file_state = 2 WHERE file_state == 1 ''')
SELECT files.file_id, files.path, files.format, files.mtime, files.size FROM %(db)s.%(file_states)s INNER JOIN files ON %(db)s.%(file_states)s.file_id == files.file_id WHERE %(db)s.%(file_states)s.file_state == 1 ORDER BY %(db)s.%(file_states)s.file_id ''')
size_db) in self._conn.execute(sql):
except io.UnknownFormat: continue
else:
# could better use callback function here...
UPDATE %(db)s.%(file_states)s SET file_state = ? WHERE file_id = ? ''')
''' Container to hold statistics about contents available from a Squirrel.
See also :py:meth:`Squirrel.get_stats`. '''
help='Number of files in selection.') help='Number of index nuts in selection.') Tuple.T(content_t=String.T()), help='Available code sequences in selection, e.g. ' '(agency, network, station, location) for stations nuts.') String.T(), help='Available content types in selection.') help='Aggregated file size of files is selection.') String.T(), Dict.T(Tuple.T(content_t=String.T()), Int.T()), help='Breakdown of how many nuts of any content type and code ' 'sequence are available in selection, ``counts[kind][codes]``.') optional=True, help='Earliest start time of all nuts in selection.') optional=True, help='Latest end time of all nuts in selection.')
def __str__(self):
    '''
    Render a human-readable, multi-line summary of these statistics.
    '''
    # Aggregate nut counts per content kind from the per-codes breakdown.
    counts_by_kind = {
        kind: sum(self.counts[kind].values()) for kind in self.kinds}

    joined_codes = ['.'.join(entry) for entry in self.codes]

    if len(joined_codes) > 20:
        # Abbreviate long code lists: first 10, an omission note, last 10.
        codes_str = (
            '\n' + util.ewrap(joined_codes[:10], indent=' ')
            + '\n [%i more]\n' % (len(joined_codes) - 20)
            + util.ewrap(joined_codes[-10:], indent=' '))
    else:
        codes_str = '\n' + util.ewrap(joined_codes, indent=' ') \
            if joined_codes else '<none>'

    tmin_str = util.tts(self.tmin) if self.tmin is not None else '<none>'
    tmax_str = util.tts(self.tmax) if self.tmax is not None else '<none>'

    kinds_str = ', '.join(
        '%s: %i' % (kind, counts_by_kind[kind])
        for kind in sorted(self.kinds))

    summary = '''
Available codes: %s
Number of files: %i
Total size of known files: %s
Number of index nuts: %i
Available content kinds: %s
Time span of indexed contents: %s - %s''' % (
        codes_str,
        self.nfiles,
        util.human_bytesize(self.total_size),
        self.nnuts,
        kinds_str,
        tmin_str,
        tmax_str)

    return summary
''' Prompt, lazy, indexing, caching, dynamic seismological dataset access.
:param env: Squirrel environment instance or directory path to use as starting point for its detection. By default, the current directory is used as starting point. When searching for a usable environment the directory ``'.squirrel'`` or ``'squirrel'`` in the current (or starting point) directory is used if it exists, otherwise the parent directories are search upwards for the existence of such a directory. If no such directory is found, the user's global Squirrel environment ``'$HOME/.pyrocko/squirrel'`` is used. :type env: :py:class:`SquirrelEnvironment` or :py:class:`str` :param database: Database instance or path to database. By default the database found in the detected Squirrel environment is used. :type database: :py:class:`Database` or :py:class:`str` :param cache_path: Directory path to use for data caching. By default, the ``'cache'`` directory in the detected Squirrel environment is used. :type cache_path: :py:class:`str` :param persistent: If given a name, create a persistent selection. :type persistent: :py:class:`str`
Provides a unified interface to query seismic waveforms, station and sensor metadata, and event information from local file collections and remote data sources. Query results are promptly returned, even for very large collections, thanks to a highly optimized database setup working behind the scenes. Assemblage of a data selection is very fast for known files as all content indices are cached in a database. Unknown files are automatically indexed when added to the selection.
Features
- Efficient[1] lookup of data relevant for a selected time window. - Metadata caching and indexing. - Modified files are re-indexed as needed. - SQL database (sqlite) is used behind the scenes. - Can handle selections with millions of files. - Data can be added and removed at run-time, efficiently[1]. - Just-in-time download of missing data. - Disk-cache of meta-data query results with expiration time. - Efficient event catalog synchronization. - Always-up-to-date data coverage indices. - Always-up-to-date indices of available station/channel codes.
[1] O log N performance, where N is the number of data entities (nuts).
Queries are restricted to the contents offered by the files which have been added to the Squirrel (which usually is a subset of the information collected in the attached global file meta-information database).
By default, temporary tables are created in the attached database to hold the names of the files in the selection as well as various indices and counters. These tables are only visible inside the application which created it. If a name is given to ``persistent``, a named selection is created, which is visible also in other applications using the same database.
Paths of files can be added to the selection using the :py:meth:`add` method. '''
self, env=None, database=None, cache_path=None, persistent=None):
'waveform': cache.ContentCache(), 'default': cache.ContentCache()}
'nuts': self.name + '_nuts', 'kind_codes_count': self.name + '_kind_codes_count', 'coverage': self.name + '_coverage'})
''' CREATE TABLE IF NOT EXISTS %(db)s.%(nuts)s ( nut_id integer PRIMARY KEY, file_id integer, file_segment integer, file_element integer, kind_id integer, kind_codes_id integer, tmin_seconds integer, tmin_offset integer, tmax_seconds integer, tmax_offset integer, kscale integer) ''')))
''' CREATE TABLE IF NOT EXISTS %(db)s.%(kind_codes_count)s ( kind_codes_id integer PRIMARY KEY, count integer) ''')))
''' CREATE UNIQUE INDEX IF NOT EXISTS %(db)s.%(nuts)s_file_element ON %(nuts)s (file_id, file_segment, file_element) '''))
''' CREATE INDEX IF NOT EXISTS %(db)s.%(nuts)s_index_file_id ON %(nuts)s (file_id) '''))
''' CREATE INDEX IF NOT EXISTS %(db)s.%(nuts)s_index_tmin_seconds ON %(nuts)s (tmin_seconds) '''))
''' CREATE INDEX IF NOT EXISTS %(db)s.%(nuts)s_index_tmax_seconds ON %(nuts)s (tmax_seconds) '''))
''' CREATE INDEX IF NOT EXISTS %(db)s.%(nuts)s_index_kscale ON %(nuts)s (kind_id, kscale, tmin_seconds) '''))
''' CREATE TRIGGER IF NOT EXISTS %(db)s.%(nuts)s_delete_nuts BEFORE DELETE ON main.files FOR EACH ROW BEGIN DELETE FROM %(nuts)s WHERE file_id == old.file_id; END '''))
# trigger only on size to make silent update of mtime possible ''' CREATE TRIGGER IF NOT EXISTS %(db)s.%(nuts)s_delete_nuts2 BEFORE UPDATE OF size ON main.files FOR EACH ROW BEGIN DELETE FROM %(nuts)s WHERE file_id == old.file_id; END '''))
''' CREATE TRIGGER IF NOT EXISTS %(db)s.%(file_states)s_delete_files BEFORE DELETE ON %(db)s.%(file_states)s FOR EACH ROW BEGIN DELETE FROM %(nuts)s WHERE file_id == old.file_id; END '''))
''' CREATE TRIGGER IF NOT EXISTS %(db)s.%(nuts)s_inc_kind_codes BEFORE INSERT ON %(nuts)s FOR EACH ROW BEGIN INSERT OR IGNORE INTO %(kind_codes_count)s VALUES (new.kind_codes_id, 0); UPDATE %(kind_codes_count)s SET count = count + 1 WHERE new.kind_codes_id == %(kind_codes_count)s.kind_codes_id; END '''))
''' CREATE TRIGGER IF NOT EXISTS %(db)s.%(nuts)s_dec_kind_codes BEFORE DELETE ON %(nuts)s FOR EACH ROW BEGIN UPDATE %(kind_codes_count)s SET count = count - 1 WHERE old.kind_codes_id == %(kind_codes_count)s.kind_codes_id; END '''))
''' CREATE TABLE IF NOT EXISTS %(db)s.%(coverage)s ( kind_codes_id integer, time_seconds integer, time_offset integer, step integer) ''')))
''' CREATE UNIQUE INDEX IF NOT EXISTS %(db)s.%(coverage)s_time ON %(coverage)s (kind_codes_id, time_seconds, time_offset) '''))
''' CREATE TRIGGER IF NOT EXISTS %(db)s.%(nuts)s_add_coverage AFTER INSERT ON %(nuts)s FOR EACH ROW BEGIN INSERT OR IGNORE INTO %(coverage)s VALUES (new.kind_codes_id, new.tmin_seconds, new.tmin_offset, 0) ; UPDATE %(coverage)s SET step = step + 1 WHERE new.kind_codes_id == %(coverage)s.kind_codes_id AND new.tmin_seconds == %(coverage)s.time_seconds AND new.tmin_offset == %(coverage)s.time_offset ; INSERT OR IGNORE INTO %(coverage)s VALUES (new.kind_codes_id, new.tmax_seconds, new.tmax_offset, 0) ; UPDATE %(coverage)s SET step = step - 1 WHERE new.kind_codes_id == %(coverage)s.kind_codes_id AND new.tmax_seconds == %(coverage)s.time_seconds AND new.tmax_offset == %(coverage)s.time_offset ; DELETE FROM %(coverage)s WHERE new.kind_codes_id == %(coverage)s.kind_codes_id AND new.tmin_seconds == %(coverage)s.time_seconds AND new.tmin_offset == %(coverage)s.time_offset AND step == 0 ; DELETE FROM %(coverage)s WHERE new.kind_codes_id == %(coverage)s.kind_codes_id AND new.tmax_seconds == %(coverage)s.time_seconds AND new.tmax_offset == %(coverage)s.time_offset AND step == 0 ; END '''))
''' CREATE TRIGGER IF NOT EXISTS %(db)s.%(nuts)s_remove_coverage BEFORE DELETE ON %(nuts)s FOR EACH ROW BEGIN INSERT OR IGNORE INTO %(coverage)s VALUES (old.kind_codes_id, old.tmin_seconds, old.tmin_offset, 0) ; UPDATE %(coverage)s SET step = step - 1 WHERE old.kind_codes_id == %(coverage)s.kind_codes_id AND old.tmin_seconds == %(coverage)s.time_seconds AND old.tmin_offset == %(coverage)s.time_offset ; INSERT OR IGNORE INTO %(coverage)s VALUES (old.kind_codes_id, old.tmax_seconds, old.tmax_offset, 0) ; UPDATE %(coverage)s SET step = step + 1 WHERE old.kind_codes_id == %(coverage)s.kind_codes_id AND old.tmax_seconds == %(coverage)s.time_seconds AND old.tmax_offset == %(coverage)s.time_offset ; DELETE FROM %(coverage)s WHERE old.kind_codes_id == %(coverage)s.kind_codes_id AND old.tmin_seconds == %(coverage)s.time_seconds AND old.tmin_offset == %(coverage)s.time_offset AND step == 0 ; DELETE FROM %(coverage)s WHERE old.kind_codes_id == %(coverage)s.kind_codes_id AND old.tmax_seconds == %(coverage)s.time_seconds AND old.tmax_offset == %(coverage)s.time_offset AND step == 0 ; END '''))
'''Delete database tables associated with this Squirrel.'''
DROP TRIGGER %(db)s.%(nuts)s_delete_nuts; DROP TRIGGER %(db)s.%(nuts)s_delete_nuts2; DROP TRIGGER %(db)s.%(file_states)s_delete_files; DROP TRIGGER %(db)s.%(nuts)s_inc_kind_codes; DROP TRIGGER %(db)s.%(nuts)s_dec_kind_codes; DROP TABLE %(db)s.%(nuts)s; DROP TABLE %(db)s.%(kind_codes_count)s; DROP TRIGGER IF EXISTS %(db)s.%(nuts)s_add_coverage; DROP TRIGGER IF EXISTS %(db)s.%(nuts)s_remove_coverage; DROP TABLE IF EXISTS %(db)s.%(coverage)s; '''.strip().splitlines():
paths, kinds=None, format='detect', check=True, progress_viewer='terminal'):
''' Add files to the selection.
:param paths: Iterator yielding paths to files or directories to be added to the selection. Recurses into directories. If given a ``str``, it is treated as a single path to be added. :type paths: :py:class:`list` of :py:class:`str` :param kinds: Content types to be made available through the Squirrel selection. By default, all known content types are accepted. :type kinds: :py:class:`list` of :py:class:`str` :param format: File format identifier or ``'detect'`` to enable auto-detection. :type format: :py:class:`str`
Complexity: O(log N) '''
self, util.iter_select_files( paths, show_progress=False, pass_through=lambda path: path.startswith('virtual:') ), kind_mask, format)
''' Check for modifications and reindex modified files.
Based on file modification times. '''
''' Add content which is not backed by files.
:param nuts: Content pieces to be added. :type nuts: iterator yielding :py:class:`~pyrocko.squirrel.Nut` objects
:param virtual_paths: List of virtual paths to prevent creating a temporary list of the nuts while aggregating the file paths for the selection. :type virtual_paths: :py:class:`list` of :py:class:`str`
Stores to the main database and the selection. '''
virtual_paths = [virtual_paths]
nuts_add = [] virtual_paths = set() for nut in nuts: virtual_paths.add(nut.file_path) nuts_add.append(nut) else:
self, content=[], skip_unchanged=True, check=check):
''' INSERT INTO %(db)s.%(nuts)s SELECT NULL, nuts.file_id, nuts.file_segment, nuts.file_element, nuts.kind_id, nuts.kind_codes_id, nuts.tmin_seconds, nuts.tmin_offset, nuts.tmax_seconds, nuts.tmax_offset, nuts.kscale FROM %(db)s.%(file_states)s INNER JOIN nuts ON %(db)s.%(file_states)s.file_id == nuts.file_id INNER JOIN kind_codes ON nuts.kind_codes_id == kind_codes.kind_codes_id WHERE %(db)s.%(file_states)s.file_state != 2 AND (((1 << kind_codes.kind_id) & %(db)s.%(file_states)s.kind_mask) != 0) ''')).rowcount
''' Add remote resource.
:param source: Remote data access client instance. :type source: subclass of :py:class:`~pyrocko.squirrel.Source` '''
''' Add FDSN site for transparent remote data access.
Arguments are passed to :py:class:`~pyrocko.squirrel.FDSNSource`. '''
''' Add online catalog for transparent event data access.
Arguments are passed to :py:class:`~pyrocko.squirrel.CatalogSource`. '''
ds = dataset.read_dataset(path) ds.setup(self, check=check, progress_viewer=progress_viewer)
self, obj=None, tmin=None, tmax=None, time=None, codes=None):
tmin = time tmax = time
tmin = tmin if tmin is not None else obj.tmin tmax = tmax if tmax is not None else obj.tmax codes = codes if codes is not None else obj.codes
codes = tuple(codes.split('.'))
self, kind=None, tmin=None, tmax=None, codes=None, naiv=False, kind_codes_ids=None):
''' Iterate content entities matching given constraints.
:param kind: Content kind (or kinds) to extract. :type kind: :py:class:`str`, :py:class:`list` of :py:class:`str`
:param tmin: Start time of query interval. :type tmin: timestamp
:param tmax: End time of query interval. :type tmax: timestamp
:param codes: Pattern of content codes to be matched. :type codes: :py:class:`tuple` of :py:class:`str`
:param naiv: Bypass time span lookup through indices (slow, for testing). :type naiv: :py:class:`bool`
:param kind_codes_ids: Kind-codes IDs of contents to be retrieved (internal use). :type kind_codes_ids: :py:class:`list` of :py:class:`str`
Complexity: O(log N) for the time selection part due to heavy use of database indices.
Yields :py:class:`~pyrocko.squirrel.Nut` objects representing the intersecting content.
Query time span is treated as a half-open interval ``[tmin, tmax)``. However, if ``tmin`` equals ``tmax``, the edge logics are modified to closed-interval so that content intersecting with the time instant ``t = tmin = tmax`` is returned (otherwise nothing would be returned as ``[t, t)`` never matches anything).
Time spans of content entities to be matched are also treated as half open intervals, e.g. content span ``[0, 1)`` is matched by query span ``[0, 1)`` but not by ``[-1, 0)`` or ``[1, 2)``. Also here, logics are modified to closed-interval when the content time span is an empty interval, i.e. to indicate a time instant. E.g. time instant 0 is matched by ``[0, 1)`` but not by ``[-1, 0)`` or ``[1, 2)``. '''
tmin = self.get_time_span()[0] tmax = self.get_time_span()[1] + 1.0
else:
(%(db)s.%(nuts)s.kind_id = ? AND %(db)s.%(nuts)s.kscale == ? AND %(db)s.%(nuts)s.tmin_seconds BETWEEN ? AND ?) ''') (to_kind_id(kind), kscale, tmin_seconds - tscale - 1, tmax_seconds + 1))
else: (%(db)s.%(nuts)s.kind_id == ? AND %(db)s.%(nuts)s.kscale == ? AND %(db)s.%(nuts)s.tmin_seconds <= ?) ''')
(to_kind_id(kind), kscale, tmax_seconds + 1))
pats = codes_patterns_for_kind(kind, codes) if pats: extra_cond.append( ' ( %s ) ' % ' OR '.join( ('kind_codes.codes GLOB ?',) * len(pats))) args.extend(separator.join(pat) for pat in pats)
' ( kind_codes.kind_codes_id IN ( %s ) ) ' % ', '.join( '?'*len(kind_codes_ids)))
SELECT files.path, files.format, files.mtime, files.size, %(db)s.%(nuts)s.file_segment, %(db)s.%(nuts)s.file_element, kind_codes.kind_id, kind_codes.codes, %(db)s.%(nuts)s.tmin_seconds, %(db)s.%(nuts)s.tmin_offset, %(db)s.%(nuts)s.tmax_seconds, %(db)s.%(nuts)s.tmax_offset, kind_codes.deltat FROM files INNER JOIN %(db)s.%(nuts)s ON files.file_id == %(db)s.%(nuts)s.file_id INNER JOIN kind_codes ON %(db)s.%(nuts)s.kind_codes_id == kind_codes.kind_codes_id ''')
else: or (nut.tmin == nut.tmax and tmin == nut.tmin):
else: or (nut.tmin == nut.tmax and tmin <= nut.tmin < tmax):
''' Get content entities matching given constraints.
Like :py:meth:`iter_nuts` but returns results as a list. '''
self, kind, tmin=None, tmax=None, codes=None, path=None):
(%(db)s.%(nuts)s.kind_id = ? AND %(db)s.%(nuts)s.kscale == ? AND %(db)s.%(nuts)s.tmin_seconds BETWEEN ? AND ?) ''') (to_kind_id(kind), kscale, tmin_seconds - tscale - 1, tmax_seconds + 1))
else: (%(db)s.%(nuts)s.kind_id == ? AND %(db)s.%(nuts)s.kscale == ? AND %(db)s.%(nuts)s.tmin_seconds <= ?) ''')
(to_kind_id(kind), kscale, tmax_seconds + 1))
' ( %s ) ' % ' OR '.join( ('kind_codes.codes GLOB ?',) * len(pats)))
SELECT %(db)s.%(nuts)s.nut_id, %(db)s.%(nuts)s.tmin_seconds, %(db)s.%(nuts)s.tmin_offset, %(db)s.%(nuts)s.tmax_seconds, %(db)s.%(nuts)s.tmax_offset, kind_codes.deltat FROM files INNER JOIN %(db)s.%(nuts)s ON files.file_id == %(db)s.%(nuts)s.file_id INNER JOIN kind_codes ON %(db)s.%(nuts)s.kind_codes_id == kind_codes.kind_codes_id WHERE ( ''' + ' OR '.join(tmin_cond) + ''' ) AND ''' + ' AND '.join(extra_cond))
nut_tmax_seconds, nut_tmax_offset, nut_deltat = row
nut_tmin_seconds, nut_tmin_offset, nut_deltat) nut_tmax_seconds, nut_tmax_offset, nut_deltat)
nut_tmin_seconds, nut_tmin_offset, tmin_seconds, tmin_offset, model.tscale_to_kscale( tmin_seconds - nut_tmin_seconds), nut_id))
tmax_seconds, tmax_offset, nut_tmax_seconds, nut_tmax_offset, model.tscale_to_kscale( nut_tmax_seconds - tmax_seconds), nut_id))
INSERT INTO %(db)s.%(nuts)s ( file_id, file_segment, file_element, kind_id, kind_codes_id, tmin_seconds, tmin_offset, tmax_seconds, tmax_offset, kscale ) SELECT file_id, file_segment, file_element, kind_id, kind_codes_id, ?, ?, ?, ?, ? FROM %(db)s.%(nuts)s WHERE nut_id == ? '''
''' Get time interval over all content in selection.
Complexity O(1), independent of the number of nuts.
:returns: (tmin, tmax) ''' SELECT MIN(tmin_seconds), MIN(tmin_offset) FROM %(db)s.%(nuts)s WHERE tmin_seconds == (SELECT MIN(tmin_seconds) FROM %(db)s.%(nuts)s) ''')
SELECT MAX(tmax_seconds), MAX(tmax_offset) FROM %(db)s.%(nuts)s WHERE tmax_seconds == (SELECT MAX(tmax_seconds) FROM %(db)s.%(nuts)s) ''')
''' Get min and max sampling interval of all content of given kind.
:param kind: Content kind :type kind: :py:class:`str`
:returns: (deltat_min, deltat_max) '''
deltat for deltat in self.get_deltats(kind) if deltat is not None]
else: return None, None
''' Iterate over content types available in selection.
:param codes: if given, get kinds only for selected codes identifier
Complexity: O(1), independent of number of nuts '''
codes=codes, kind_codes_count='%(db)s.%(kind_codes_count)s' % self._names)
''' Iterate over sampling intervals available in selection.
:param kind: if given, get sampling intervals only for a given content type :type kind: :py:class:`str`
Complexity: O(1), independent of number of nuts ''' kind=kind, kind_codes_count='%(db)s.%(kind_codes_count)s' % self._names)
''' Iterate over content identifier code sequences available in selection.
:param kind: if given, get codes only for a given content type :type kind: :py:class:`str`
Complexity: O(1), independent of number of nuts ''' kind=kind, kind_codes_count='%(db)s.%(kind_codes_count)s' % self._names)
''' Iterate over number of occurrences of any (kind, codes) combination.
:param kind: if given, get counts only for selected content type
Yields tuples ``((kind, codes), count)``
Complexity: O(1), independent of number of nuts ''' kind=kind, kind_codes_count='%(db)s.%(kind_codes_count)s' % self._names)
''' Get content types available in selection.
:param codes: if given, get kinds only for selected codes identifier
Complexity: O(1), independent of number of nuts
:returns: sorted list of available content types '''
''' Get sampling intervals available in selection.
:param kind: if given, get codes only for selected content type
Complexity: O(1), independent of number of nuts
:returns: sorted list of available sampling intervals '''
''' Get identifier code sequences available in selection.
:param kind: if given, get codes only for selected content type
Complexity: O(1), independent of number of nuts
:returns: sorted list of available codes as tuples of strings '''
''' Get number of occurrences of any (kind, codes) combination.
:param kind: if given, get codes only for selected content type
Complexity: O(1), independent of number of nuts
:returns: ``dict`` with ``counts[kind][codes]`` or ``counts[codes]`` if kind is not ``None`` ''' else:
else:
('kind_codes.codes GLOB ?',) * len(pats))
SELECT kind_codes_id, codes, deltat FROM kind_codes WHERE kind_id == ? AND ''' + codes_cond)
''' Update inventory of remote content for a given selection.
This function triggers all attached remote sources, to check for updates in the metadata. The sources will only submit queries when their expiration date has passed, or if the selection spans into previously unseen times or areas. '''
''' Get number of files in selection. '''
''' Get number of nuts in selection. '''
''' Get aggregated file size available in selection. '''
SELECT SUM(files.size) FROM %(db)s.%(file_states)s INNER JOIN files ON %(db)s.%(file_states)s.file_id = files.file_id ''')
''' Get statistics on contents available through this selection. '''
nfiles=self.get_nfiles(), nnuts=self.get_nnuts(), kinds=self.get_kinds(), codes=self.get_codes(), total_size=self.get_total_size(), counts=self.get_counts(), tmin=tmin, tmax=tmax)
''' Get and possibly load full content for a given index entry from file.
Loads the actual content objects (channel, station, waveform, ...) from file. For efficiency sibling content (all stuff in the same file segment) will also be loaded as a side effect. The loaded contents are cached in the Squirrel object. '''
nut.file_path, segment=nut.file_segment, format=nut.file_format, database=self._database):
except KeyError: raise error.NotAvailable( 'Unable to retrieve content: %s, %s, %s, %s' % nut.key)
self._content_caches.keys() if cache is None else [cache]):
self._content_caches.keys() if cache is None else [cache]):
'Multiple entries matching codes %s' % '.'.join(codes.split(separator)))
self.iter_nuts('station', *args), key=lambda nut: nut.dkey)
self.iter_nuts('channel', *args), key=lambda nut: nut.dkey)
args = self._get_selection_args(*args, **kwargs) nuts = sorted( self.iter_nuts('response', *args), key=lambda nut: nut.dkey) self.check_duplicates(nuts) return [self.get_content(nut) for nut in nuts]
self.iter_nuts('event', *args), key=lambda nut: nut.dkey)
if isinstance(x, tuple): return tuple(tts(e) for e in x) elif isinstance(x, list): return list(tts(e) for e in x) else: return util.time_to_str(x)
max(tmin, promise.tmin), min(tmax, promise.tmax), promise.deltat):
WaveformOrder( source_id=promise.file_path, codes=tuple(promise.codes.split(separator)), tmin=block_tmin, tmax=block_tmax, deltat=promise.deltat, gaps=gaps(waveforms_avail, block_tmin, block_tmax)))
logger.info( 'Waveform orders already satisified with cached/local data: ' '%i (%i)' % (len(order_keys_noop), len(orders_noop)))
(source_id, i) for (i, source_id) in enumerate(source_ids))
key=lambda order: source_priority[order.source_id])
'Waveform orders standing for download: %i (%i)' % (len(order_groups), len(orders)))
'waveform_promise', order.tmin, order.tmax, codes=order.codes, path=order.source_id)
pass
split_promise(order)
# TODO: parallelize this loop self, by_source_id[source_id], success=success, error_permanent=split_promise, error_temporary=noop)
self.iter_nuts('waveform', *args), key=lambda nut: nut.dkey)
nuts = self.get_waveform_nuts(*args, **kwargs) # self.check_duplicates(nuts) return [self.get_content(nut, 'waveform') for nut in nuts]
name=cargs[0], azimuth=cargs[1], dip=cargs[2]))
network=sargs[0], station=sargs[1], location=sargs[2], lat=sargs[3], lon=sargs[4], elevation=sargs[5], depth=sargs[6] or 0.0, channels=pchannels))
def pile(self):
self.pile.snuffle()
gather(codes) for codes in self.iter_codes(kind) if selector is None or selector(codes))
def __str__(self): return str(self.get_stats())
self, kind, tmin=None, tmax=None, codes_list=None, limit=None):
''' Get coverage information.
Get information about strips of gapless data coverage.
:param kind: Content kind to be queried. :param tmin: Start time of query interval. :param tmax: End time of query interval. :param codes_list: List of code patterns to query. If not given or empty, an empty list is returned. :param limit: Limit query to return only up to a given maximum number of entries per matching channel (without setting this option, very gappy data could cause the query to execute for a very long time).
:returns: list of entries of the form ``(pattern, codes, deltat, tmin, tmax, data)`` where ``pattern`` is the request pattern which yielded this entry, ``codes`` are the matching channel codes, ``tmin`` and ``tmax`` are the global min and max times for which data for this channel is available, regardless of any time restrictions in the query. ``data`` is another list with (up to ``limit``) checkpoints of the form ``(time, count)`` where a ``count`` of zero indicates a data gap, a value of 1 normal data coverage and higher values indicate duplicate/redundant data. '''
kind, tmin, tmin, kind_codes_ids=kind_codes_ids):
SELECT time_seconds, time_offset FROM %(db)s.%(coverage)s WHERE kind_codes_id == ? ORDER BY kind_codes_id ''' + order + ''', time_seconds ''' + order + ''', time_offset ''' + order + ''' LIMIT 1 ''')
continue
# intentionally < because (== tmin) is queried from nuts 'OR ( ? == time_seconds AND ? < time_offset ) ) '
'OR ( ? == time_seconds AND time_offset <= ? ) ) '
sql_limit = ' LIMIT ?' args.append(limit)
SELECT time_seconds, time_offset, step FROM %(db)s.%(coverage)s WHERE kind_codes_id == ? ''' + sql_time + ''' ORDER BY kind_codes_id, time_seconds, time_offset ''' + sql_limit)
entry[-1] = None else:
''' Dump raw database tables in textual form (for debugging purposes).
:param table_names: Names of tables to be dumped or ``None`` to dump all. :type table_names: :py:class:`list` of :py:class:`str` :param stream: Open file or ``None`` to dump to standard output. '''
stream = sys.stdout
table_names = [table_names]
'selection_file_states', 'selection_nuts', 'selection_kind_codes_count', 'files', 'nuts', 'kind_codes', 'kind_codes_count']
'selection_file_states': '%(db)s.%(file_states)s', 'selection_nuts': '%(db)s.%(nuts)s', 'selection_kind_codes_count': '%(db)s.%(kind_codes_count)s', 'files': 'files', 'nuts': 'nuts', 'kind_codes': 'kind_codes', 'kind_codes_count': 'kind_codes_count'}
m[table_name] % self._names, stream=stream)
''' Container to hold statistics about contents cached in meta-information db. '''
help='number of files in database') help='number of index nuts in database') Tuple.T(content_t=String.T()), help='available code sequences in database, e.g. ' '(agency, network, station, location) for stations nuts.') String.T(), help='available content types in database') help='aggregated file size of files referenced in database') String.T(), Dict.T(Tuple.T(content_t=String.T()), Int.T()), help='breakdown of how many nuts of any content type and code ' 'sequence are available in database, ``counts[kind][codes]``')
def __str__(self):
    '''
    Render the cached-content statistics as a human-readable report.
    '''

    # Total number of nuts per content kind, summed over all code
    # sequences (self.counts maps kind -> codes -> count).
    kind_counts = dict(
        (kind, sum(self.counts[kind].values())) for kind in self.kinds)

    # Flatten code tuples, e.g. ('NET', 'STA', ...) -> dotted strings.
    codes = ['.'.join(x) for x in self.codes]

    # Elide long code listings: first 10 entries, a '[n more]' marker,
    # then the last 10 entries.
    if len(codes) > 20:
        scodes = '\n' + util.ewrap(codes[:10], indent=' ') \
            + '\n [%i more]\n' % (len(codes) - 20) \
            + util.ewrap(codes[-10:], indent=' ')
    else:
        # NOTE(review): the conditional expression binds to the whole
        # concatenation, so empty `codes` yields '<none>' as intended.
        scodes = '\n' + util.ewrap(codes, indent=' ') \
            if codes else '<none>'

    # NOTE(review): template reproduced as shown in this copy; the
    # original's internal line breaks may have been lost in transit --
    # verify the layout of this string against version control.
    s = ''' Available codes: %s Number of files: %i Total size of known files: %s Number of index nuts: %i Available content kinds: %s''' % (
        scodes,
        self.nfiles,
        util.human_bytesize(self.total_size),
        self.nnuts,
        ', '.join('%s: %i' % (
            kind, kind_counts[kind]) for kind in sorted(self.kinds)))

    return s
''' Shared meta-information database used by Squirrel. '''
except sqlite3.OperationalError: raise error.SquirrelError( 'Cannot connect to database: %s' % database_path)
self._conn.set_trace_callback(self._log_statement)
logger.debug(statement)
c.execute( ''' SELECT name FROM sqlite_master WHERE type = 'table' AND name = '{files}' '''))): return
'''PRAGMA recursive_triggers = true''')
''' CREATE TABLE IF NOT EXISTS files ( file_id integer PRIMARY KEY, path text, format text, mtime float, size integer) '''))
''' CREATE UNIQUE INDEX IF NOT EXISTS index_files_path ON files (path) ''')
''' CREATE TABLE IF NOT EXISTS nuts ( nut_id integer PRIMARY KEY AUTOINCREMENT, file_id integer, file_segment integer, file_element integer, kind_id integer, kind_codes_id integer, tmin_seconds integer, tmin_offset integer, tmax_seconds integer, tmax_offset integer, kscale integer) '''))
''' CREATE UNIQUE INDEX IF NOT EXISTS index_nuts_file_element ON nuts (file_id, file_segment, file_element) ''')
''' CREATE TABLE IF NOT EXISTS kind_codes ( kind_codes_id integer PRIMARY KEY, kind_id integer, codes text, deltat float) '''))
''' CREATE UNIQUE INDEX IF NOT EXISTS index_kind_codes ON kind_codes (kind_id, codes, deltat) ''')
''' CREATE TABLE IF NOT EXISTS kind_codes_count ( kind_codes_id integer PRIMARY KEY, count integer) '''))
''' CREATE INDEX IF NOT EXISTS index_nuts_file_id ON nuts (file_id) ''')
''' CREATE TRIGGER IF NOT EXISTS delete_nuts_on_delete_file BEFORE DELETE ON files FOR EACH ROW BEGIN DELETE FROM nuts where file_id == old.file_id; END ''')
# trigger only on size to make silent update of mtime possible ''' CREATE TRIGGER IF NOT EXISTS delete_nuts_on_update_file BEFORE UPDATE OF size ON files FOR EACH ROW BEGIN DELETE FROM nuts where file_id == old.file_id; END ''')
''' CREATE TRIGGER IF NOT EXISTS increment_kind_codes BEFORE INSERT ON nuts FOR EACH ROW BEGIN INSERT OR IGNORE INTO kind_codes_count VALUES (new.kind_codes_id, 0); UPDATE kind_codes_count SET count = count + 1 WHERE new.kind_codes_id == kind_codes_id; END ''')
''' CREATE TRIGGER IF NOT EXISTS decrement_kind_codes BEFORE DELETE ON nuts FOR EACH ROW BEGIN UPDATE kind_codes_count SET count = count - 1 WHERE old.kind_codes_id == kind_codes_id; END ''')
''' Store or update content meta-information.
Given ``nuts`` are assumed to represent an up-to-date and complete inventory of a set of files. Any old information about these files is first pruned from the database (via database triggers). If such content is part of a live selection, it is also removed there. Then the new content meta-information is inserted into the main database. The content is not automatically inserted into the live selections again. It is in the responsibility of the selection object to perform this step. '''
nut.file_path, nut.file_format, nut.file_mtime, nut.file_size))
'INSERT OR IGNORE INTO files VALUES (NULL,?,?,?,?)', files)
'''UPDATE files SET format = ?, mtime = ?, size = ? WHERE path == ? ''', ((x[1], x[2], x[3], x[0]) for x in files))
'INSERT OR IGNORE INTO kind_codes VALUES (NULL,?,?,?)', kind_codes)
''' INSERT INTO nuts VALUES (NULL, ( SELECT file_id FROM files WHERE path == ? ),?,?,?, ( SELECT kind_codes_id FROM kind_codes WHERE kind_id == ? AND codes == ? AND deltat == ? ), ?,?,?,?,?) ''', ((nut.file_path, nut.file_segment, nut.file_element, nut.kind_id, nut.kind_id, nut.codes, nut.deltat or 0.0, nut.tmin_seconds, nut.tmin_offset, nut.tmax_seconds, nut.tmax_offset, nut.kscale) for nut in nuts))
SELECT files.path, files.format, files.mtime, files.size, nuts.file_segment, nuts.file_element, kind_codes.kind_id, kind_codes.codes, nuts.tmin_seconds, nuts.tmin_offset, nuts.tmax_seconds, nuts.tmax_offset, kind_codes.deltat FROM files INNER JOIN nuts ON files.file_id = nuts.file_id INNER JOIN kind_codes ON nuts.kind_codes_id == kind_codes.kind_codes_id WHERE path == ? '''
for row in self._conn.execute(sql, (path,))]
SELECT files.path, files.format, files.mtime, files.size, nuts.file_segment, nuts.file_element, kind_codes.kind_id, kind_codes.codes, nuts.tmin_seconds, nuts.tmin_offset, nuts.tmax_seconds, nuts.tmax_offset, kind_codes.deltat FROM files INNER JOIN nuts ON files.file_id == nuts.file_id INNER JOIN kind_codes ON nuts.kind_codes_id == kind_codes.kind_codes_id '''
return None
''' Prune content meta-information about a given file.
All content pieces belonging to file ``path`` are removed from the main database and any attached live selections (via database triggers). '''
self._conn.execute( 'DELETE FROM files WHERE path = ?', (path,))
''' Prune information associated with a given file, but keep the file path.
This method is called when reading a file failed. File attributes, format, size and modification time are set to NULL. File content meta-information is removed from the database and any attached live selections (via database triggers). '''
''' UPDATE files SET format = NULL, mtime = NULL, size = NULL WHERE path = ? ''', (path,))
''' Update modification time of file without initiating reindexing.
Useful to prolong validity period of data with expiration date. '''
raise FileLoadError( 'Silent update for file "%s" failed: size has changed.' % path)
UPDATE files SET mtime = ? WHERE path = ? '''
sel = 'AND kind_codes.kind_id == ?' args.append(to_kind_id(kind))
SELECT kind_codes.kind_id, kind_codes.codes, kind_codes.deltat, %(kind_codes_count)s.count FROM %(kind_codes_count)s INNER JOIN kind_codes ON %(kind_codes_count)s.kind_codes_id == kind_codes.kind_codes_id WHERE %(kind_codes_count)s.count > 0 ''' + sel + ''' ''') % {'kind_codes_count': kind_codes_count}
to_kind(kind_id), tuple(codes.split(separator)), deltat), count
SELECT DISTINCT kind_codes.deltat FROM %(kind_codes_count)s INNER JOIN kind_codes ON %(kind_codes_count)s.kind_codes_id == kind_codes.kind_codes_id WHERE %(kind_codes_count)s.count > 0 ''' + sel + ''' ORDER BY kind_codes.deltat ''') % {'kind_codes_count': kind_codes_count}
SELECT DISTINCT kind_codes.codes FROM %(kind_codes_count)s INNER JOIN kind_codes ON %(kind_codes_count)s.kind_codes_id == kind_codes.kind_codes_id WHERE %(kind_codes_count)s.count > 0 ''' + sel + ''' ORDER BY kind_codes.codes ''') % {'kind_codes_count': kind_codes_count}
SELECT DISTINCT kind_codes.kind_id FROM %(kind_codes_count)s INNER JOIN kind_codes ON %(kind_codes_count)s.kind_codes_id == kind_codes.kind_codes_id WHERE %(kind_codes_count)s.count > 0 ''' + sel + ''' ORDER BY kind_codes.kind_id ''') % {'kind_codes_count': kind_codes_count}
else:
else:
SELECT SUM(files.size) FROM files '''
nfiles=self.get_nfiles(), nnuts=self.get_nnuts(), kinds=self.get_kinds(), codes=self.get_codes(), counts=self.get_counts(), total_size=self.get_total_size())
def __str__(self): return str(self.get_stats())
'files', 'nuts', 'kind_codes', 'kind_codes_count']:
stream = sys.stdout
# NOTE(review): repr() must return a str -- returning `self` is only valid
# if the enclosing class (not visible in this chunk) subclasses str,
# presumably so values print unquoted in the debug table dump below.
# TODO confirm against the class definition before changing.
def __repr__(self): return self
max((len(repr(x)) if x is not None else 0) for x in col) for col in zip(*tab)]
(repr(x).ljust(wid) if x is not None else ''.ljust(wid, '-')) for (x, wid) in zip(row, widths)))
'Squirrel', 'Selection', 'SquirrelStats', 'Database', 'DatabaseStats', ] |