1# http://pyrocko.org - GPLv3
2#
3# The Pyrocko Developers, 21st Century
4# ---|P------/S----------~Lg----------
6from __future__ import absolute_import, print_function
8import sys
9import os
11import math
12import logging
13import threading
14import queue
15from collections import defaultdict
17from pyrocko.guts import Object, Int, List, Tuple, String, Timestamp, Dict
18from pyrocko import util, trace
19from pyrocko.progress import progress
21from . import model, io, cache, dataset
23from .model import to_kind_id, WaveformOrder, to_kind, to_codes, \
24 STATION, CHANNEL, RESPONSE, EVENT, WAVEFORM, codes_patterns_list, \
25 codes_patterns_for_kind
26from .client import fdsn, catalog
27from .selection import Selection, filldocs
28from .database import abspath
29from .operators.base import Operator, CodesPatternFiltering
30from . import client, environment, error
32logger = logging.getLogger('psq.base')
34guts_prefix = 'squirrel'
def make_task(*args):
    '''
    Create a progress task, wired to this module's logger.
    '''
    return progress.task(*args, logger=logger)
def lpick(condition, seq):
    '''
    Partition a sequence into two lists using a predicate.

    :returns:
        Tuple ``(nomatch, match)`` where ``match`` holds the elements for
        which ``condition`` is true and ``nomatch`` holds the rest, each in
        original order.
    '''
    nomatch = []
    match = []
    for element in seq:
        if condition(element):
            match.append(element)
        else:
            nomatch.append(element)

    return nomatch, match
def blocks(tmin, tmax, deltat, nsamples_block=100000):
    '''
    Yield consecutive time blocks covering the interval ``[tmin, tmax]``.

    Block boundaries are aligned to integer multiples of the block duration
    ``deltat * nsamples_block``, so equal queries always produce identical
    block edges.
    '''
    tblock = util.to_time_float(deltat * nsamples_block)
    kmin = int(math.floor(tmin / tblock))
    kmax = int(math.ceil(tmax / tblock))
    for k in range(kmin, kmax):
        yield k * tblock, (k + 1) * tblock
def gaps(avail, tmin, tmax):
    '''
    Compute uncovered sub-intervals of ``[tmin, tmax]``.

    :param avail:
        Available (covered) time spans as ``(tmin_a, tmax_a)`` pairs.

    :returns:
        List of ``(tmin_g, tmax_g)`` gap intervals inside the query span
        which are not covered by any entry of ``avail``.
    '''
    assert tmin < tmax

    # Sweep-line over interval edges: +1 opens a covered span, -1 closes
    # one. The query interval contributes inverted edges so the running
    # count drops to zero exactly inside uncovered regions.
    events = [(tmin, -1), (tmax, 1)]
    for tmin_a, tmax_a in avail:
        assert tmin_a < tmax_a
        events.append((tmin_a, 1))
        events.append((tmax_a, -1))

    events.sort()

    ncovered = 1
    result = []
    tmin_g = None
    for t, step in events:
        if ncovered == 1 and step == -1:
            tmin_g = t
        elif ncovered == 0 and step == 1 and tmin_g is not None:
            # Suppress empty gaps where coverage ends and restarts at the
            # same instant.
            if tmin_g != t:
                result.append((tmin_g, t))

        ncovered += step

    return result
def order_key(order):
    '''
    Grouping/sorting key for waveform orders: codes plus time span.
    '''
    return order.codes, order.tmin, order.tmax
87def _is_exact(pat):
88 return not ('*' in pat or '?' in pat or ']' in pat or '[' in pat)
def prefix_tree(tups):
    '''
    Build a sorted prefix tree from equal-length tuples.

    :returns:
        Nested list of ``(value, subtree)`` pairs, sorted at each level,
        where leaves carry an empty subtree list.
    '''
    if not tups:
        return []

    # Leaf level: one entry per tuple (duplicates preserved).
    if len(tups[0]) == 1:
        return sorted((tup[0], []) for tup in tups)

    # Group by the first element and recurse on the remainders.
    grouped = defaultdict(list)
    for tup in tups:
        grouped[tup[0]].append(tup[1:])

    return [(key, prefix_tree(grouped[key])) for key in sorted(grouped)]
class Batch(object):
    '''
    Batch of waveforms from window-wise data extraction.

    Encapsulates state and results yielded for each window in window-wise
    waveform extraction with the :py:meth:`Squirrel.chopper_waveforms`
    method.

    *Attributes:*

    .. py:attribute:: tmin

        Start of this time window.

    .. py:attribute:: tmax

        End of this time window.

    .. py:attribute:: i

        Index of this time window in sequence.

    .. py:attribute:: n

        Total number of time windows in sequence.

    .. py:attribute:: igroup

        Index of this time window's sequence group.

    .. py:attribute:: ngroups

        Total number of sequence groups.

    .. py:attribute:: traces

        Extracted waveforms for this time window.
    '''

    def __init__(self, tmin, tmax, i, n, igroup, ngroups, traces):
        # Window bounds, position in sequence, group position, payload.
        self.tmin, self.tmax = tmin, tmax
        self.i, self.n = i, n
        self.igroup, self.ngroups = igroup, ngroups
        self.traces = traces
157class Squirrel(Selection):
158 '''
159 Prompt, lazy, indexing, caching, dynamic seismological dataset access.
161 :param env:
162 Squirrel environment instance or directory path to use as starting
163 point for its detection. By default, the current directory is used as
164 starting point. When searching for a usable environment the directory
165 ``'.squirrel'`` or ``'squirrel'`` in the current (or starting point)
166 directory is used if it exists, otherwise the parent directories are
167 search upwards for the existence of such a directory. If no such
168 directory is found, the user's global Squirrel environment
169 ``'$HOME/.pyrocko/squirrel'`` is used.
170 :type env:
171 :py:class:`~pyrocko.squirrel.environment.Environment` or
172 :py:class:`str`
174 :param database:
175 Database instance or path to database. By default the
176 database found in the detected Squirrel environment is used.
177 :type database:
178 :py:class:`~pyrocko.squirrel.database.Database` or :py:class:`str`
180 :param cache_path:
181 Directory path to use for data caching. By default, the ``'cache'``
182 directory in the detected Squirrel environment is used.
183 :type cache_path:
184 :py:class:`str`
186 :param persistent:
187 If given a name, create a persistent selection.
188 :type persistent:
189 :py:class:`str`
191 This is the central class of the Squirrel framework. It provides a unified
192 interface to query and access seismic waveforms, station meta-data and
193 event information from local file collections and remote data sources. For
194 prompt responses, a profound database setup is used under the hood. To
195 speed up assemblage of ad-hoc data selections, files are indexed on first
196 use and the extracted meta-data is remembered in the database for
197 subsequent accesses. Bulk data is lazily loaded from disk and remote
198 sources, just when requested. Once loaded, data is cached in memory to
199 expedite typical access patterns. Files and data sources can be dynamically
200 added to and removed from the Squirrel selection at runtime.
202 Queries are restricted to the contents of the files currently added to the
203 Squirrel selection (usually a subset of the file meta-information
204 collection in the database). This list of files is referred to here as the
205 "selection". By default, temporary tables are created in the attached
206 database to hold the names of the files in the selection as well as various
207 indices and counters. These tables are only visible inside the application
208 which created them and are deleted when the database connection is closed
209 or the application exits. To create a selection which is not deleted at
210 exit, supply a name to the ``persistent`` argument of the Squirrel
211 constructor. Persistent selections are shared among applications using the
212 same database.
214 **Method summary**
216 Some of the methods are implemented in :py:class:`Squirrel`'s base class
217 :py:class:`~pyrocko.squirrel.selection.Selection`.
219 .. autosummary::
221 ~Squirrel.add
222 ~Squirrel.add_source
223 ~Squirrel.add_fdsn
224 ~Squirrel.add_catalog
225 ~Squirrel.add_dataset
226 ~Squirrel.add_virtual
227 ~Squirrel.update
228 ~Squirrel.update_waveform_promises
229 ~Squirrel.advance_accessor
230 ~Squirrel.clear_accessor
231 ~Squirrel.reload
232 ~pyrocko.squirrel.selection.Selection.iter_paths
233 ~Squirrel.iter_nuts
234 ~Squirrel.iter_kinds
235 ~Squirrel.iter_deltats
236 ~Squirrel.iter_codes
237 ~pyrocko.squirrel.selection.Selection.get_paths
238 ~Squirrel.get_nuts
239 ~Squirrel.get_kinds
240 ~Squirrel.get_deltats
241 ~Squirrel.get_codes
242 ~Squirrel.get_counts
243 ~Squirrel.get_time_span
244 ~Squirrel.get_deltat_span
245 ~Squirrel.get_nfiles
246 ~Squirrel.get_nnuts
247 ~Squirrel.get_total_size
248 ~Squirrel.get_stats
249 ~Squirrel.get_content
250 ~Squirrel.get_stations
251 ~Squirrel.get_channels
252 ~Squirrel.get_responses
253 ~Squirrel.get_events
254 ~Squirrel.get_waveform_nuts
255 ~Squirrel.get_waveforms
256 ~Squirrel.chopper_waveforms
257 ~Squirrel.get_coverage
258 ~Squirrel.pile
259 ~Squirrel.snuffle
260 ~Squirrel.glob_codes
261 ~pyrocko.squirrel.selection.Selection.get_database
262 ~Squirrel.print_tables
263 '''
    def __init__(
            self, env=None, database=None, cache_path=None, persistent=None):

        # Resolve the environment first; explicit arguments take precedence
        # over the environment's defaults below.
        if not isinstance(env, environment.Environment):
            env = environment.get_environment(env)

        if database is None:
            database = env.expand_path(env.database_path)

        if cache_path is None:
            cache_path = env.expand_path(env.cache_path)

        if persistent is None:
            persistent = env.persistent

        Selection.__init__(
            self, database=database, persistent=persistent)

        # Paths stored in the database are relative to the environment's
        # parent directory.
        self.get_database().set_basepath(os.path.dirname(env.get_basepath()))

        # Waveforms are cached separately from other (lightweight) content.
        self._content_caches = {
            'waveform': cache.ContentCache(),
            'default': cache.ContentCache()}

        self._cache_path = cache_path

        # Remote data sources and registered operators.
        self._sources = []
        self._operators = []
        self._operator_registry = {}

        self._pile = None
        self._n_choppers_active = 0

        # Selection-specific table names, derived from the selection name so
        # that persistent selections do not collide.
        self._names.update({
            'nuts': self.name + '_nuts',
            'kind_codes_count': self.name + '_kind_codes_count',
            'coverage': self.name + '_coverage'})

        with self.transaction('create tables') as cursor:
            self._create_tables_squirrel(cursor)
    def _create_tables_squirrel(self, cursor):
        '''
        Create selection-specific tables, indices and triggers.

        Table names are substituted via :py:meth:`_sql` from
        ``self._names``. The triggers keep the per-selection ``nuts``,
        ``kind_codes_count`` and ``coverage`` tables consistent with
        insertions/deletions happening in ``main.files`` and the selection's
        file-state table.
        '''

        # One row per indexed content piece ("nut") in this selection.
        cursor.execute(self._register_table(self._sql(
            '''
                CREATE TABLE IF NOT EXISTS %(db)s.%(nuts)s (
                    nut_id integer PRIMARY KEY,
                    file_id integer,
                    file_segment integer,
                    file_element integer,
                    kind_id integer,
                    kind_codes_id integer,
                    tmin_seconds integer,
                    tmin_offset integer,
                    tmax_seconds integer,
                    tmax_offset integer,
                    kscale integer)
            ''')))

        # Reference counts of (kind, codes) combinations in this selection,
        # maintained by the inc/dec triggers below.
        cursor.execute(self._register_table(self._sql(
            '''
                CREATE TABLE IF NOT EXISTS %(db)s.%(kind_codes_count)s (
                    kind_codes_id integer PRIMARY KEY,
                    count integer)
            ''')))

        cursor.execute(self._sql(
            '''
                CREATE UNIQUE INDEX IF NOT EXISTS %(db)s.%(nuts)s_file_element
                    ON %(nuts)s (file_id, file_segment, file_element)
            '''))

        cursor.execute(self._sql(
            '''
                CREATE INDEX IF NOT EXISTS %(db)s.%(nuts)s_index_file_id
                    ON %(nuts)s (file_id)
            '''))

        # Time indices are keyed by kind_id first, matching the query
        # patterns produced in _timerange_sql().
        cursor.execute(self._sql(
            '''
                CREATE INDEX IF NOT EXISTS %(db)s.%(nuts)s_index_tmin_seconds
                    ON %(nuts)s (kind_id, tmin_seconds)
            '''))

        cursor.execute(self._sql(
            '''
                CREATE INDEX IF NOT EXISTS %(db)s.%(nuts)s_index_tmax_seconds
                    ON %(nuts)s (kind_id, tmax_seconds)
            '''))

        cursor.execute(self._sql(
            '''
                CREATE INDEX IF NOT EXISTS %(db)s.%(nuts)s_index_kscale
                    ON %(nuts)s (kind_id, kscale, tmin_seconds)
            '''))

        # Deleting a file from the global table removes its nuts from this
        # selection.
        cursor.execute(self._sql(
            '''
                CREATE TRIGGER IF NOT EXISTS %(db)s.%(nuts)s_delete_nuts
                BEFORE DELETE ON main.files FOR EACH ROW
                BEGIN
                  DELETE FROM %(nuts)s WHERE file_id == old.file_id;
                END
            '''))

        # trigger only on size to make silent update of mtime possible
        cursor.execute(self._sql(
            '''
                CREATE TRIGGER IF NOT EXISTS %(db)s.%(nuts)s_delete_nuts2
                BEFORE UPDATE OF size ON main.files FOR EACH ROW
                BEGIN
                  DELETE FROM %(nuts)s WHERE file_id == old.file_id;
                END
            '''))

        # Removing a file from the selection removes its nuts as well.
        cursor.execute(self._sql(
            '''
                CREATE TRIGGER IF NOT EXISTS
                    %(db)s.%(file_states)s_delete_files
                BEFORE DELETE ON %(db)s.%(file_states)s FOR EACH ROW
                BEGIN
                  DELETE FROM %(nuts)s WHERE file_id == old.file_id;
                END
            '''))

        # Maintain kind_codes reference counts on nut insertion/deletion.
        cursor.execute(self._sql(
            '''
                CREATE TRIGGER IF NOT EXISTS %(db)s.%(nuts)s_inc_kind_codes
                BEFORE INSERT ON %(nuts)s FOR EACH ROW
                BEGIN
                    INSERT OR IGNORE INTO %(kind_codes_count)s VALUES
                    (new.kind_codes_id, 0);
                    UPDATE %(kind_codes_count)s
                    SET count = count + 1
                    WHERE new.kind_codes_id
                        == %(kind_codes_count)s.kind_codes_id;
                END
            '''))

        cursor.execute(self._sql(
            '''
                CREATE TRIGGER IF NOT EXISTS %(db)s.%(nuts)s_dec_kind_codes
                BEFORE DELETE ON %(nuts)s FOR EACH ROW
                BEGIN
                    UPDATE %(kind_codes_count)s
                    SET count = count - 1
                    WHERE old.kind_codes_id
                        == %(kind_codes_count)s.kind_codes_id;
                END
            '''))

        # Coverage table: sparse step function of content availability per
        # kind_codes combination; ``step`` is the change in coverage count
        # at ``time_seconds``/``time_offset``.
        cursor.execute(self._register_table(self._sql(
            '''
                CREATE TABLE IF NOT EXISTS %(db)s.%(coverage)s (
                    kind_codes_id integer,
                    time_seconds integer,
                    time_offset integer,
                    step integer)
            ''')))

        cursor.execute(self._sql(
            '''
                CREATE UNIQUE INDEX IF NOT EXISTS %(db)s.%(coverage)s_time
                    ON %(coverage)s (kind_codes_id, time_seconds, time_offset)
            '''))

        # On nut insertion: +1 step at tmin, -1 step at tmax, dropping
        # entries whose step becomes zero.
        cursor.execute(self._sql(
            '''
                CREATE TRIGGER IF NOT EXISTS %(db)s.%(nuts)s_add_coverage
                AFTER INSERT ON %(nuts)s FOR EACH ROW
                BEGIN
                    INSERT OR IGNORE INTO %(coverage)s VALUES
                    (new.kind_codes_id, new.tmin_seconds, new.tmin_offset, 0)
                    ;
                    UPDATE %(coverage)s
                    SET step = step + 1
                    WHERE new.kind_codes_id == %(coverage)s.kind_codes_id
                        AND new.tmin_seconds == %(coverage)s.time_seconds
                        AND new.tmin_offset == %(coverage)s.time_offset
                    ;
                    INSERT OR IGNORE INTO %(coverage)s VALUES
                    (new.kind_codes_id, new.tmax_seconds, new.tmax_offset, 0)
                    ;
                    UPDATE %(coverage)s
                    SET step = step - 1
                    WHERE new.kind_codes_id == %(coverage)s.kind_codes_id
                        AND new.tmax_seconds == %(coverage)s.time_seconds
                        AND new.tmax_offset == %(coverage)s.time_offset
                    ;
                    DELETE FROM %(coverage)s
                    WHERE new.kind_codes_id == %(coverage)s.kind_codes_id
                        AND new.tmin_seconds == %(coverage)s.time_seconds
                        AND new.tmin_offset == %(coverage)s.time_offset
                        AND step == 0
                    ;
                    DELETE FROM %(coverage)s
                    WHERE new.kind_codes_id == %(coverage)s.kind_codes_id
                        AND new.tmax_seconds == %(coverage)s.time_seconds
                        AND new.tmax_offset == %(coverage)s.time_offset
                        AND step == 0
                    ;
                END
            '''))

        # On nut deletion: exact inverse of the insertion trigger.
        cursor.execute(self._sql(
            '''
                CREATE TRIGGER IF NOT EXISTS %(db)s.%(nuts)s_remove_coverage
                BEFORE DELETE ON %(nuts)s FOR EACH ROW
                BEGIN
                    INSERT OR IGNORE INTO %(coverage)s VALUES
                    (old.kind_codes_id, old.tmin_seconds, old.tmin_offset, 0)
                    ;
                    UPDATE %(coverage)s
                    SET step = step - 1
                    WHERE old.kind_codes_id == %(coverage)s.kind_codes_id
                        AND old.tmin_seconds == %(coverage)s.time_seconds
                        AND old.tmin_offset == %(coverage)s.time_offset
                    ;
                    INSERT OR IGNORE INTO %(coverage)s VALUES
                    (old.kind_codes_id, old.tmax_seconds, old.tmax_offset, 0)
                    ;
                    UPDATE %(coverage)s
                    SET step = step + 1
                    WHERE old.kind_codes_id == %(coverage)s.kind_codes_id
                        AND old.tmax_seconds == %(coverage)s.time_seconds
                        AND old.tmax_offset == %(coverage)s.time_offset
                    ;
                    DELETE FROM %(coverage)s
                    WHERE old.kind_codes_id == %(coverage)s.kind_codes_id
                        AND old.tmin_seconds == %(coverage)s.time_seconds
                        AND old.tmin_offset == %(coverage)s.time_offset
                        AND step == 0
                    ;
                    DELETE FROM %(coverage)s
                    WHERE old.kind_codes_id == %(coverage)s.kind_codes_id
                        AND old.tmax_seconds == %(coverage)s.time_seconds
                        AND old.tmax_offset == %(coverage)s.time_offset
                        AND step == 0
                    ;
                END
            '''))
    def _delete(self):
        '''Delete database tables associated with this Squirrel.'''

        # Each line of the multi-line string is one DROP statement, executed
        # individually after name substitution. Order matters: triggers
        # first, then the tables they reference.
        with self.transaction('delete tables') as cursor:
            for s in '''
                DROP TRIGGER %(db)s.%(nuts)s_delete_nuts;
                DROP TRIGGER %(db)s.%(nuts)s_delete_nuts2;
                DROP TRIGGER %(db)s.%(file_states)s_delete_files;
                DROP TRIGGER %(db)s.%(nuts)s_inc_kind_codes;
                DROP TRIGGER %(db)s.%(nuts)s_dec_kind_codes;
                DROP TABLE %(db)s.%(nuts)s;
                DROP TABLE %(db)s.%(kind_codes_count)s;
                DROP TRIGGER IF EXISTS %(db)s.%(nuts)s_add_coverage;
                DROP TRIGGER IF EXISTS %(db)s.%(nuts)s_remove_coverage;
                DROP TABLE IF EXISTS %(db)s.%(coverage)s;
            '''.strip().splitlines():

                cursor.execute(self._sql(s))

        Selection._delete(self)
    @filldocs
    def add(self,
            paths,
            kinds=None,
            format='detect',
            include=None,
            exclude=None,
            check=True):

        '''
        Add files to the selection.

        :param paths:
            Iterator yielding paths to files or directories to be added to the
            selection. Recurses into directories. If given a ``str``, it
            is treated as a single path to be added.
        :type paths:
            :py:class:`list` of :py:class:`str`

        :param kinds:
            Content types to be made available through the Squirrel selection.
            By default, all known content types are accepted.
        :type kinds:
            :py:class:`list` of :py:class:`str`

        :param format:
            File format identifier or ``'detect'`` to enable auto-detection
            (available: %(file_formats)s).
        :type format:
            str

        :param include:
            If not ``None``, files are only included if their paths match the
            given regular expression pattern.
        :type include:
            str

        :param exclude:
            If not ``None``, files are only included if their paths do not
            match the given regular expression pattern.
        :type exclude:
            str

        :param check:
            If ``True``, all file modification times are checked to see if
            cached information has to be updated (slow). If ``False``, only
            previously unknown files are indexed and cached information is used
            for known files, regardless of file state (fast, corresponds to
            Squirrel's ``--optimistic`` mode). File deletions will go
            undetected in the latter case.
        :type check:
            bool

        :Complexity:
            O(log N)
        '''

        if isinstance(kinds, str):
            kinds = (kinds,)

        if isinstance(paths, str):
            paths = [paths]

        kind_mask = model.to_kind_mask(kinds)

        # Virtual paths bypass the file-system scan via pass_through.
        with progress.view():
            Selection.add(
                self, util.iter_select_files(
                    paths,
                    show_progress=False,
                    include=include,
                    exclude=exclude,
                    pass_through=lambda path: path.startswith('virtual:')
                ), kind_mask, format)

        self._load(check)
        self._update_nuts()
    def reload(self):
        '''
        Check for modifications and reindex modified files.

        Based on file modification times.
        '''

        # Force the modification-time check for all files in the selection,
        # then reindex what changed and refresh the selection's nut table.
        self._set_file_states_force_check()
        self._load(check=True)
        self._update_nuts()
617 def add_virtual(self, nuts, virtual_paths=None):
618 '''
619 Add content which is not backed by files.
621 :param nuts:
622 Content pieces to be added.
623 :type nuts:
624 iterator yielding :py:class:`~pyrocko.squirrel.model.Nut` objects
626 :param virtual_paths:
627 List of virtual paths to prevent creating a temporary list of the
628 nuts while aggregating the file paths for the selection.
629 :type virtual_paths:
630 :py:class:`list` of :py:class:`str`
632 Stores to the main database and the selection.
633 '''
635 if isinstance(virtual_paths, str):
636 virtual_paths = [virtual_paths]
638 if virtual_paths is None:
639 if not isinstance(nuts, list):
640 nuts = list(nuts)
641 virtual_paths = set(nut.file_path for nut in nuts)
643 Selection.add(self, virtual_paths)
644 self.get_database().dig(nuts)
645 self._update_nuts()
    def add_volatile(self, nuts):
        '''
        Add volatile (in-memory) content to the selection.

        Registers the nuts with the virtual I/O backend and remembers their
        paths in ``self._volatile_paths`` — presumably for cleanup when the
        application closes (see :py:meth:`add_volatile_waveforms`); the
        attribute is defined in the base class — TODO confirm.
        '''
        if not isinstance(nuts, list):
            nuts = list(nuts)

        paths = list(set(nut.file_path for nut in nuts))
        io.backends.virtual.add_nuts(nuts)
        self.add_virtual(nuts, paths)
        self._volatile_paths.extend(paths)
656 def add_volatile_waveforms(self, traces):
657 '''
658 Add in-memory waveforms which will be removed when the app closes.
659 '''
661 name = model.random_name()
663 path = 'virtual:volatile:%s' % name
665 nuts = []
666 for itr, tr in enumerate(traces):
667 assert tr.tmin <= tr.tmax
668 tmin_seconds, tmin_offset = model.tsplit(tr.tmin)
669 tmax_seconds, tmax_offset = model.tsplit(
670 tr.tmin + tr.data_len()*tr.deltat)
672 nuts.append(model.Nut(
673 file_path=path,
674 file_format='virtual',
675 file_segment=itr,
676 file_element=0,
677 file_mtime=0,
678 codes=tr.codes,
679 tmin_seconds=tmin_seconds,
680 tmin_offset=tmin_offset,
681 tmax_seconds=tmax_seconds,
682 tmax_offset=tmax_offset,
683 deltat=tr.deltat,
684 kind_id=to_kind_id('waveform'),
685 content=tr))
687 self.add_volatile(nuts)
688 return path
    def _load(self, check):
        '''
        Index all files in the selection (side effect of draining iload).

        ``content=[]`` means no payload is loaded, only meta-data is
        extracted; unchanged files are skipped.
        '''
        for _ in io.iload(
                self,
                content=[],
                skip_unchanged=True,
                check=check):
            pass
    def _update_nuts(self, transaction=None):
        '''
        Copy nuts of newly added/changed files into the selection's table.

        Only files whose state is not "known" (``file_state != 2``) and whose
        content kind is enabled in the selection's ``kind_mask`` are pulled
        in from the global ``nuts`` table.
        '''
        transaction = transaction or self.transaction('update nuts')
        with make_task('Aggregating selection') as task, \
                transaction as cursor:

            # Report SQLite progress to the task every 100000 VM steps.
            self._conn.set_progress_handler(task.update, 100000)
            nrows = cursor.execute(self._sql(
                '''
                    INSERT INTO %(db)s.%(nuts)s
                    SELECT NULL,
                        nuts.file_id, nuts.file_segment, nuts.file_element,
                        nuts.kind_id, nuts.kind_codes_id,
                        nuts.tmin_seconds, nuts.tmin_offset,
                        nuts.tmax_seconds, nuts.tmax_offset,
                        nuts.kscale
                    FROM %(db)s.%(file_states)s
                    INNER JOIN nuts
                        ON %(db)s.%(file_states)s.file_id == nuts.file_id
                    INNER JOIN kind_codes
                        ON nuts.kind_codes_id ==
                           kind_codes.kind_codes_id
                    WHERE %(db)s.%(file_states)s.file_state != 2
                        AND (((1 << kind_codes.kind_id)
                            & %(db)s.%(file_states)s.kind_mask) != 0)
                ''')).rowcount

            task.update(nrows)
            self._set_file_states_known(transaction)
            self._conn.set_progress_handler(None, 0)
    def add_source(self, source, check=True):
        '''
        Add remote resource.

        :param source:
            Remote data access client instance.
        :type source:
            subclass of :py:class:`~pyrocko.squirrel.client.base.Source`

        :param check:
            Passed on to the source's setup; controls whether cached files
            are checked for modifications.
        :type check:
            bool
        '''

        self._sources.append(source)
        source.setup(self, check=check)
    def add_fdsn(self, *args, **kwargs):
        '''
        Add FDSN site for transparent remote data access.

        Arguments are passed to
        :py:class:`~pyrocko.squirrel.client.fdsn.FDSNSource`.
        '''

        self.add_source(fdsn.FDSNSource(*args, **kwargs))
    def add_catalog(self, *args, **kwargs):
        '''
        Add online catalog for transparent event data access.

        Arguments are passed to
        :py:class:`~pyrocko.squirrel.client.catalog.CatalogSource`.
        '''

        self.add_source(catalog.CatalogSource(*args, **kwargs))
    def add_dataset(self, ds, check=True):
        '''
        Read dataset description from file and add its contents.

        :param ds:
            Path to dataset description file or dataset description object
            . See :py:mod:`~pyrocko.squirrel.dataset`.
        :type ds:
            :py:class:`str` or :py:class:`~pyrocko.squirrel.dataset.Dataset`

        :param check:
            If ``True``, all file modification times are checked to see if
            cached information has to be updated (slow). If ``False``, only
            previously unknown files are indexed and cached information is used
            for known files, regardless of file state (fast, corresponds to
            Squirrel's ``--optimistic`` mode). File deletions will go
            undetected in the latter case.
        :type check:
            bool
        '''
        if isinstance(ds, str):
            ds = dataset.read_dataset(ds)

        ds.setup(self, check=check)
786 def _get_selection_args(
787 self, kind_id,
788 obj=None, tmin=None, tmax=None, time=None, codes=None):
790 if codes is not None:
791 codes = codes_patterns_for_kind(kind_id, codes)
793 if time is not None:
794 tmin = time
795 tmax = time
797 if obj is not None:
798 tmin = tmin if tmin is not None else obj.tmin
799 tmax = tmax if tmax is not None else obj.tmax
800 codes = codes if codes is not None else codes_patterns_for_kind(
801 kind_id, obj.codes)
803 return tmin, tmax, codes
805 def _get_selection_args_str(self, *args, **kwargs):
807 tmin, tmax, codes = self._get_selection_args(*args, **kwargs)
808 return 'tmin: %s, tmax: %s, codes: %s' % (
809 util.time_to_str(tmin) if tmin is not None else 'none',
810 util.time_to_str(tmax) if tmax is not None else 'none',
811 ','.join(str(entry) for entry in codes))
813 def _selection_args_to_kwargs(
814 self, obj=None, tmin=None, tmax=None, time=None, codes=None):
816 return dict(obj=obj, tmin=tmin, tmax=tmax, time=time, codes=codes)
    def _timerange_sql(self, tmin, tmax, kind, cond, args, naiv):
        '''
        Append SQL conditions (and their bind values) selecting nuts
        intersecting the time range ``[tmin, tmax]``.

        In the default (non-naiv) mode, one condition per time-scale bucket
        (``kscale``) is generated so the query can use the
        (kind_id, kscale, tmin_seconds) index: a nut of scale ``kscale``
        spans at most ``tscale`` seconds, so its tmin must lie within
        ``[tmin - tscale, tmax]`` (widened by 1 s for the sub-second
        offsets) to intersect. With ``naiv=True``, a simple unindexed
        tmin/tmax overlap test is used (slow, for testing).
        '''

        tmin_seconds, tmin_offset = model.tsplit(tmin)
        tmax_seconds, tmax_offset = model.tsplit(tmax)
        if naiv:
            cond.append('%(db)s.%(nuts)s.tmin_seconds <= ?')
            args.append(tmax_seconds)
        else:
            tscale_edges = model.tscale_edges
            tmin_cond = []
            for kscale in range(tscale_edges.size + 1):
                if kscale != tscale_edges.size:
                    tscale = int(tscale_edges[kscale])
                    # NOTE(review): single '=' here vs '==' in the branch
                    # below — equivalent in SQLite, but inconsistent.
                    tmin_cond.append('''
                        (%(db)s.%(nuts)s.kind_id = ?
                         AND %(db)s.%(nuts)s.kscale == ?
                         AND %(db)s.%(nuts)s.tmin_seconds BETWEEN ? AND ?)
                    ''')
                    args.extend(
                        (to_kind_id(kind), kscale,
                         tmin_seconds - tscale - 1, tmax_seconds + 1))

                else:
                    # Largest (unbounded) scale bucket: no lower limit on
                    # tmin_seconds.
                    tmin_cond.append('''
                        (%(db)s.%(nuts)s.kind_id == ?
                         AND %(db)s.%(nuts)s.kscale == ?
                         AND %(db)s.%(nuts)s.tmin_seconds <= ?)
                    ''')

                    args.extend(
                        (to_kind_id(kind), kscale, tmax_seconds + 1))
            if tmin_cond:
                cond.append(' ( ' + ' OR '.join(tmin_cond) + ' ) ')

        cond.append('%(db)s.%(nuts)s.tmax_seconds >= ?')
        args.append(tmin_seconds)
    def _codes_match_sql(self, kind_id, codes, cond, args):
        '''
        Append an SQL condition matching the given codes patterns.

        Patterns without glob wildcards are grouped into a single fast
        ``IN (...)`` test; wildcard patterns each get a ``GLOB`` test.
        '''
        pats = codes_patterns_for_kind(kind_id, codes)
        if pats is None:
            return

        pats_exact = []
        pats_nonexact = []
        for pat in pats:
            spat = pat.safe_str
            (pats_exact if _is_exact(spat) else pats_nonexact).append(spat)

        cond_exact = None
        if pats_exact:
            cond_exact = ' ( kind_codes.codes IN ( %s ) ) ' % ', '.join(
                '?'*len(pats_exact))

            args.extend(pats_exact)

        cond_nonexact = None
        if pats_nonexact:
            cond_nonexact = ' ( %s ) ' % ' OR '.join(
                ('kind_codes.codes GLOB ?',) * len(pats_nonexact))

            args.extend(pats_nonexact)

        if cond_exact and cond_nonexact:
            cond.append(' ( %s OR %s ) ' % (cond_exact, cond_nonexact))

        elif cond_exact:
            cond.append(cond_exact)

        elif cond_nonexact:
            cond.append(cond_nonexact)
    def iter_nuts(
            self, kind=None, tmin=None, tmax=None, codes=None, naiv=False,
            kind_codes_ids=None, path=None):

        '''
        Iterate over content entities matching given constraints.

        :param kind:
            Content kind (or kinds) to extract.
        :type kind:
            :py:class:`str`, :py:class:`list` of :py:class:`str`

        :param tmin:
            Start time of query interval.
        :type tmin:
            timestamp

        :param tmax:
            End time of query interval.
        :type tmax:
            timestamp

        :param codes:
            List of code patterns to query.
        :type codes:
            :py:class:`list` of :py:class:`~pyrocko.squirrel.model.Codes`
            objects appropriate for the queried content type, or anything which
            can be converted to such objects.

        :param naiv:
            Bypass time span lookup through indices (slow, for testing).
        :type naiv:
            :py:class:`bool`

        :param kind_codes_ids:
            Kind-codes IDs of contents to be retrieved (internal use).
        :type kind_codes_ids:
            :py:class:`list` of :py:class:`int`

        :yields:
            :py:class:`~pyrocko.squirrel.model.Nut` objects representing the
            intersecting content.

        :complexity:
            O(log N) for the time selection part due to heavy use of database
            indices.

        Query time span is treated as a half-open interval ``[tmin, tmax)``.
        However, if ``tmin`` equals ``tmax``, the edge logics are modified to
        closed-interval so that content intersecting with the time instant ``t
        = tmin = tmax`` is returned (otherwise nothing would be returned as
        ``[t, t)`` never matches anything).

        Time spans of content entities to be matched are also treated as half
        open intervals, e.g. content span ``[0, 1)`` is matched by query span
        ``[0, 1)`` but not by ``[-1, 0)`` or ``[1, 2)``. Also here, logics are
        modified to closed-interval when the content time span is an empty
        interval, i.e. to indicate a time instant. E.g. time instant 0 is
        matched by ``[0, 1)`` but not by ``[-1, 0)`` or ``[1, 2)``.
        '''

        # Multiple kinds: recurse once per kind.
        # NOTE(review): the recursive call does not forward ``naiv``,
        # ``kind_codes_ids`` or ``path`` — these are silently ignored when
        # ``kind`` is ``None`` or a list. Confirm whether this is intended.
        if not isinstance(kind, str):
            if kind is None:
                kind = model.g_content_kinds
            for kind_ in kind:
                for nut in self.iter_nuts(kind_, tmin, tmax, codes):
                    yield nut

            return

        kind_id = to_kind_id(kind)

        cond = []
        args = []
        if tmin is not None or tmax is not None:
            assert kind is not None
            # Open-ended queries fall back to the full known time span.
            if tmin is None:
                tmin = self.get_time_span()[0]
            if tmax is None:
                tmax = self.get_time_span()[1] + 1.0

            self._timerange_sql(tmin, tmax, kind, cond, args, naiv)

        cond.append('kind_codes.kind_id == ?')
        args.append(kind_id)

        if codes is not None:
            self._codes_match_sql(kind_id, codes, cond, args)

        if kind_codes_ids is not None:
            cond.append(
                ' ( kind_codes.kind_codes_id IN ( %s ) ) ' % ', '.join(
                    '?'*len(kind_codes_ids)))

            args.extend(kind_codes_ids)

        db = self.get_database()
        if path is not None:
            cond.append('files.path == ?')
            args.append(db.relpath(abspath(path)))

        sql = ('''
            SELECT
                files.path,
                files.format,
                files.mtime,
                files.size,
                %(db)s.%(nuts)s.file_segment,
                %(db)s.%(nuts)s.file_element,
                kind_codes.kind_id,
                kind_codes.codes,
                %(db)s.%(nuts)s.tmin_seconds,
                %(db)s.%(nuts)s.tmin_offset,
                %(db)s.%(nuts)s.tmax_seconds,
                %(db)s.%(nuts)s.tmax_offset,
                kind_codes.deltat
            FROM files
            INNER JOIN %(db)s.%(nuts)s
                ON files.file_id == %(db)s.%(nuts)s.file_id
            INNER JOIN kind_codes
                ON %(db)s.%(nuts)s.kind_codes_id == kind_codes.kind_codes_id
            ''')

        if cond:
            sql += ''' WHERE ''' + ' AND '.join(cond)

        sql = self._sql(sql)
        if tmin is None and tmax is None:
            # No time constraint: yield everything matching.
            for row in self._conn.execute(sql, args):
                row = (db.abspath(row[0]),) + row[1:]
                nut = model.Nut(values_nocheck=row)
                yield nut
        else:
            assert tmin is not None and tmax is not None
            # The SQL condition is only approximate (integer seconds);
            # apply the exact half-open/instant interval logic in Python.
            if tmin == tmax:
                for row in self._conn.execute(sql, args):
                    row = (db.abspath(row[0]),) + row[1:]
                    nut = model.Nut(values_nocheck=row)
                    if (nut.tmin <= tmin < nut.tmax) \
                            or (nut.tmin == nut.tmax and tmin == nut.tmin):

                        yield nut
            else:
                for row in self._conn.execute(sql, args):
                    row = (db.abspath(row[0]),) + row[1:]
                    nut = model.Nut(values_nocheck=row)
                    if (tmin < nut.tmax and nut.tmin < tmax) \
                            or (nut.tmin == nut.tmax
                                and tmin <= nut.tmin < tmax):

                        yield nut
    def get_nuts(self, *args, **kwargs):
        '''
        Get content entities matching given constraints.

        Like :py:meth:`iter_nuts` but returns results as a list.
        '''

        return list(self.iter_nuts(*args, **kwargs))
    def _split_nuts(
            self, kind, tmin=None, tmax=None, codes=None, path=None):
        '''
        Cut out the interval ``[tmin, tmax]`` from matching nuts.

        Nuts intersecting the interval are deleted and, where they extend
        beyond it, replaced by truncated copies. The operation is applied
        both to this selection's nut table and to the global ``main.nuts``
        table, inside one transaction.
        '''

        kind_id = to_kind_id(kind)
        tmin_seconds, tmin_offset = model.tsplit(tmin)
        tmax_seconds, tmax_offset = model.tsplit(tmax)

        # Name mapping redirecting the SQL templates at the global tables.
        names_main_nuts = dict(self._names)
        names_main_nuts.update(db='main', nuts='nuts')

        db = self.get_database()

        def main_nuts(s):
            return s % names_main_nuts

        with self.transaction('split nuts') as cursor:
            # modify selection and main
            for sql_subst in [
                    self._sql, main_nuts]:

                cond = []
                args = []

                self._timerange_sql(tmin, tmax, kind, cond, args, False)

                if codes is not None:
                    self._codes_match_sql(kind_id, codes, cond, args)

                if path is not None:
                    cond.append('files.path == ?')
                    args.append(db.relpath(abspath(path)))

                sql = sql_subst('''
                    SELECT
                        %(db)s.%(nuts)s.nut_id,
                        %(db)s.%(nuts)s.tmin_seconds,
                        %(db)s.%(nuts)s.tmin_offset,
                        %(db)s.%(nuts)s.tmax_seconds,
                        %(db)s.%(nuts)s.tmax_offset,
                        kind_codes.deltat
                    FROM files
                    INNER JOIN %(db)s.%(nuts)s
                        ON files.file_id == %(db)s.%(nuts)s.file_id
                    INNER JOIN kind_codes
                        ON %(db)s.%(nuts)s.kind_codes_id == kind_codes.kind_codes_id
                    WHERE ''' + ' AND '.join(cond))  # noqa

                # Collect replacement fragments (left/right remainders) and
                # the ids of nuts to remove.
                insert = []
                delete = []
                for row in cursor.execute(sql, args):
                    nut_id, nut_tmin_seconds, nut_tmin_offset, \
                        nut_tmax_seconds, nut_tmax_offset, nut_deltat = row

                    nut_tmin = model.tjoin(
                        nut_tmin_seconds, nut_tmin_offset)
                    nut_tmax = model.tjoin(
                        nut_tmax_seconds, nut_tmax_offset)

                    if nut_tmin < tmax and tmin < nut_tmax:
                        # Left remainder: nut starts before the cut.
                        if nut_tmin < tmin:
                            insert.append((
                                nut_tmin_seconds, nut_tmin_offset,
                                tmin_seconds, tmin_offset,
                                model.tscale_to_kscale(
                                    tmin_seconds - nut_tmin_seconds),
                                nut_id))

                        # Right remainder: nut ends after the cut.
                        if tmax < nut_tmax:
                            insert.append((
                                tmax_seconds, tmax_offset,
                                nut_tmax_seconds, nut_tmax_offset,
                                model.tscale_to_kscale(
                                    nut_tmax_seconds - tmax_seconds),
                                nut_id))

                        delete.append((nut_id,))

                # Insert remainders first (they copy fields from the row
                # identified by nut_id), then delete the originals.
                sql_add = '''
                    INSERT INTO %(db)s.%(nuts)s (
                            file_id, file_segment, file_element, kind_id,
                            kind_codes_id, tmin_seconds, tmin_offset,
                            tmax_seconds, tmax_offset, kscale )
                    SELECT
                        file_id, file_segment, file_element,
                        kind_id, kind_codes_id, ?, ?, ?, ?, ?
                    FROM %(db)s.%(nuts)s
                    WHERE nut_id == ?
                '''
                cursor.executemany(sql_subst(sql_add), insert)

                sql_delete = '''
                    DELETE FROM %(db)s.%(nuts)s WHERE nut_id == ?
                '''
                cursor.executemany(sql_subst(sql_delete), delete)
    def get_time_span(self, kinds=None):
        '''
        Get time interval over all content in selection.

        :param kinds:
            If not ``None``, restrict query to given content kinds.
        :type kinds:
            list of str

        :complexity:
            O(1), independent of the number of nuts.

        :returns:
            ``(tmin, tmax)``, combined time interval of queried content kinds.
            ``(None, None)`` if no matching content is available.
        '''

        # Times are stored split into integer seconds plus a sub-second
        # offset. The inner query finds the extreme seconds value, the outer
        # one the extreme offset among rows sharing that seconds value.
        sql_min = self._sql('''
            SELECT MIN(tmin_seconds), MIN(tmin_offset)
            FROM %(db)s.%(nuts)s
            WHERE kind_id == ?
                AND tmin_seconds == (
                    SELECT MIN(tmin_seconds)
                    FROM %(db)s.%(nuts)s
                    WHERE kind_id == ?)
        ''')

        sql_max = self._sql('''
            SELECT MAX(tmax_seconds), MAX(tmax_offset)
            FROM %(db)s.%(nuts)s
            WHERE kind_id == ?
                AND tmax_seconds == (
                    SELECT MAX(tmax_seconds)
                    FROM %(db)s.%(nuts)s
                    WHERE kind_id == ?)
        ''')

        gtmin = None
        gtmax = None

        # Accept a bare string as shorthand for a single-element list.
        if isinstance(kinds, str):
            kinds = [kinds]

        if kinds is None:
            kind_ids = model.g_content_kind_ids
        else:
            kind_ids = model.to_kind_ids(kinds)

        # Combine the per-kind extremes into one global time span.
        for kind_id in kind_ids:
            for tmin_seconds, tmin_offset in self._conn.execute(
                    sql_min, (kind_id, kind_id)):
                tmin = model.tjoin(tmin_seconds, tmin_offset)
                if tmin is not None and (gtmin is None or tmin < gtmin):
                    gtmin = tmin

            for (tmax_seconds, tmax_offset) in self._conn.execute(
                    sql_max, (kind_id, kind_id)):
                tmax = model.tjoin(tmax_seconds, tmax_offset)
                if tmax is not None and (gtmax is None or tmax > gtmax):
                    gtmax = tmax

        return gtmin, gtmax
1207 def has(self, kinds):
1208 '''
1209 Check availability of given content kinds.
1211 :param kinds:
1212 Content kinds to query.
1213 :type kind:
1214 list of str
1216 :returns:
1217 ``True`` if any of the queried content kinds is available
1218 in the selection.
1219 '''
1220 self_tmin, self_tmax = self.get_time_span(kinds)
1222 return None not in (self_tmin, self_tmax)
1224 def get_deltat_span(self, kind):
1225 '''
1226 Get min and max sampling interval of all content of given kind.
1228 :param kind:
1229 Content kind
1230 :type kind:
1231 str
1233 :returns: ``(deltat_min, deltat_max)``
1234 '''
1236 deltats = [
1237 deltat for deltat in self.get_deltats(kind)
1238 if deltat is not None]
1240 if deltats:
1241 return min(deltats), max(deltats)
1242 else:
1243 return None, None
1245 def iter_kinds(self, codes=None):
1246 '''
1247 Iterate over content types available in selection.
1249 :param codes:
1250 If given, get kinds only for selected codes identifier.
1251 Only a single identifier may be given here and no pattern matching
1252 is done, currently.
1253 :type codes:
1254 :py:class:`~pyrocko.squirrel.model.Codes`
1256 :yields:
1257 Available content kinds as :py:class:`str`.
1259 :complexity:
1260 O(1), independent of number of nuts.
1261 '''
1263 return self._database._iter_kinds(
1264 codes=codes,
1265 kind_codes_count='%(db)s.%(kind_codes_count)s' % self._names)
1267 def iter_deltats(self, kind=None):
1268 '''
1269 Iterate over sampling intervals available in selection.
1271 :param kind:
1272 If given, get sampling intervals only for a given content type.
1273 :type kind:
1274 str
1276 :yields:
1277 :py:class:`float` values.
1279 :complexity:
1280 O(1), independent of number of nuts.
1281 '''
1282 return self._database._iter_deltats(
1283 kind=kind,
1284 kind_codes_count='%(db)s.%(kind_codes_count)s' % self._names)
1286 def iter_codes(self, kind=None):
1287 '''
1288 Iterate over content identifier code sequences available in selection.
1290 :param kind:
1291 If given, get codes only for a given content type.
1292 :type kind:
1293 str
1295 :yields:
1296 :py:class:`tuple` of :py:class:`str`
1298 :complexity:
1299 O(1), independent of number of nuts.
1300 '''
1301 return self._database._iter_codes(
1302 kind=kind,
1303 kind_codes_count='%(db)s.%(kind_codes_count)s' % self._names)
1305 def _iter_codes_info(self, kind=None, codes=None):
1306 '''
1307 Iterate over number of occurrences of any (kind, codes) combination.
1309 :param kind:
1310 If given, get counts only for selected content type.
1311 :type kind:
1312 str
1314 :yields:
1315 Tuples of the form ``(kind, codes, deltat, kind_codes_id, count)``.
1317 :complexity:
1318 O(1), independent of number of nuts.
1319 '''
1320 return self._database._iter_codes_info(
1321 kind=kind,
1322 codes=codes,
1323 kind_codes_count='%(db)s.%(kind_codes_count)s' % self._names)
1325 def get_kinds(self, codes=None):
1326 '''
1327 Get content types available in selection.
1329 :param codes:
1330 If given, get kinds only for selected codes identifier.
1331 Only a single identifier may be given here and no pattern matching
1332 is done, currently.
1333 :type codes:
1334 :py:class:`~pyrocko.squirrel.model.Codes`
1336 :returns:
1337 Sorted list of available content types.
1338 :rtype:
1339 py:class:`list` of :py:class:`str`
1341 :complexity:
1342 O(1), independent of number of nuts.
1344 '''
1345 return sorted(list(self.iter_kinds(codes=codes)))
1347 def get_deltats(self, kind=None):
1348 '''
1349 Get sampling intervals available in selection.
1351 :param kind:
1352 If given, get sampling intervals only for selected content type.
1353 :type kind:
1354 str
1356 :complexity:
1357 O(1), independent of number of nuts.
1359 :returns: Sorted list of available sampling intervals.
1360 '''
1361 return sorted(list(self.iter_deltats(kind=kind)))
1363 def get_codes(self, kind=None):
1364 '''
1365 Get identifier code sequences available in selection.
1367 :param kind:
1368 If given, get codes only for selected content type.
1369 :type kind:
1370 str
1372 :complexity:
1373 O(1), independent of number of nuts.
1375 :returns: Sorted list of available codes as tuples of strings.
1376 '''
1377 return sorted(list(self.iter_codes(kind=kind)))
1379 def get_counts(self, kind=None):
1380 '''
1381 Get number of occurrences of any (kind, codes) combination.
1383 :param kind:
1384 If given, get codes only for selected content type.
1385 :type kind:
1386 str
1388 :complexity:
1389 O(1), independent of number of nuts.
1391 :returns: ``dict`` with ``counts[kind][codes]`` or ``counts[codes]``
1392 if kind is not ``None``
1393 '''
1394 d = {}
1395 for kind_id, codes, _, _, count in self._iter_codes_info(kind=kind):
1396 if kind_id not in d:
1397 v = d[kind_id] = {}
1398 else:
1399 v = d[kind_id]
1401 if codes not in v:
1402 v[codes] = 0
1404 v[codes] += count
1406 if kind is not None:
1407 return d[to_kind_id(kind)]
1408 else:
1409 return dict((to_kind(kind_id), v) for (kind_id, v) in d.items())
1411 def glob_codes(self, kind, codes):
1412 '''
1413 Find codes matching given patterns.
1415 :param kind:
1416 Content kind to be queried.
1417 :type kind:
1418 str
1420 :param codes:
1421 List of code patterns to query.
1422 :type codes:
1423 :py:class:`list` of :py:class:`~pyrocko.squirrel.model.Codes`
1424 objects appropriate for the queried content type, or anything which
1425 can be converted to such objects.
1427 :returns:
1428 List of matches of the form ``[kind_codes_id, codes, deltat]``.
1429 '''
1431 kind_id = to_kind_id(kind)
1432 args = [kind_id]
1433 pats = codes_patterns_for_kind(kind_id, codes)
1435 if pats:
1436 codes_cond = 'AND ( %s ) ' % ' OR '.join(
1437 ('kind_codes.codes GLOB ?',) * len(pats))
1439 args.extend(pat.safe_str for pat in pats)
1440 else:
1441 codes_cond = ''
1443 sql = self._sql('''
1444 SELECT kind_codes_id, codes, deltat FROM kind_codes
1445 WHERE
1446 kind_id == ? ''' + codes_cond)
1448 return list(map(list, self._conn.execute(sql, args)))
1450 def update(self, constraint=None, **kwargs):
1451 '''
1452 Update or partially update channel and event inventories.
1454 :param constraint:
1455 Selection of times or areas to be brought up to date.
1456 :type constraint:
1457 :py:class:`~pyrocko.squirrel.client.base.Constraint`
1459 :param \\*\\*kwargs:
1460 Shortcut for setting ``constraint=Constraint(**kwargs)``.
1462 This function triggers all attached remote sources, to check for
1463 updates in the meta-data. The sources will only submit queries when
1464 their expiration date has passed, or if the selection spans into
1465 previously unseen times or areas.
1466 '''
1468 if constraint is None:
1469 constraint = client.Constraint(**kwargs)
1471 for source in self._sources:
1472 source.update_channel_inventory(self, constraint)
1473 source.update_event_inventory(self, constraint)
1475 def update_waveform_promises(self, constraint=None, **kwargs):
1476 '''
1477 Permit downloading of remote waveforms.
1479 :param constraint:
1480 Remote waveforms compatible with the given constraint are enabled
1481 for download.
1482 :type constraint:
1483 :py:class:`~pyrocko.squirrel.client.base.Constraint`
1485 :param \\*\\*kwargs:
1486 Shortcut for setting ``constraint=Constraint(**kwargs)``.
1488 Calling this method permits Squirrel to download waveforms from remote
1489 sources when processing subsequent waveform requests. This works by
1490 inserting so called waveform promises into the database. It will look
1491 into the available channels for each remote source and create a promise
1492 for each channel compatible with the given constraint. If the promise
1493 then matches in a waveform request, Squirrel tries to download the
1494 waveform. If the download is successful, the downloaded waveform is
1495 added to the Squirrel and the promise is deleted. If the download
1496 fails, the promise is kept if the reason of failure looks like being
1497 temporary, e.g. because of a network failure. If the cause of failure
1498 however seems to be permanent, the promise is deleted so that no
1499 further attempts are made to download a waveform which might not be
1500 available from that server at all. To force re-scheduling after a
1501 permanent failure, call :py:meth:`update_waveform_promises`
1502 yet another time.
1503 '''
1505 if constraint is None:
1506 constraint = client.Constraint(**kwargs)
1508 for source in self._sources:
1509 source.update_waveform_promises(self, constraint)
1511 def remove_waveform_promises(self, from_database='selection'):
1512 '''
1513 Remove waveform promises from live selection or global database.
1515 Calling this function removes all waveform promises provided by the
1516 attached sources.
1518 :param from_database:
1519 Remove from live selection ``'selection'`` or global database
1520 ``'global'``.
1521 '''
1522 for source in self._sources:
1523 source.remove_waveform_promises(self, from_database=from_database)
1525 def update_responses(self, constraint=None, **kwargs):
1526 if constraint is None:
1527 constraint = client.Constraint(**kwargs)
1529 for source in self._sources:
1530 source.update_response_inventory(self, constraint)
1532 def get_nfiles(self):
1533 '''
1534 Get number of files in selection.
1535 '''
1537 sql = self._sql('''SELECT COUNT(*) FROM %(db)s.%(file_states)s''')
1538 for row in self._conn.execute(sql):
1539 return row[0]
1541 def get_nnuts(self):
1542 '''
1543 Get number of nuts in selection.
1544 '''
1546 sql = self._sql('''SELECT COUNT(*) FROM %(db)s.%(nuts)s''')
1547 for row in self._conn.execute(sql):
1548 return row[0]
1550 def get_total_size(self):
1551 '''
1552 Get aggregated file size available in selection.
1553 '''
1555 sql = self._sql('''
1556 SELECT SUM(files.size) FROM %(db)s.%(file_states)s
1557 INNER JOIN files
1558 ON %(db)s.%(file_states)s.file_id = files.file_id
1559 ''')
1561 for row in self._conn.execute(sql):
1562 return row[0] or 0
1564 def get_stats(self):
1565 '''
1566 Get statistics on contents available through this selection.
1567 '''
1569 kinds = self.get_kinds()
1570 time_spans = {}
1571 for kind in kinds:
1572 time_spans[kind] = self.get_time_span([kind])
1574 return SquirrelStats(
1575 nfiles=self.get_nfiles(),
1576 nnuts=self.get_nnuts(),
1577 kinds=kinds,
1578 codes=self.get_codes(),
1579 total_size=self.get_total_size(),
1580 counts=self.get_counts(),
1581 time_spans=time_spans,
1582 sources=[s.describe() for s in self._sources],
1583 operators=[op.describe() for op in self._operators])
1585 def get_content(
1586 self,
1587 nut,
1588 cache_id='default',
1589 accessor_id='default',
1590 show_progress=False,
1591 model='squirrel'):
1593 '''
1594 Get and possibly load full content for a given index entry from file.
1596 Loads the actual content objects (channel, station, waveform, ...) from
1597 file. For efficiency, sibling content (all stuff in the same file
1598 segment) will also be loaded as a side effect. The loaded contents are
1599 cached in the Squirrel object.
1600 '''
1602 content_cache = self._content_caches[cache_id]
1603 if not content_cache.has(nut):
1605 for nut_loaded in io.iload(
1606 nut.file_path,
1607 segment=nut.file_segment,
1608 format=nut.file_format,
1609 database=self._database,
1610 update_selection=self,
1611 show_progress=show_progress):
1613 content_cache.put(nut_loaded)
1615 try:
1616 return content_cache.get(nut, accessor_id, model)
1618 except KeyError:
1619 raise error.NotAvailable(
1620 'Unable to retrieve content: %s, %s, %s, %s' % nut.key)
1622 def advance_accessor(self, accessor_id='default', cache_id=None):
1623 '''
1624 Notify memory caches about consumer moving to a new data batch.
1626 :param accessor_id:
1627 Name of accessing consumer to be advanced.
1628 :type accessor_id:
1629 str
1631 :param cache_id:
1632 Name of cache to for which the accessor should be advanced. By
1633 default the named accessor is advanced in all registered caches.
1634 By default, two caches named ``'default'`` and ``'waveform'`` are
1635 available.
1636 :type cache_id:
1637 str
1639 See :py:class:`~pyrocko.squirrel.cache.ContentCache` for details on how
1640 Squirrel's memory caching works and can be tuned. Default behaviour is
1641 to release data when it has not been used in the latest data
1642 window/batch. If the accessor is never advanced, data is cached
1643 indefinitely - which is often desired e.g. for station meta-data.
1644 Methods for consecutive data traversal, like
1645 :py:meth:`chopper_waveforms` automatically advance and clear
1646 their accessor.
1647 '''
1648 for cache_ in (
1649 self._content_caches.keys()
1650 if cache_id is None
1651 else [cache_id]):
1653 self._content_caches[cache_].advance_accessor(accessor_id)
1655 def clear_accessor(self, accessor_id, cache_id=None):
1656 '''
1657 Notify memory caches about a consumer having finished.
1659 :param accessor_id:
1660 Name of accessor to be cleared.
1661 :type accessor_id:
1662 str
1664 :param cache_id:
1665 Name of cache for which the accessor should be cleared. By default
1666 the named accessor is cleared from all registered caches. By
1667 default, two caches named ``'default'`` and ``'waveform'`` are
1668 available.
1669 :type cache_id:
1670 str
1672 Calling this method clears all references to cache entries held by the
1673 named accessor. Cache entries are then freed if not referenced by any
1674 other accessor.
1675 '''
1677 for cache_ in (
1678 self._content_caches.keys()
1679 if cache_id is None
1680 else [cache_id]):
1682 self._content_caches[cache_].clear_accessor(accessor_id)
1684 def get_cache_stats(self, cache_id):
1685 return self._content_caches[cache_id].get_stats()
1687 @filldocs
1688 def get_stations(
1689 self, obj=None, tmin=None, tmax=None, time=None, codes=None,
1690 model='squirrel'):
1692 '''
1693 Get stations matching given constraints.
1695 %(query_args)s
1697 :param model:
1698 Select object model for returned values: ``'squirrel'`` to get
1699 Squirrel station objects or ``'pyrocko'`` to get Pyrocko station
1700 objects with channel information attached.
1701 :type model:
1702 str
1704 :returns:
1705 List of :py:class:`pyrocko.squirrel.Station
1706 <pyrocko.squirrel.model.Station>` objects by default or list of
1707 :py:class:`pyrocko.model.Station <pyrocko.model.station.Station>`
1708 objects if ``model='pyrocko'`` is requested.
1710 See :py:meth:`iter_nuts` for details on time span matching.
1711 '''
1713 if model == 'pyrocko':
1714 return self._get_pyrocko_stations(obj, tmin, tmax, time, codes)
1715 elif model in ('squirrel', 'stationxml', 'stationxml+'):
1716 args = self._get_selection_args(
1717 STATION, obj, tmin, tmax, time, codes)
1719 nuts = sorted(
1720 self.iter_nuts('station', *args), key=lambda nut: nut.dkey)
1722 return [self.get_content(nut, model=model) for nut in nuts]
1723 else:
1724 raise ValueError('Invalid station model: %s' % model)
1726 @filldocs
1727 def get_channels(
1728 self, obj=None, tmin=None, tmax=None, time=None, codes=None,
1729 model='squirrel'):
1731 '''
1732 Get channels matching given constraints.
1734 %(query_args)s
1736 :returns:
1737 List of :py:class:`~pyrocko.squirrel.model.Channel` objects.
1739 See :py:meth:`iter_nuts` for details on time span matching.
1740 '''
1742 args = self._get_selection_args(
1743 CHANNEL, obj, tmin, tmax, time, codes)
1745 nuts = sorted(
1746 self.iter_nuts('channel', *args), key=lambda nut: nut.dkey)
1748 return [self.get_content(nut, model=model) for nut in nuts]
1750 @filldocs
1751 def get_sensors(
1752 self, obj=None, tmin=None, tmax=None, time=None, codes=None):
1754 '''
1755 Get sensors matching given constraints.
1757 %(query_args)s
1759 :returns:
1760 List of :py:class:`~pyrocko.squirrel.model.Sensor` objects.
1762 See :py:meth:`iter_nuts` for details on time span matching.
1763 '''
1765 tmin, tmax, codes = self._get_selection_args(
1766 CHANNEL, obj, tmin, tmax, time, codes)
1768 if codes is not None:
1769 codes = codes_patterns_list(
1770 (entry.replace(channel=entry.channel[:-1] + '?')
1771 if entry != '*' else entry)
1772 for entry in codes)
1774 nuts = sorted(
1775 self.iter_nuts(
1776 'channel', tmin, tmax, codes), key=lambda nut: nut.dkey)
1778 return model.Sensor.from_channels(
1779 self.get_content(nut) for nut in nuts)
1781 @filldocs
1782 def get_responses(
1783 self, obj=None, tmin=None, tmax=None, time=None, codes=None,
1784 model='squirrel'):
1786 '''
1787 Get instrument responses matching given constraints.
1789 %(query_args)s
1791 :returns:
1792 List of :py:class:`~pyrocko.squirrel.model.Response` objects.
1794 See :py:meth:`iter_nuts` for details on time span matching.
1795 '''
1797 args = self._get_selection_args(
1798 RESPONSE, obj, tmin, tmax, time, codes)
1800 nuts = sorted(
1801 self.iter_nuts('response', *args), key=lambda nut: nut.dkey)
1803 return [self.get_content(nut, model=model) for nut in nuts]
1805 @filldocs
1806 def get_response(
1807 self, obj=None, tmin=None, tmax=None, time=None, codes=None,
1808 model='squirrel'):
1810 '''
1811 Get instrument response matching given constraints.
1813 %(query_args)s
1815 :returns:
1816 :py:class:`~pyrocko.squirrel.model.Response` object.
1818 Same as :py:meth:`get_responses` but returning exactly one response.
1819 Raises :py:exc:`~pyrocko.squirrel.error.NotAvailable` if zero or more
1820 than one is available.
1822 See :py:meth:`iter_nuts` for details on time span matching.
1823 '''
1825 if model == 'stationxml':
1826 model_ = 'stationxml+'
1827 else:
1828 model_ = model
1830 responses = self.get_responses(
1831 obj, tmin, tmax, time, codes, model=model_)
1832 if len(responses) == 0:
1833 raise error.NotAvailable(
1834 'No instrument response available (%s).'
1835 % self._get_selection_args_str(
1836 RESPONSE, obj, tmin, tmax, time, codes))
1838 elif len(responses) > 1:
1839 if model_ == 'squirrel':
1840 resps_sq = responses
1841 elif model_ == 'stationxml+':
1842 resps_sq = [resp[0] for resp in responses]
1843 else:
1844 raise ValueError('Invalid response model: %s' % model)
1846 rinfo = ':\n' + '\n'.join(
1847 ' ' + resp.summary for resp in resps_sq)
1849 raise error.NotAvailable(
1850 'Multiple instrument responses matching given constraints '
1851 '(%s)%s' % (
1852 self._get_selection_args_str(
1853 RESPONSE, obj, tmin, tmax, time, codes), rinfo))
1855 if model == 'stationxml':
1856 return responses[0][1]
1857 else:
1858 return responses[0]
1860 @filldocs
1861 def get_events(
1862 self, obj=None, tmin=None, tmax=None, time=None, codes=None):
1864 '''
1865 Get events matching given constraints.
1867 %(query_args)s
1869 :returns:
1870 List of :py:class:`~pyrocko.model.event.Event` objects.
1872 See :py:meth:`iter_nuts` for details on time span matching.
1873 '''
1875 args = self._get_selection_args(EVENT, obj, tmin, tmax, time, codes)
1876 nuts = sorted(
1877 self.iter_nuts('event', *args), key=lambda nut: nut.dkey)
1879 return [self.get_content(nut) for nut in nuts]
1881 def _redeem_promises(self, *args):
1883 tmin, tmax, _ = args
1885 waveforms = list(self.iter_nuts('waveform', *args))
1886 promises = list(self.iter_nuts('waveform_promise', *args))
1888 codes_to_avail = defaultdict(list)
1889 for nut in waveforms:
1890 codes_to_avail[nut.codes].append((nut.tmin, nut.tmax))
1892 def tts(x):
1893 if isinstance(x, tuple):
1894 return tuple(tts(e) for e in x)
1895 elif isinstance(x, list):
1896 return list(tts(e) for e in x)
1897 else:
1898 return util.time_to_str(x)
1900 orders = []
1901 for promise in promises:
1902 waveforms_avail = codes_to_avail[promise.codes]
1903 for block_tmin, block_tmax in blocks(
1904 max(tmin, promise.tmin),
1905 min(tmax, promise.tmax),
1906 promise.deltat):
1908 orders.append(
1909 WaveformOrder(
1910 source_id=promise.file_path,
1911 codes=promise.codes,
1912 tmin=block_tmin,
1913 tmax=block_tmax,
1914 deltat=promise.deltat,
1915 gaps=gaps(waveforms_avail, block_tmin, block_tmax)))
1917 orders_noop, orders = lpick(lambda order: order.gaps, orders)
1919 order_keys_noop = set(order_key(order) for order in orders_noop)
1920 if len(order_keys_noop) != 0 or len(orders_noop) != 0:
1921 logger.info(
1922 'Waveform orders already satisified with cached/local data: '
1923 '%i (%i)' % (len(order_keys_noop), len(orders_noop)))
1925 source_ids = []
1926 sources = {}
1927 for source in self._sources:
1928 if isinstance(source, fdsn.FDSNSource):
1929 source_ids.append(source._source_id)
1930 sources[source._source_id] = source
1932 source_priority = dict(
1933 (source_id, i) for (i, source_id) in enumerate(source_ids))
1935 order_groups = defaultdict(list)
1936 for order in orders:
1937 order_groups[order_key(order)].append(order)
1939 for k, order_group in order_groups.items():
1940 order_group.sort(
1941 key=lambda order: source_priority[order.source_id])
1943 n_order_groups = len(order_groups)
1945 if len(order_groups) != 0 or len(orders) != 0:
1946 logger.info(
1947 'Waveform orders standing for download: %i (%i)'
1948 % (len(order_groups), len(orders)))
1950 task = make_task('Waveform orders processed', n_order_groups)
1951 else:
1952 task = None
1954 def split_promise(order):
1955 self._split_nuts(
1956 'waveform_promise',
1957 order.tmin, order.tmax,
1958 codes=order.codes,
1959 path=order.source_id)
1961 def release_order_group(order):
1962 okey = order_key(order)
1963 for followup in order_groups[okey]:
1964 split_promise(followup)
1966 del order_groups[okey]
1968 if task:
1969 task.update(n_order_groups - len(order_groups))
1971 def noop(order):
1972 pass
1974 def success(order):
1975 release_order_group(order)
1976 split_promise(order)
1978 def batch_add(paths):
1979 self.add(paths)
1981 calls = queue.Queue()
1983 def enqueue(f):
1984 def wrapper(*args):
1985 calls.put((f, args))
1987 return wrapper
1989 for order in orders_noop:
1990 split_promise(order)
1992 while order_groups:
1994 orders_now = []
1995 empty = []
1996 for k, order_group in order_groups.items():
1997 try:
1998 orders_now.append(order_group.pop(0))
1999 except IndexError:
2000 empty.append(k)
2002 for k in empty:
2003 del order_groups[k]
2005 by_source_id = defaultdict(list)
2006 for order in orders_now:
2007 by_source_id[order.source_id].append(order)
2009 threads = []
2010 for source_id in by_source_id:
2011 def download():
2012 try:
2013 sources[source_id].download_waveforms(
2014 by_source_id[source_id],
2015 success=enqueue(success),
2016 error_permanent=enqueue(split_promise),
2017 error_temporary=noop,
2018 batch_add=enqueue(batch_add))
2020 finally:
2021 calls.put(None)
2023 thread = threading.Thread(target=download)
2024 thread.start()
2025 threads.append(thread)
2027 ndone = 0
2028 while ndone < len(threads):
2029 ret = calls.get()
2030 if ret is None:
2031 ndone += 1
2032 else:
2033 ret[0](*ret[1])
2035 for thread in threads:
2036 thread.join()
2038 if task:
2039 task.update(n_order_groups - len(order_groups))
2041 if task:
2042 task.done()
2044 @filldocs
2045 def get_waveform_nuts(
2046 self, obj=None, tmin=None, tmax=None, time=None, codes=None):
2048 '''
2049 Get waveform content entities matching given constraints.
2051 %(query_args)s
2053 Like :py:meth:`get_nuts` with ``kind='waveform'`` but additionally
2054 resolves matching waveform promises (downloads waveforms from remote
2055 sources).
2057 See :py:meth:`iter_nuts` for details on time span matching.
2058 '''
2060 args = self._get_selection_args(WAVEFORM, obj, tmin, tmax, time, codes)
2061 self._redeem_promises(*args)
2062 return sorted(
2063 self.iter_nuts('waveform', *args), key=lambda nut: nut.dkey)
    @filldocs
    def get_waveforms(
            self, obj=None, tmin=None, tmax=None, time=None, codes=None,
            uncut=False, want_incomplete=True, degap=True, maxgap=5,
            maxlap=None, snap=None, include_last=False, load_data=True,
            accessor_id='default', operator_params=None):

        '''
        Get waveforms matching given constraints.

        %(query_args)s

        :param uncut:
            Set to ``True``, to disable cutting traces to [``tmin``, ``tmax``]
            and to disable degapping/deoverlapping. Returns untouched traces as
            they are read from file segment. File segments are always read in
            their entirety.
        :type uncut:
            bool

        :param want_incomplete:
            If ``True``, gappy/incomplete traces are included in the result.
        :type want_incomplete:
            bool

        :param degap:
            If ``True``, connect traces and remove gaps and overlaps.
        :type degap:
            bool

        :param maxgap:
            Maximum gap size in samples which is filled with interpolated
            samples when ``degap`` is ``True``.
        :type maxgap:
            int

        :param maxlap:
            Maximum overlap size in samples which is removed when ``degap`` is
            ``True``.
        :type maxlap:
            int

        :param snap:
            Rounding functions used when computing sample index from time
            instance, for trace start and trace end, respectively. By default,
            ``(round, round)`` is used.
        :type snap:
            tuple of 2 callables

        :param include_last:
            If ``True``, add one more sample to the returned traces (the sample
            which would be the first sample of a query with ``tmin`` set to the
            current value of ``tmax``).
        :type include_last:
            bool

        :param load_data:
            If ``True``, waveform data samples are read from files (or cache).
            If ``False``, meta-information-only traces are returned (dummy
            traces with no data samples).
        :type load_data:
            bool

        :param accessor_id:
            Name of consumer on who's behalf data is accessed. Used in cache
            management (see :py:mod:`~pyrocko.squirrel.cache`). Used as a key
            to distinguish different points of extraction for the decision of
            when to release cached waveform data. Should be used when data is
            alternately extracted from more than one region / selection.
        :type accessor_id:
            str

        :returns:
            List of :py:class:`~pyrocko.trace.Trace` objects.

        See :py:meth:`iter_nuts` for details on time span matching.

        Loaded data is kept in memory (at least) until
        :py:meth:`clear_accessor` has been called or
        :py:meth:`advance_accessor` has been called two consecutive times
        without data being accessed between the two calls (by this accessor).
        Data may still be further kept in the memory cache if held alive by
        consumers with a different ``accessor_id``.
        '''

        tmin, tmax, codes = self._get_selection_args(
            WAVEFORM, obj, tmin, tmax, time, codes)

        self_tmin, self_tmax = self.get_time_span(
            ['waveform', 'waveform_promise'])

        if None in (self_tmin, self_tmax):
            logger.warning(
                'No waveforms available.')
            return []

        # Fall back to the full available time span where unconstrained.
        tmin = tmin if tmin is not None else self_tmin
        tmax = tmax if tmax is not None else self_tmax

        if codes is not None and len(codes) == 1:
            # TODO: fix for multiple / mixed codes
            operator = self.get_operator(codes[0])
            if operator is not None:
                # Derived/virtual channel: delegate to the operator.
                return operator.get_waveforms(
                    self, codes[0],
                    tmin=tmin, tmax=tmax,
                    uncut=uncut, want_incomplete=want_incomplete, degap=degap,
                    maxgap=maxgap, maxlap=maxlap, snap=snap,
                    include_last=include_last, load_data=load_data,
                    accessor_id=accessor_id, params=operator_params)

        # May trigger downloads from remote sources (promise redemption).
        nuts = self.get_waveform_nuts(obj, tmin, tmax, time, codes)

        if load_data:
            traces = [
                self.get_content(nut, 'waveform', accessor_id) for nut in nuts]

        else:
            # Meta-information-only dummy traces without data samples.
            traces = [
                trace.Trace(**nut.trace_kwargs) for nut in nuts]

        if uncut:
            return traces

        if snap is None:
            snap = (round, round)

        chopped = []
        for tr in traces:
            if not load_data and tr.ydata is not None:
                # A cached trace may still carry data in meta-only mode:
                # replace it by a data-less copy.
                tr = tr.copy(data=False)
                tr.ydata = None

            try:
                chopped.append(tr.chop(
                    tmin, tmax,
                    inplace=False,
                    snap=snap,
                    include_last=include_last))

            except trace.NoData:
                pass

        processed = self._process_chopped(
            chopped, degap, maxgap, maxlap, want_incomplete, tmin, tmax)

        return processed
2210 @filldocs
2211 def chopper_waveforms(
2212 self, obj=None, tmin=None, tmax=None, time=None, codes=None,
2213 tinc=None, tpad=0.,
2214 want_incomplete=True, snap_window=False,
2215 degap=True, maxgap=5, maxlap=None,
2216 snap=None, include_last=False, load_data=True,
2217 accessor_id=None, clear_accessor=True, operator_params=None,
2218 grouping=None):
2220 '''
2221 Iterate window-wise over waveform archive.
2223 %(query_args)s
2225 :param tinc:
2226 Time increment (window shift time) (default uses ``tmax-tmin``).
2227 :type tinc:
2228 timestamp
2230 :param tpad:
2231 Padding time appended on either side of the data window (window
2232 overlap is ``2*tpad``).
2233 :type tpad:
2234 timestamp
2236 :param want_incomplete:
2237 If ``True``, gappy/incomplete traces are included in the result.
2238 :type want_incomplete:
2239 bool
2241 :param snap_window:
2242 If ``True``, start time windows at multiples of tinc with respect
2243 to system time zero.
2244 :type snap_window:
2245 bool
2247 :param degap:
2248 If ``True``, connect traces and remove gaps and overlaps.
2249 :type degap:
2250 bool
2252 :param maxgap:
2253 Maximum gap size in samples which is filled with interpolated
2254 samples when ``degap`` is ``True``.
2255 :type maxgap:
2256 int
2258 :param maxlap:
2259 Maximum overlap size in samples which is removed when ``degap`` is
2260 ``True``.
2261 :type maxlap:
2262 int
2264 :param snap:
2265 Rounding functions used when computing sample index from time
2266 instance, for trace start and trace end, respectively. By default,
2267 ``(round, round)`` is used.
2268 :type snap:
2269 tuple of 2 callables
2271 :param include_last:
2272 If ``True``, add one more sample to the returned traces (the sample
2273 which would be the first sample of a query with ``tmin`` set to the
2274 current value of ``tmax``).
2275 :type include_last:
2276 bool
2278 :param load_data:
2279 If ``True``, waveform data samples are read from files (or cache).
2280 If ``False``, meta-information-only traces are returned (dummy
2281 traces with no data samples).
2282 :type load_data:
2283 bool
2285 :param accessor_id:
2286 Name of consumer on who's behalf data is accessed. Used in cache
2287 management (see :py:mod:`~pyrocko.squirrel.cache`). Used as a key
2288 to distinguish different points of extraction for the decision of
2289 when to release cached waveform data. Should be used when data is
2290 alternately extracted from more than one region / selection.
2291 :type accessor_id:
2292 str
2294 :param clear_accessor:
2295 If ``True`` (default), :py:meth:`clear_accessor` is called when the
2296 chopper finishes. Set to ``False`` to keep loaded waveforms in
2297 memory when the generator returns.
2298 :type clear_accessor:
2299 bool
2301 :param grouping:
2302 By default, traversal over the data is over time and all matching
2303 traces of a time window are yielded. Using this option, it is
2304 possible to traverse the data first by group (e.g. station or
2305 network) and second by time. This can reduce the number of traces
2306 in each batch and thus reduce the memory footprint of the process.
2307 :type grouping:
2308 :py:class:`~pyrocko.squirrel.operator.Grouping`
2310 :yields:
2311 A list of :py:class:`~pyrocko.trace.Trace` objects for every
2312 extracted time window.
2314 See :py:meth:`iter_nuts` for details on time span matching.
2315 '''
2317 tmin, tmax, codes = self._get_selection_args(
2318 WAVEFORM, obj, tmin, tmax, time, codes)
2320 self_tmin, self_tmax = self.get_time_span(
2321 ['waveform', 'waveform_promise'])
2323 if None in (self_tmin, self_tmax):
2324 logger.warning(
2325 'Content has undefined time span. No waveforms and no '
2326 'waveform promises?')
2327 return
2329 if snap_window and tinc is not None:
2330 tmin = tmin if tmin is not None else self_tmin
2331 tmax = tmax if tmax is not None else self_tmax
2332 tmin = math.floor(tmin / tinc) * tinc
2333 tmax = math.ceil(tmax / tinc) * tinc
2334 else:
2335 tmin = tmin if tmin is not None else self_tmin + tpad
2336 tmax = tmax if tmax is not None else self_tmax - tpad
2338 tinc = tinc if tinc is not None else tmax - tmin
2340 try:
2341 if accessor_id is None:
2342 accessor_id = 'chopper%i' % self._n_choppers_active
2344 self._n_choppers_active += 1
2346 eps = tinc * 1e-6
2347 if tinc != 0.0:
2348 nwin = int(((tmax - eps) - tmin) / tinc) + 1
2349 else:
2350 nwin = 1
2352 if grouping is None:
2353 codes_list = [codes]
2354 else:
2355 operator = Operator(
2356 filtering=CodesPatternFiltering(codes=codes),
2357 grouping=grouping)
2359 available = set(self.get_codes(kind='waveform'))
2360 available.update(self.get_codes(kind='waveform_promise'))
2361 operator.update_mappings(sorted(available))
2363 codes_list = [
2364 codes_patterns_list(scl)
2365 for scl in operator.iter_in_codes()]
2367 ngroups = len(codes_list)
2368 for igroup, scl in enumerate(codes_list):
2369 for iwin in range(nwin):
2370 wmin, wmax = tmin+iwin*tinc, min(tmin+(iwin+1)*tinc, tmax)
2372 chopped = self.get_waveforms(
2373 tmin=wmin-tpad,
2374 tmax=wmax+tpad,
2375 codes=scl,
2376 snap=snap,
2377 include_last=include_last,
2378 load_data=load_data,
2379 want_incomplete=want_incomplete,
2380 degap=degap,
2381 maxgap=maxgap,
2382 maxlap=maxlap,
2383 accessor_id=accessor_id,
2384 operator_params=operator_params)
2386 self.advance_accessor(accessor_id)
2388 yield Batch(
2389 tmin=wmin,
2390 tmax=wmax,
2391 i=iwin,
2392 n=nwin,
2393 igroup=igroup,
2394 ngroups=ngroups,
2395 traces=chopped)
2397 finally:
2398 self._n_choppers_active -= 1
2399 if clear_accessor:
2400 self.clear_accessor(accessor_id, 'waveform')
2402 def _process_chopped(
2403 self, chopped, degap, maxgap, maxlap, want_incomplete, tmin, tmax):
2405 chopped.sort(key=lambda a: a.full_id)
2406 if degap:
2407 chopped = trace.degapper(chopped, maxgap=maxgap, maxlap=maxlap)
2409 if not want_incomplete:
2410 chopped_weeded = []
2411 for tr in chopped:
2412 emin = tr.tmin - tmin
2413 emax = tr.tmax + tr.deltat - tmax
2414 if (abs(emin) <= 0.5*tr.deltat and abs(emax) <= 0.5*tr.deltat):
2415 chopped_weeded.append(tr)
2417 elif degap:
2418 if (0. < emin <= 5. * tr.deltat
2419 and -5. * tr.deltat <= emax < 0.):
2421 tr.extend(tmin, tmax-tr.deltat, fillmethod='repeat')
2422 chopped_weeded.append(tr)
2424 chopped = chopped_weeded
2426 return chopped
2428 def _get_pyrocko_stations(
2429 self, obj=None, tmin=None, tmax=None, time=None, codes=None):
2431 from pyrocko import model as pmodel
2433 by_nsl = defaultdict(lambda: (list(), list()))
2434 for station in self.get_stations(obj, tmin, tmax, time, codes):
2435 sargs = station._get_pyrocko_station_args()
2436 by_nsl[station.codes.nsl][0].append(sargs)
2438 for channel in self.get_channels(obj, tmin, tmax, time, codes):
2439 sargs = channel._get_pyrocko_station_args()
2440 sargs_list, channels_list = by_nsl[channel.codes.nsl]
2441 sargs_list.append(sargs)
2442 channels_list.append(channel)
2444 pstations = []
2445 nsls = list(by_nsl.keys())
2446 nsls.sort()
2447 for nsl in nsls:
2448 sargs_list, channels_list = by_nsl[nsl]
2449 sargs = util.consistency_merge(
2450 [('',) + x for x in sargs_list])
2452 by_c = defaultdict(list)
2453 for ch in channels_list:
2454 by_c[ch.codes.channel].append(ch._get_pyrocko_channel_args())
2456 chas = list(by_c.keys())
2457 chas.sort()
2458 pchannels = []
2459 for cha in chas:
2460 list_of_cargs = by_c[cha]
2461 cargs = util.consistency_merge(
2462 [('',) + x for x in list_of_cargs])
2463 pchannels.append(pmodel.Channel(*cargs))
2465 pstations.append(
2466 pmodel.Station(*sargs, channels=pchannels))
2468 return pstations
2470 @property
2471 def pile(self):
2473 '''
2474 Emulates the older :py:class:`pyrocko.pile.Pile` interface.
2476 This property exposes a :py:class:`pyrocko.squirrel.pile.Pile` object,
2477 which emulates most of the older :py:class:`pyrocko.pile.Pile` methods
2478 but uses the fluffy power of the Squirrel under the hood.
2480 This interface can be used as a drop-in replacement for piles which are
2481 used in existing scripts and programs for efficient waveform data
2482 access. The Squirrel-based pile scales better for large datasets. Newer
2483 scripts should use Squirrel's native methods to avoid the emulation
2484 overhead.
2485 '''
2486 from . import pile
2488 if self._pile is None:
2489 self._pile = pile.Pile(self)
2491 return self._pile
    def snuffle(self):
        '''
        Look at dataset in Snuffler.
        '''
        # Delegate to the pile emulation, which provides the Snuffler
        # entry point.
        self.pile.snuffle()
2499 def _gather_codes_keys(self, kind, gather, selector):
2500 return set(
2501 gather(codes)
2502 for codes in self.iter_codes(kind)
2503 if selector is None or selector(codes))
    def __str__(self):
        # Human-readable summary, delegated to the stats object.
        return str(self.get_stats())
    def get_coverage(
            self, kind, tmin=None, tmax=None, codes=None, limit=None):

        '''
        Get coverage information.

        Get information about strips of gapless data coverage.

        :param kind:
            Content kind to be queried.
        :type kind:
            str

        :param tmin:
            Start time of query interval.
        :type tmin:
            timestamp

        :param tmax:
            End time of query interval.
        :type tmax:
            timestamp

        :param codes:
            If given, restrict query to given content codes patterns.
        :type codes:
            :py:class:`list` of :py:class:`~pyrocko.squirrel.model.Codes`
            objects appropriate for the queried content type, or anything which
            can be converted to such objects.

        :param limit:
            Limit query to return only up to a given maximum number of entries
            per matching time series (without setting this option, very gappy
            data could cause the query to execute for a very long time).
        :type limit:
            int

        :returns:
            Information about time spans covered by the requested time series
            data.
        :rtype:
            :py:class:`list` of :py:class:`Coverage` objects
        '''

        # Times are stored split into integer seconds plus a float offset.
        tmin_seconds, tmin_offset = model.tsplit(tmin)
        tmax_seconds, tmax_offset = model.tsplit(tmax)
        kind_id = to_kind_id(kind)

        codes_info = list(self._iter_codes_info(kind=kind))

        # Select the (pattern, kind_codes_id, codes, deltat) tuples to
        # process: all known series, or only those matching the given codes
        # patterns.
        kdata_all = []
        if codes is None:
            for _, codes_entry, deltat, kind_codes_id, _ in codes_info:
                kdata_all.append(
                    (codes_entry, kind_codes_id, codes_entry, deltat))

        else:
            for codes_entry in codes:
                pattern = to_codes(kind_id, codes_entry)
                for _, codes_entry, deltat, kind_codes_id, _ in codes_info:
                    if model.match_codes(pattern, codes_entry):
                        kdata_all.append(
                            (pattern, kind_codes_id, codes_entry, deltat))

        kind_codes_ids = [x[1] for x in kdata_all]

        # Number of entries covering tmin itself, per series. The coverage
        # table stores only changes, so the state exactly at tmin is
        # obtained from the nuts index instead.
        counts_at_tmin = {}
        if tmin is not None:
            for nut in self.iter_nuts(
                    kind, tmin, tmin, kind_codes_ids=kind_codes_ids):

                k = nut.codes, nut.deltat
                if k not in counts_at_tmin:
                    counts_at_tmin[k] = 0

                counts_at_tmin[k] += 1

        coverages = []
        for pattern, kind_codes_id, codes_entry, deltat in kdata_all:
            # entry layout: [pattern, codes, deltat, first-change time,
            # last-change time, changes], where changes is a list of
            # (time, count) tuples, or None when truncated by ``limit``.
            entry = [pattern, codes_entry, deltat, None, None, []]
            for i, order in [(0, 'ASC'), (1, 'DESC')]:
                # Query first (ASC) and last (DESC) coverage change time
                # of this series.
                sql = self._sql('''
                    SELECT
                        time_seconds,
                        time_offset
                    FROM %(db)s.%(coverage)s
                    WHERE
                        kind_codes_id == ?
                    ORDER BY
                        kind_codes_id ''' + order + ''',
                        time_seconds ''' + order + ''',
                        time_offset ''' + order + '''
                    LIMIT 1
                ''')

                for row in self._conn.execute(sql, [kind_codes_id]):
                    entry[3+i] = model.tjoin(row[0], row[1])

            # No coverage entries at all for this series.
            if None in entry[3:5]:
                continue

            args = [kind_codes_id]

            sql_time = ''
            if tmin is not None:
                # intentionally < because (== tmin) is queried from nuts
                sql_time += ' AND ( ? < time_seconds ' \
                    'OR ( ? == time_seconds AND ? < time_offset ) ) '
                args.extend([tmin_seconds, tmin_seconds, tmin_offset])

            if tmax is not None:
                sql_time += ' AND ( time_seconds < ? ' \
                    'OR ( ? == time_seconds AND time_offset <= ? ) ) '
                args.extend([tmax_seconds, tmax_seconds, tmax_offset])

            sql_limit = ''
            if limit is not None:
                sql_limit = ' LIMIT ?'
                args.append(limit)

            # All coverage changes of this series within the query
            # interval, in time order.
            sql = self._sql('''
                SELECT
                    time_seconds,
                    time_offset,
                    step
                FROM %(db)s.%(coverage)s
                WHERE
                    kind_codes_id == ?
                    ''' + sql_time + '''
                ORDER BY
                    kind_codes_id,
                    time_seconds,
                    time_offset
            ''' + sql_limit)

            rows = list(self._conn.execute(sql, args))

            if limit is not None and len(rows) == limit:
                # Too many changes: mark the changes list as truncated.
                entry[-1] = None
            else:
                counts = counts_at_tmin.get((codes_entry, deltat), 0)
                tlast = None
                if tmin is not None:
                    entry[-1].append((tmin, counts))
                    tlast = tmin

                # Accumulate the per-change ``step`` values into absolute
                # counts along the time axis.
                for row in rows:
                    t = model.tjoin(row[0], row[1])
                    counts += row[2]
                    entry[-1].append((t, counts))
                    tlast = t

                # Close the changes list at tmax if not already there.
                if tmax is not None and (tlast is None or tlast != tmax):
                    entry[-1].append((tmax, counts))

            coverages.append(model.Coverage.from_values(entry + [kind_id]))

        return coverages
    def get_stationxml(
            self, obj=None, tmin=None, tmax=None, time=None, codes=None,
            level='response'):

        '''
        Get station/channel/response metadata in StationXML representation.

        %(query_args)s

        :param level:
            Amount of detail to include: ``'network'``, ``'station'``,
            ``'channel'`` or ``'response'`` (default).
        :type level:
            str

        :returns:
            :py:class:`~pyrocko.io.stationxml.FDSNStationXML` object.
        '''

        if level not in ('network', 'station', 'channel', 'response'):
            raise ValueError('Invalid level: %s' % level)

        tmin, tmax, codes = self._get_selection_args(
            CHANNEL, obj, tmin, tmax, time, codes)

        filtering = CodesPatternFiltering(codes=codes)

        # Distinct (net, sta, loc, cha) tuples of all matching channels.
        nslcs = list(set(
            codes.nslc for codes in
            filtering.filter(self.get_codes(kind='channel'))))

        from pyrocko.io import stationxml as sx

        networks = []
        # Walk the nslc tuples hierarchically:
        # network -> station -> location -> channel.
        for net, stas in prefix_tree(nslcs):
            network = sx.Network(code=net)
            networks.append(network)

            if level not in ('station', 'channel', 'response'):
                continue

            for sta, locs in stas:
                stations = self.get_stations(
                    tmin=tmin,
                    tmax=tmax,
                    codes=(net, sta, '*'),
                    model='stationxml')

                # Station epochs of the same station must not overlap.
                errors = sx.check_overlaps(
                    'Station', (net, sta), stations)

                if errors:
                    raise sx.Inconsistencies(
                        'Inconsistencies found:\n  %s'
                        % '\n  '.join(errors))

                network.station_list.extend(stations)

                if level not in ('channel', 'response'):
                    continue

                for loc, chas in locs:
                    for cha, _ in chas:
                        channels = self.get_channels(
                            tmin=tmin,
                            tmax=tmax,
                            codes=(net, sta, loc, cha),
                            model='stationxml')

                        # Channel epochs of the same channel must not
                        # overlap either.
                        errors = sx.check_overlaps(
                            'Channel', (net, sta, loc, cha), channels)

                        if errors:
                            raise sx.Inconsistencies(
                                'Inconsistencies found:\n  %s'
                                % '\n  '.join(errors))

                        for channel in channels:
                            # Attach each channel epoch to the station
                            # epoch containing it.
                            station = sx.find_containing(stations, channel)
                            if station is not None:
                                station.channel_list.append(channel)
                            else:
                                raise sx.Inconsistencies(
                                    'No station or station epoch found for '
                                    'channel: %s' % '.'.join(
                                        (net, sta, loc, cha)))

                            if level != 'response':
                                continue

                            response_sq, response_sx = self.get_response(
                                codes=(net, sta, loc, cha),
                                tmin=channel.start_date,
                                tmax=channel.end_date,
                                model='stationxml+')

                            # The response epoch must exactly match the
                            # channel epoch (open ends compare equal).
                            if not (
                                    sx.eq_open(
                                        channel.start_date, response_sq.tmin)
                                    and sx.eq_open(
                                        channel.end_date, response_sq.tmax)):

                                raise sx.Inconsistencies(
                                    'Response time span does not match '
                                    'channel time span: %s' % '.'.join(
                                        (net, sta, loc, cha)))

                            channel.response = response_sx

        return sx.FDSNStationXML(
            source='Generated by Pyrocko Squirrel.',
            network_list=networks)
    def add_operator(self, op):
        # Register an operator; its mappings are established later via
        # update_operator_mappings().
        self._operators.append(op)
2777 def update_operator_mappings(self):
2778 available = self.get_codes(kind=('channel'))
2780 for operator in self._operators:
2781 operator.update_mappings(available, self._operator_registry)
2783 def iter_operator_mappings(self):
2784 for operator in self._operators:
2785 for in_codes, out_codes in operator.iter_mappings():
2786 yield operator, in_codes, out_codes
    def get_operator_mappings(self):
        # List variant of iter_operator_mappings().
        return list(self.iter_operator_mappings())
2791 def get_operator(self, codes):
2792 try:
2793 return self._operator_registry[codes][0]
2794 except KeyError:
2795 return None
2797 def get_operator_group(self, codes):
2798 try:
2799 return self._operator_registry[codes]
2800 except KeyError:
2801 return None, (None, None, None)
2803 def iter_operator_codes(self):
2804 for _, _, out_codes in self.iter_operator_mappings():
2805 for codes in out_codes:
2806 yield codes
    def get_operator_codes(self):
        # List variant of iter_operator_codes().
        return list(self.iter_operator_codes())
2811 def print_tables(self, table_names=None, stream=None):
2812 '''
2813 Dump raw database tables in textual form (for debugging purposes).
2815 :param table_names:
2816 Names of tables to be dumped or ``None`` to dump all.
2817 :type table_names:
2818 :py:class:`list` of :py:class:`str`
2820 :param stream:
2821 Open file or ``None`` to dump to standard output.
2822 '''
2824 if stream is None:
2825 stream = sys.stdout
2827 if isinstance(table_names, str):
2828 table_names = [table_names]
2830 if table_names is None:
2831 table_names = [
2832 'selection_file_states',
2833 'selection_nuts',
2834 'selection_kind_codes_count',
2835 'files', 'nuts', 'kind_codes', 'kind_codes_count']
2837 m = {
2838 'selection_file_states': '%(db)s.%(file_states)s',
2839 'selection_nuts': '%(db)s.%(nuts)s',
2840 'selection_kind_codes_count': '%(db)s.%(kind_codes_count)s',
2841 'files': 'files',
2842 'nuts': 'nuts',
2843 'kind_codes': 'kind_codes',
2844 'kind_codes_count': 'kind_codes_count'}
2846 for table_name in table_names:
2847 self._database.print_table(
2848 m[table_name] % self._names, stream=stream)
class SquirrelStats(Object):
    '''
    Container to hold statistics about contents available from a Squirrel.

    See also :py:meth:`Squirrel.get_stats`.
    '''

    nfiles = Int.T(
        help='Number of files in selection.')
    nnuts = Int.T(
        help='Number of index nuts in selection.')
    codes = List.T(
        Tuple.T(content_t=String.T()),
        help='Available code sequences in selection, e.g. '
             '(agency, network, station, location) for stations nuts.')
    kinds = List.T(
        String.T(),
        help='Available content types in selection.')
    total_size = Int.T(
        help='Aggregated file size of files is selection.')
    counts = Dict.T(
        String.T(), Dict.T(Tuple.T(content_t=String.T()), Int.T()),
        help='Breakdown of how many nuts of any content type and code '
             'sequence are available in selection, ``counts[kind][codes]``.')
    time_spans = Dict.T(
        String.T(), Tuple.T(content_t=Timestamp.T()),
        help='Time spans by content type.')
    sources = List.T(
        String.T(),
        help='Descriptions of attached sources.')
    operators = List.T(
        String.T(),
        help='Descriptions of attached operators.')

    def __str__(self):
        # Total number of nuts per content kind.
        kind_counts = dict(
            (kind, sum(self.counts[kind].values())) for kind in self.kinds)

        scodes = model.codes_to_str_abbreviated(self.codes)

        ssources = '<none>' if not self.sources else '\n' + '\n'.join(
            '  ' + s for s in self.sources)

        soperators = '<none>' if not self.operators else '\n' + '\n'.join(
            '  ' + s for s in self.operators)

        def stime(t):
            # Format a time, hiding unset values and the global min/max
            # sentinels.
            return util.tts(t) if t is not None and t not in (
                model.g_tmin, model.g_tmax) else '<none>'

        def stable(rows):
            # Render rows as a left-justified plain-text table.
            ns = [max(len(w) for w in col) for col in zip(*rows)]
            return '\n'.join(
                ' '.join(w.ljust(n) for n, w in zip(ns, row))
                for row in rows)

        def indent(s):
            # Indent every line of a multi-line string by two spaces.
            return '\n'.join('  '+line for line in s.splitlines())

        # Per-kind summary: count and covered time span, sorted by kind.
        stspans = '<none>' if not self.kinds else '\n' + indent(stable([(
            kind + ':',
            str(kind_counts[kind]),
            stime(self.time_spans[kind][0]),
            '-',
            stime(self.time_spans[kind][1])) for kind in sorted(self.kinds)]))

        s = '''
Number of files:               %i
Total size of known files:     %s
Number of index nuts:          %i
Available content kinds:       %s
Available codes:               %s
Sources:                       %s
Operators:                     %s''' % (
            self.nfiles,
            util.human_bytesize(self.total_size),
            self.nnuts,
            stspans, scodes, ssources, soperators)

        return s.lstrip()
# Public API of this module.
__all__ = [
    'Squirrel',
    'SquirrelStats',
]