1# http://pyrocko.org - GPLv3
2#
3# The Pyrocko Developers, 21st Century
4# ---|P------/S----------~Lg----------
6import sys
7import os
9import math
10import logging
11import threading
12import queue
13from collections import defaultdict
15from pyrocko.guts import Object, Int, List, Tuple, String, Timestamp, Dict
16from pyrocko import util, trace
17from pyrocko.progress import progress
19from . import model, io, cache, dataset
21from .model import to_kind_id, WaveformOrder, to_kind, to_codes, \
22 STATION, CHANNEL, RESPONSE, EVENT, WAVEFORM, codes_patterns_list, \
23 codes_patterns_for_kind
24from .client import fdsn, catalog
25from .selection import Selection, filldocs
26from .database import abspath
27from .operators.base import Operator, CodesPatternFiltering
28from . import client, environment, error
# Module-level logger for the Squirrel framework base module.
logger = logging.getLogger('psq.base')

# Prefix used by the guts serialization framework for types defined here.
guts_prefix = 'squirrel'
def make_task(*args):
    '''
    Create a progress task wired to this module's logger.
    '''
    task = progress.task(*args, logger=logger)
    return task
def lpick(condition, seq):
    '''
    Partition *seq* by *condition*.

    Returns a pair of lists ``(nomatch, match)``: elements for which
    ``condition(element)`` evaluates false go into the first list, all
    others into the second.
    '''
    nomatch, match = [], []
    for element in seq:
        (match if condition(element) else nomatch).append(element)

    return nomatch, match
def len_plural(obj):
    '''
    Return ``(len(obj), suffix)``, where suffix is ``'s'`` unless the
    length is exactly one. Handy for ``'%i thing%s' % len_plural(...)``.
    '''
    n = len(obj)
    return n, '' if n == 1 else 's'
def blocks(tmin, tmax, deltat, nsamples_block=100000):
    '''
    Yield consecutive time blocks of ``deltat * nsamples_block`` duration
    covering the span from *tmin* to *tmax*.
    '''
    tblock = util.to_time_float(deltat * nsamples_block)
    kmin = int(math.floor(tmin / tblock))
    kmax = int(math.ceil(tmax / tblock))
    for k in range(kmin, kmax):
        yield k * tblock, (k + 1) * tblock
def gaps(avail, tmin, tmax):
    '''
    Compute the uncovered intervals within ``[tmin, tmax]``.

    :param avail:
        Available (covered) intervals as ``(tmin_a, tmax_a)`` pairs.
    :returns:
        List of ``(tmin_g, tmax_g)`` gap intervals.
    '''
    assert tmin < tmax

    # Sweep-line over interval edges: +1 opens coverage, -1 closes it. The
    # query span itself is added inverted, so level drops to 0 inside gaps.
    events = [(tmax, 1), (tmin, -1)]
    for tmin_a, tmax_a in avail:
        assert tmin_a < tmax_a
        events.append((tmin_a, 1))
        events.append((tmax_a, -1))

    events.sort()

    level = 1
    open_t = None
    result = []
    for t, step in events:
        if level == 1 and step == -1:
            open_t = t
        elif level == 0 and step == 1 and open_t is not None:
            if open_t != t:
                result.append((open_t, t))

        level += step

    return result
def order_key(order):
    '''
    Grouping/sorting key for orders: ``(codes, tmin, tmax)``.
    '''
    return order.codes, order.tmin, order.tmax
89def _is_exact(pat):
90 return not ('*' in pat or '?' in pat or ']' in pat or '[' in pat)
def prefix_tree(tups):
    '''
    Build a sorted prefix tree from equal-length tuples.

    Returns nested ``(value, children)`` pairs, grouping the tuples by
    their leading element and recursing on the remainders.
    '''
    if not tups:
        return []

    # Leaf level: single-element tuples have no children.
    if len(tups[0]) == 1:
        return sorted((tup[0], []) for tup in tups)

    groups = defaultdict(list)
    for tup in tups:
        groups[tup[0]].append(tup[1:])

    return [(key, prefix_tree(groups[key])) for key in sorted(groups)]
def match_time_span(tmin, tmax, obj):
    '''
    Check whether the query span ``[tmin, tmax)`` intersects *obj*'s time
    span. ``None`` on either side means unbounded.
    '''
    if obj.tmin is not None and tmax is not None and obj.tmin > tmax:
        return False

    if tmin is not None and obj.tmax is not None and tmin >= obj.tmax:
        return False

    return True
class Batch(object):
    '''
    Batch of waveforms from window-wise data extraction.

    Encapsulates state and results yielded for each window in window-wise
    waveform extraction with the :py:meth:`Squirrel.chopper_waveforms` method.

    *Attributes:*

    .. py:attribute:: tmin

        Start of this time window.

    .. py:attribute:: tmax

        End of this time window.

    .. py:attribute:: i

        Index of this time window in sequence.

    .. py:attribute:: n

        Total number of time windows in sequence.

    .. py:attribute:: igroup

        Index of this time window's sequence group.

    .. py:attribute:: ngroups

        Total number of sequence groups.

    .. py:attribute:: traces

        Extracted waveforms for this time window.
    '''

    def __init__(self, tmin, tmax, i, n, igroup, ngroups, traces):
        self.tmin, self.tmax = tmin, tmax
        self.i, self.n = i, n
        self.igroup, self.ngroups = igroup, ngroups
        self.traces = traces
164class Squirrel(Selection):
165 '''
166 Prompt, lazy, indexing, caching, dynamic seismological dataset access.
168 :param env:
169 Squirrel environment instance or directory path to use as starting
170 point for its detection. By default, the current directory is used as
171 starting point. When searching for a usable environment the directory
172 ``'.squirrel'`` or ``'squirrel'`` in the current (or starting point)
173 directory is used if it exists, otherwise the parent directories are
        searched upwards for the existence of such a directory. If no such
175 directory is found, the user's global Squirrel environment
176 ``'$HOME/.pyrocko/squirrel'`` is used.
177 :type env:
178 :py:class:`~pyrocko.squirrel.environment.Environment` or
179 :py:class:`str`
181 :param database:
182 Database instance or path to database. By default the
183 database found in the detected Squirrel environment is used.
184 :type database:
185 :py:class:`~pyrocko.squirrel.database.Database` or :py:class:`str`
187 :param cache_path:
188 Directory path to use for data caching. By default, the ``'cache'``
189 directory in the detected Squirrel environment is used.
190 :type cache_path:
191 :py:class:`str`
193 :param persistent:
194 If given a name, create a persistent selection.
195 :type persistent:
196 :py:class:`str`
198 This is the central class of the Squirrel framework. It provides a unified
199 interface to query and access seismic waveforms, station meta-data and
200 event information from local file collections and remote data sources. For
201 prompt responses, a profound database setup is used under the hood. To
202 speed up assemblage of ad-hoc data selections, files are indexed on first
203 use and the extracted meta-data is remembered in the database for
204 subsequent accesses. Bulk data is lazily loaded from disk and remote
205 sources, just when requested. Once loaded, data is cached in memory to
206 expedite typical access patterns. Files and data sources can be dynamically
207 added to and removed from the Squirrel selection at runtime.
209 Queries are restricted to the contents of the files currently added to the
210 Squirrel selection (usually a subset of the file meta-information
211 collection in the database). This list of files is referred to here as the
212 "selection". By default, temporary tables are created in the attached
213 database to hold the names of the files in the selection as well as various
214 indices and counters. These tables are only visible inside the application
215 which created them and are deleted when the database connection is closed
216 or the application exits. To create a selection which is not deleted at
217 exit, supply a name to the ``persistent`` argument of the Squirrel
218 constructor. Persistent selections are shared among applications using the
219 same database.
221 **Method summary**
223 Some of the methods are implemented in :py:class:`Squirrel`'s base class
224 :py:class:`~pyrocko.squirrel.selection.Selection`.
226 .. autosummary::
228 ~Squirrel.add
229 ~Squirrel.add_source
230 ~Squirrel.add_fdsn
231 ~Squirrel.add_catalog
232 ~Squirrel.add_dataset
233 ~Squirrel.add_virtual
234 ~Squirrel.update
235 ~Squirrel.update_waveform_promises
236 ~Squirrel.advance_accessor
237 ~Squirrel.clear_accessor
238 ~Squirrel.reload
239 ~pyrocko.squirrel.selection.Selection.iter_paths
240 ~Squirrel.iter_nuts
241 ~Squirrel.iter_kinds
242 ~Squirrel.iter_deltats
243 ~Squirrel.iter_codes
244 ~pyrocko.squirrel.selection.Selection.get_paths
245 ~Squirrel.get_nuts
246 ~Squirrel.get_kinds
247 ~Squirrel.get_deltats
248 ~Squirrel.get_codes
249 ~Squirrel.get_counts
250 ~Squirrel.get_time_span
251 ~Squirrel.get_deltat_span
252 ~Squirrel.get_nfiles
253 ~Squirrel.get_nnuts
254 ~Squirrel.get_total_size
255 ~Squirrel.get_stats
256 ~Squirrel.get_content
257 ~Squirrel.get_stations
258 ~Squirrel.get_channels
259 ~Squirrel.get_responses
260 ~Squirrel.get_events
261 ~Squirrel.get_waveform_nuts
262 ~Squirrel.get_waveforms
263 ~Squirrel.chopper_waveforms
264 ~Squirrel.get_coverage
265 ~Squirrel.pile
266 ~Squirrel.snuffle
267 ~Squirrel.glob_codes
268 ~pyrocko.squirrel.selection.Selection.get_database
269 ~Squirrel.print_tables
270 '''
    def __init__(
            self, env=None, database=None, cache_path=None, persistent=None):

        # Resolve the environment first; it supplies defaults for all other
        # configuration values below.
        if not isinstance(env, environment.Environment):
            env = environment.get_environment(env)

        if database is None:
            database = env.expand_path(env.database_path)

        if cache_path is None:
            cache_path = env.expand_path(env.cache_path)

        if persistent is None:
            persistent = env.persistent

        Selection.__init__(
            self, database=database, persistent=persistent)

        # Paths stored in the database are kept relative to the directory
        # containing the environment's base path.
        self.get_database().set_basepath(os.path.dirname(env.get_basepath()))

        # Separate in-memory content caches: waveforms are managed
        # independently from other ('default') content.
        self._content_caches = {
            'waveform': cache.ContentCache(),
            'default': cache.ContentCache()}

        self._cache_path = cache_path

        self._sources = []
        self._operators = []
        self._operator_registry = {}

        self._pending_orders = []

        self._pile = None
        self._n_choppers_active = 0

        # Names of the per-selection tables created in the attached database.
        self._names.update({
            'nuts': self.name + '_nuts',
            'kind_codes_count': self.name + '_kind_codes_count',
            'coverage': self.name + '_coverage'})

        with self.transaction('create tables') as cursor:
            self._create_tables_squirrel(cursor)
    def _create_tables_squirrel(self, cursor):
        # Create the selection-specific tables, indices and triggers. All
        # statements use IF NOT EXISTS, so this is safe to call again for a
        # persistent selection.

        # Index table: one row ('nut') per content entity in the selection.
        cursor.execute(self._register_table(self._sql(
            '''
                CREATE TABLE IF NOT EXISTS %(db)s.%(nuts)s (
                    nut_id integer PRIMARY KEY,
                    file_id integer,
                    file_segment integer,
                    file_element integer,
                    kind_id integer,
                    kind_codes_id integer,
                    tmin_seconds integer,
                    tmin_offset integer,
                    tmax_seconds integer,
                    tmax_offset integer,
                    kscale integer)
            ''')))

        # Number of nuts in the selection per kind/codes combination.
        cursor.execute(self._register_table(self._sql(
            '''
                CREATE TABLE IF NOT EXISTS %(db)s.%(kind_codes_count)s (
                    kind_codes_id integer PRIMARY KEY,
                    count integer)
            ''')))

        cursor.execute(self._sql(
            '''
                CREATE UNIQUE INDEX IF NOT EXISTS %(db)s.%(nuts)s_file_element
                    ON %(nuts)s (file_id, file_segment, file_element)
            '''))

        cursor.execute(self._sql(
            '''
                CREATE INDEX IF NOT EXISTS %(db)s.%(nuts)s_index_file_id
                    ON %(nuts)s (file_id)
            '''))

        cursor.execute(self._sql(
            '''
                CREATE INDEX IF NOT EXISTS %(db)s.%(nuts)s_index_tmin_seconds
                    ON %(nuts)s (kind_id, tmin_seconds)
            '''))

        cursor.execute(self._sql(
            '''
                CREATE INDEX IF NOT EXISTS %(db)s.%(nuts)s_index_tmax_seconds
                    ON %(nuts)s (kind_id, tmax_seconds)
            '''))

        cursor.execute(self._sql(
            '''
                CREATE INDEX IF NOT EXISTS %(db)s.%(nuts)s_index_kscale
                    ON %(nuts)s (kind_id, kscale, tmin_seconds)
            '''))

        # Drop nuts when their file is removed from the main database.
        cursor.execute(self._sql(
            '''
                CREATE TRIGGER IF NOT EXISTS %(db)s.%(nuts)s_delete_nuts
                BEFORE DELETE ON main.files FOR EACH ROW
                BEGIN
                  DELETE FROM %(nuts)s WHERE file_id == old.file_id;
                END
            '''))

        # trigger only on size to make silent update of mtime possible
        cursor.execute(self._sql(
            '''
                CREATE TRIGGER IF NOT EXISTS %(db)s.%(nuts)s_delete_nuts2
                BEFORE UPDATE OF size ON main.files FOR EACH ROW
                BEGIN
                  DELETE FROM %(nuts)s WHERE file_id == old.file_id;
                END
            '''))

        # Drop nuts when their file leaves the selection.
        cursor.execute(self._sql(
            '''
                CREATE TRIGGER IF NOT EXISTS
                    %(db)s.%(file_states)s_delete_files
                BEFORE DELETE ON %(db)s.%(file_states)s FOR EACH ROW
                BEGIN
                  DELETE FROM %(nuts)s WHERE file_id == old.file_id;
                END
            '''))

        # Maintain the per kind/codes counters on nut insertion/deletion.
        cursor.execute(self._sql(
            '''
                CREATE TRIGGER IF NOT EXISTS %(db)s.%(nuts)s_inc_kind_codes
                BEFORE INSERT ON %(nuts)s FOR EACH ROW
                BEGIN
                    INSERT OR IGNORE INTO %(kind_codes_count)s VALUES
                    (new.kind_codes_id, 0);
                    UPDATE %(kind_codes_count)s
                    SET count = count + 1
                    WHERE new.kind_codes_id
                        == %(kind_codes_count)s.kind_codes_id;
                END
            '''))

        cursor.execute(self._sql(
            '''
                CREATE TRIGGER IF NOT EXISTS %(db)s.%(nuts)s_dec_kind_codes
                BEFORE DELETE ON %(nuts)s FOR EACH ROW
                BEGIN
                    UPDATE %(kind_codes_count)s
                    SET count = count - 1
                    WHERE old.kind_codes_id
                        == %(kind_codes_count)s.kind_codes_id;
                END
            '''))

        # Sparse coverage representation: per kind/codes, (time, step) pairs
        # where `step` is the change in the number of overlapping spans.
        cursor.execute(self._register_table(self._sql(
            '''
                CREATE TABLE IF NOT EXISTS %(db)s.%(coverage)s (
                    kind_codes_id integer,
                    time_seconds integer,
                    time_offset integer,
                    step integer)
            ''')))

        cursor.execute(self._sql(
            '''
                CREATE UNIQUE INDEX IF NOT EXISTS %(db)s.%(coverage)s_time
                    ON %(coverage)s (kind_codes_id, time_seconds, time_offset)
            '''))

        # Maintain coverage on nut insertion: +1 at the nut's tmin, -1 at
        # its tmax; entries whose step becomes zero are removed.
        cursor.execute(self._sql(
            '''
                CREATE TRIGGER IF NOT EXISTS %(db)s.%(nuts)s_add_coverage
                AFTER INSERT ON %(nuts)s FOR EACH ROW
                BEGIN
                    INSERT OR IGNORE INTO %(coverage)s VALUES
                    (new.kind_codes_id, new.tmin_seconds, new.tmin_offset, 0)
                    ;
                    UPDATE %(coverage)s
                    SET step = step + 1
                    WHERE new.kind_codes_id == %(coverage)s.kind_codes_id
                        AND new.tmin_seconds == %(coverage)s.time_seconds
                        AND new.tmin_offset == %(coverage)s.time_offset
                    ;
                    INSERT OR IGNORE INTO %(coverage)s VALUES
                    (new.kind_codes_id, new.tmax_seconds, new.tmax_offset, 0)
                    ;
                    UPDATE %(coverage)s
                    SET step = step - 1
                    WHERE new.kind_codes_id == %(coverage)s.kind_codes_id
                        AND new.tmax_seconds == %(coverage)s.time_seconds
                        AND new.tmax_offset == %(coverage)s.time_offset
                    ;
                    DELETE FROM %(coverage)s
                    WHERE new.kind_codes_id == %(coverage)s.kind_codes_id
                        AND new.tmin_seconds == %(coverage)s.time_seconds
                        AND new.tmin_offset == %(coverage)s.time_offset
                        AND step == 0
                    ;
                    DELETE FROM %(coverage)s
                    WHERE new.kind_codes_id == %(coverage)s.kind_codes_id
                        AND new.tmax_seconds == %(coverage)s.time_seconds
                        AND new.tmax_offset == %(coverage)s.time_offset
                        AND step == 0
                    ;
                END
            '''))

        # Inverse of the add_coverage trigger, applied on nut deletion.
        cursor.execute(self._sql(
            '''
                CREATE TRIGGER IF NOT EXISTS %(db)s.%(nuts)s_remove_coverage
                BEFORE DELETE ON %(nuts)s FOR EACH ROW
                BEGIN
                    INSERT OR IGNORE INTO %(coverage)s VALUES
                    (old.kind_codes_id, old.tmin_seconds, old.tmin_offset, 0)
                    ;
                    UPDATE %(coverage)s
                    SET step = step - 1
                    WHERE old.kind_codes_id == %(coverage)s.kind_codes_id
                        AND old.tmin_seconds == %(coverage)s.time_seconds
                        AND old.tmin_offset == %(coverage)s.time_offset
                    ;
                    INSERT OR IGNORE INTO %(coverage)s VALUES
                    (old.kind_codes_id, old.tmax_seconds, old.tmax_offset, 0)
                    ;
                    UPDATE %(coverage)s
                    SET step = step + 1
                    WHERE old.kind_codes_id == %(coverage)s.kind_codes_id
                        AND old.tmax_seconds == %(coverage)s.time_seconds
                        AND old.tmax_offset == %(coverage)s.time_offset
                    ;
                    DELETE FROM %(coverage)s
                    WHERE old.kind_codes_id == %(coverage)s.kind_codes_id
                        AND old.tmin_seconds == %(coverage)s.time_seconds
                        AND old.tmin_offset == %(coverage)s.time_offset
                        AND step == 0
                    ;
                    DELETE FROM %(coverage)s
                    WHERE old.kind_codes_id == %(coverage)s.kind_codes_id
                        AND old.tmax_seconds == %(coverage)s.time_seconds
                        AND old.tmax_offset == %(coverage)s.time_offset
                        AND step == 0
                    ;
                END
            '''))
    def _delete(self):
        '''Delete database tables associated with this Squirrel.'''

        with self.transaction('delete tables') as cursor:
            # One DROP statement per line; executed one at a time because
            # each execute() call handles a single statement.
            for s in '''
                    DROP TRIGGER %(db)s.%(nuts)s_delete_nuts;
                    DROP TRIGGER %(db)s.%(nuts)s_delete_nuts2;
                    DROP TRIGGER %(db)s.%(file_states)s_delete_files;
                    DROP TRIGGER %(db)s.%(nuts)s_inc_kind_codes;
                    DROP TRIGGER %(db)s.%(nuts)s_dec_kind_codes;
                    DROP TABLE %(db)s.%(nuts)s;
                    DROP TABLE %(db)s.%(kind_codes_count)s;
                    DROP TRIGGER IF EXISTS %(db)s.%(nuts)s_add_coverage;
                    DROP TRIGGER IF EXISTS %(db)s.%(nuts)s_remove_coverage;
                    DROP TABLE IF EXISTS %(db)s.%(coverage)s;
            '''.strip().splitlines():

                cursor.execute(self._sql(s))

        # Let the base class drop its own tables as well.
        Selection._delete(self)
    @filldocs
    def add(self,
            paths,
            kinds=None,
            format='detect',
            include=None,
            exclude=None,
            check=True):

        '''
        Add files to the selection.

        :param paths:
            Iterator yielding paths to files or directories to be added to the
            selection. Recurses into directories. If given a ``str``, it
            is treated as a single path to be added.
        :type paths:
            :py:class:`list` of :py:class:`str`

        :param kinds:
            Content types to be made available through the Squirrel selection.
            By default, all known content types are accepted.
        :type kinds:
            :py:class:`list` of :py:class:`str`

        :param format:
            File format identifier or ``'detect'`` to enable auto-detection
            (available: %(file_formats)s).
        :type format:
            str

        :param include:
            If not ``None``, files are only included if their paths match the
            given regular expression pattern.
        :type include:
            str

        :param exclude:
            If not ``None``, files are only included if their paths do not
            match the given regular expression pattern.
        :type exclude:
            str

        :param check:
            If ``True``, all file modification times are checked to see if
            cached information has to be updated (slow). If ``False``, only
            previously unknown files are indexed and cached information is used
            for known files, regardless of file state (fast, corresponds to
            Squirrel's ``--optimistic`` mode). File deletions will go
            undetected in the latter case.
        :type check:
            bool

        :Complexity:
            O(log N)
        '''

        if isinstance(kinds, str):
            kinds = (kinds,)

        if isinstance(paths, str):
            paths = [paths]

        kind_mask = model.to_kind_mask(kinds)

        with progress.view():
            Selection.add(
                self, util.iter_select_files(
                    paths,
                    show_progress=False,
                    include=include,
                    exclude=exclude,
                    pass_through=lambda path: path.startswith('virtual:')
                ), kind_mask, format)

        self._load(check)
        self._update_nuts()
    def reload(self):
        '''
        Check for modifications and reindex modified files.

        Based on file modification times.
        '''

        # Force re-checking of all files in the selection, then reindex
        # anything that changed and refresh the selection's nut table.
        self._set_file_states_force_check()
        self._load(check=True)
        self._update_nuts()
626 def add_virtual(self, nuts, virtual_paths=None):
627 '''
628 Add content which is not backed by files.
630 :param nuts:
631 Content pieces to be added.
632 :type nuts:
633 iterator yielding :py:class:`~pyrocko.squirrel.model.Nut` objects
635 :param virtual_paths:
636 List of virtual paths to prevent creating a temporary list of the
637 nuts while aggregating the file paths for the selection.
638 :type virtual_paths:
639 :py:class:`list` of :py:class:`str`
641 Stores to the main database and the selection.
642 '''
644 if isinstance(virtual_paths, str):
645 virtual_paths = [virtual_paths]
647 if virtual_paths is None:
648 if not isinstance(nuts, list):
649 nuts = list(nuts)
650 virtual_paths = set(nut.file_path for nut in nuts)
652 Selection.add(self, virtual_paths)
653 self.get_database().dig(nuts)
654 self._update_nuts()
656 def add_volatile(self, nuts):
657 if not isinstance(nuts, list):
658 nuts = list(nuts)
660 paths = list(set(nut.file_path for nut in nuts))
661 io.backends.virtual.add_nuts(nuts)
662 self.add_virtual(nuts, paths)
663 self._volatile_paths.extend(paths)
665 def add_volatile_waveforms(self, traces):
666 '''
667 Add in-memory waveforms which will be removed when the app closes.
668 '''
670 name = model.random_name()
672 path = 'virtual:volatile:%s' % name
674 nuts = []
675 for itr, tr in enumerate(traces):
676 assert tr.tmin <= tr.tmax
677 tmin_seconds, tmin_offset = model.tsplit(tr.tmin)
678 tmax_seconds, tmax_offset = model.tsplit(
679 tr.tmin + tr.data_len()*tr.deltat)
681 nuts.append(model.Nut(
682 file_path=path,
683 file_format='virtual',
684 file_segment=itr,
685 file_element=0,
686 file_mtime=0,
687 codes=tr.codes,
688 tmin_seconds=tmin_seconds,
689 tmin_offset=tmin_offset,
690 tmax_seconds=tmax_seconds,
691 tmax_offset=tmax_offset,
692 deltat=tr.deltat,
693 kind_id=to_kind_id('waveform'),
694 content=tr))
696 self.add_volatile(nuts)
697 return path
699 def _load(self, check):
700 for _ in io.iload(
701 self,
702 content=[],
703 skip_unchanged=True,
704 check=check):
705 pass
    def _update_nuts(self, transaction=None):
        '''
        Copy nuts of files not yet marked 'known' from the main database
        into the selection's nut table.
        '''
        transaction = transaction or self.transaction('update nuts')
        with make_task('Aggregating selection') as task, \
                transaction as cursor:
            # Report progress every 100000 SQLite VM instructions.
            self._conn.set_progress_handler(task.update, 100000)
            nrows = cursor.execute(self._sql(
                '''
                    INSERT INTO %(db)s.%(nuts)s
                    SELECT NULL,
                        nuts.file_id, nuts.file_segment, nuts.file_element,
                        nuts.kind_id, nuts.kind_codes_id,
                        nuts.tmin_seconds, nuts.tmin_offset,
                        nuts.tmax_seconds, nuts.tmax_offset,
                        nuts.kscale
                    FROM %(db)s.%(file_states)s
                    INNER JOIN nuts
                        ON %(db)s.%(file_states)s.file_id == nuts.file_id
                    INNER JOIN kind_codes
                        ON nuts.kind_codes_id ==
                            kind_codes.kind_codes_id
                    WHERE %(db)s.%(file_states)s.file_state != 2
                        AND (((1 << kind_codes.kind_id)
                            & %(db)s.%(file_states)s.kind_mask) != 0)
                ''')).rowcount

            task.update(nrows)
            self._set_file_states_known(transaction)
            self._conn.set_progress_handler(None, 0)
    def add_source(self, source, check=True):
        '''
        Add remote resource.

        :param source:
            Remote data access client instance.
        :type source:
            subclass of :py:class:`~pyrocko.squirrel.client.base.Source`

        :param check:
            Passed through to the source's ``setup`` method. Presumably
            controls modification-time checking as in :py:meth:`add` —
            confirm with the source implementation.
        :type check:
            bool
        '''

        self._sources.append(source)
        source.setup(self, check=check)
750 def add_fdsn(self, *args, **kwargs):
751 '''
752 Add FDSN site for transparent remote data access.
754 Arguments are passed to
755 :py:class:`~pyrocko.squirrel.client.fdsn.FDSNSource`.
756 '''
758 self.add_source(fdsn.FDSNSource(*args, **kwargs))
760 def add_catalog(self, *args, **kwargs):
761 '''
762 Add online catalog for transparent event data access.
764 Arguments are passed to
765 :py:class:`~pyrocko.squirrel.client.catalog.CatalogSource`.
766 '''
768 self.add_source(catalog.CatalogSource(*args, **kwargs))
    def add_dataset(self, ds, check=True):
        '''
        Read dataset description from file and add its contents.

        :param ds:
            Path to dataset description file or dataset description
            object. See :py:mod:`~pyrocko.squirrel.dataset`.
        :type ds:
            :py:class:`str` or :py:class:`~pyrocko.squirrel.dataset.Dataset`

        :param check:
            If ``True``, all file modification times are checked to see if
            cached information has to be updated (slow). If ``False``, only
            previously unknown files are indexed and cached information is used
            for known files, regardless of file state (fast, corresponds to
            Squirrel's ``--optimistic`` mode). File deletions will go
            undetected in the latter case.
        :type check:
            bool
        '''
        if isinstance(ds, str):
            ds = dataset.read_dataset(ds)

        ds.setup(self, check=check)
795 def _get_selection_args(
796 self, kind_id,
797 obj=None, tmin=None, tmax=None, time=None, codes=None):
799 if codes is not None:
800 codes = codes_patterns_for_kind(kind_id, codes)
802 if time is not None:
803 tmin = time
804 tmax = time
806 if obj is not None:
807 tmin = tmin if tmin is not None else obj.tmin
808 tmax = tmax if tmax is not None else obj.tmax
809 codes = codes if codes is not None else codes_patterns_for_kind(
810 kind_id, obj.codes)
812 return tmin, tmax, codes
814 def _get_selection_args_str(self, *args, **kwargs):
816 tmin, tmax, codes = self._get_selection_args(*args, **kwargs)
817 return 'tmin: %s, tmax: %s, codes: %s' % (
818 util.time_to_str(tmin) if tmin is not None else 'none',
819 util.time_to_str(tmax) if tmax is not None else 'none',
820 ','.join(str(entry) for entry in codes))
822 def _selection_args_to_kwargs(
823 self, obj=None, tmin=None, tmax=None, time=None, codes=None):
825 return dict(obj=obj, tmin=tmin, tmax=tmax, time=time, codes=codes)
    def _timerange_sql(self, tmin, tmax, kind, cond, args, naiv):
        '''
        Append SQL conditions (into *cond*) and placeholder values (into
        *args*) restricting nuts to the given time range.

        With ``naiv=True`` a plain tmin/tmax comparison is used. Otherwise
        the kscale index is exploited: per time scale, only nuts whose
        tmin_seconds lies within an accordingly enlarged window around the
        query span are considered.
        '''

        tmin_seconds, tmin_offset = model.tsplit(tmin)
        tmax_seconds, tmax_offset = model.tsplit(tmax)
        if naiv:
            cond.append('%(db)s.%(nuts)s.tmin_seconds <= ?')
            args.append(tmax_seconds)
        else:
            tscale_edges = model.tscale_edges
            tmin_cond = []
            # One OR-branch per time scale bucket; the last bucket is
            # unbounded (covers nuts longer than the largest edge).
            for kscale in range(tscale_edges.size + 1):
                if kscale != tscale_edges.size:
                    tscale = int(tscale_edges[kscale])
                    tmin_cond.append('''
                        (%(db)s.%(nuts)s.kind_id = ?
                         AND %(db)s.%(nuts)s.kscale == ?
                         AND %(db)s.%(nuts)s.tmin_seconds BETWEEN ? AND ?)
                    ''')
                    # Window widened by tscale+1 / +1 seconds to be safe
                    # with respect to the sub-second offsets.
                    args.extend(
                        (to_kind_id(kind), kscale,
                         tmin_seconds - tscale - 1, tmax_seconds + 1))

                else:
                    tmin_cond.append('''
                        (%(db)s.%(nuts)s.kind_id == ?
                         AND %(db)s.%(nuts)s.kscale == ?
                         AND %(db)s.%(nuts)s.tmin_seconds <= ?)
                    ''')

                    args.extend(
                        (to_kind_id(kind), kscale, tmax_seconds + 1))
            if tmin_cond:
                cond.append(' ( ' + ' OR '.join(tmin_cond) + ' ) ')

        cond.append('%(db)s.%(nuts)s.tmax_seconds >= ?')
        args.append(tmin_seconds)
864 def _codes_match_sql(self, kind_id, codes, cond, args):
865 pats = codes_patterns_for_kind(kind_id, codes)
866 if pats is None:
867 return
869 pats_exact = []
870 pats_nonexact = []
871 for pat in pats:
872 spat = pat.safe_str
873 (pats_exact if _is_exact(spat) else pats_nonexact).append(spat)
875 cond_exact = None
876 if pats_exact:
877 cond_exact = ' ( kind_codes.codes IN ( %s ) ) ' % ', '.join(
878 '?'*len(pats_exact))
880 args.extend(pats_exact)
882 cond_nonexact = None
883 if pats_nonexact:
884 cond_nonexact = ' ( %s ) ' % ' OR '.join(
885 ('kind_codes.codes GLOB ?',) * len(pats_nonexact))
887 args.extend(pats_nonexact)
889 if cond_exact and cond_nonexact:
890 cond.append(' ( %s OR %s ) ' % (cond_exact, cond_nonexact))
892 elif cond_exact:
893 cond.append(cond_exact)
895 elif cond_nonexact:
896 cond.append(cond_nonexact)
    def iter_nuts(
            self, kind=None, tmin=None, tmax=None, codes=None, naiv=False,
            kind_codes_ids=None, path=None, limit=None):

        '''
        Iterate over content entities matching given constraints.

        :param kind:
            Content kind (or kinds) to extract.
        :type kind:
            :py:class:`str`, :py:class:`list` of :py:class:`str`

        :param tmin:
            Start time of query interval.
        :type tmin:
            timestamp

        :param tmax:
            End time of query interval.
        :type tmax:
            timestamp

        :param codes:
            List of code patterns to query.
        :type codes:
            :py:class:`list` of :py:class:`~pyrocko.squirrel.model.Codes`
            objects appropriate for the queried content type, or anything which
            can be converted to such objects.

        :param naiv:
            Bypass time span lookup through indices (slow, for testing).
        :type naiv:
            :py:class:`bool`

        :param kind_codes_ids:
            Kind-codes IDs of contents to be retrieved (internal use).
        :type kind_codes_ids:
            :py:class:`list` of :py:class:`int`

        :yields:
            :py:class:`~pyrocko.squirrel.model.Nut` objects representing the
            intersecting content.

        :complexity:
            O(log N) for the time selection part due to heavy use of database
            indices.

        Query time span is treated as a half-open interval ``[tmin, tmax)``.
        However, if ``tmin`` equals ``tmax``, the edge logics are modified to
        closed-interval so that content intersecting with the time instant ``t
        = tmin = tmax`` is returned (otherwise nothing would be returned as
        ``[t, t)`` never matches anything).

        Time spans of content entities to be matched are also treated as half
        open intervals, e.g. content span ``[0, 1)`` is matched by query span
        ``[0, 1)`` but not by ``[-1, 0)`` or ``[1, 2)``. Also here, logics are
        modified to closed-interval when the content time span is an empty
        interval, i.e. to indicate a time instant. E.g. time instant 0 is
        matched by ``[0, 1)`` but not by ``[-1, 0)`` or ``[1, 2)``.
        '''

        # Multiple kinds (or all kinds): recurse once per kind.
        # NOTE(review): the recursion forwards only tmin, tmax and codes;
        # naiv, kind_codes_ids, path and limit are not passed on in the
        # multi-kind case -- confirm this is intended.
        if not isinstance(kind, str):
            if kind is None:
                kind = model.g_content_kinds
            for kind_ in kind:
                for nut in self.iter_nuts(kind_, tmin, tmax, codes):
                    yield nut

            return

        kind_id = to_kind_id(kind)

        # Collect WHERE conditions and their placeholder values.
        cond = []
        args = []
        if tmin is not None or tmax is not None:
            assert kind is not None
            # Open-ended bounds are filled from the selection's total span.
            if tmin is None:
                tmin = self.get_time_span()[0]
            if tmax is None:
                tmax = self.get_time_span()[1] + 1.0

            self._timerange_sql(tmin, tmax, kind, cond, args, naiv)

        cond.append('kind_codes.kind_id == ?')
        args.append(kind_id)

        if codes is not None:
            self._codes_match_sql(kind_id, codes, cond, args)

        if kind_codes_ids is not None:
            cond.append(
                ' ( kind_codes.kind_codes_id IN ( %s ) ) ' % ', '.join(
                    '?'*len(kind_codes_ids)))

            args.extend(kind_codes_ids)

        db = self.get_database()
        if path is not None:
            # Paths are stored relative to the database's base path.
            cond.append('files.path == ?')
            args.append(db.relpath(abspath(path)))

        sql = ('''
            SELECT
                files.path,
                files.format,
                files.mtime,
                files.size,
                %(db)s.%(nuts)s.file_segment,
                %(db)s.%(nuts)s.file_element,
                kind_codes.kind_id,
                kind_codes.codes,
                %(db)s.%(nuts)s.tmin_seconds,
                %(db)s.%(nuts)s.tmin_offset,
                %(db)s.%(nuts)s.tmax_seconds,
                %(db)s.%(nuts)s.tmax_offset,
                kind_codes.deltat
            FROM files
            INNER JOIN %(db)s.%(nuts)s
                ON files.file_id == %(db)s.%(nuts)s.file_id
            INNER JOIN kind_codes
                ON %(db)s.%(nuts)s.kind_codes_id == kind_codes.kind_codes_id
            ''')

        if cond:
            sql += ''' WHERE ''' + ' AND '.join(cond)

        if limit is not None:
            sql += ''' LIMIT %i''' % limit

        sql = self._sql(sql)
        if tmin is None and tmax is None:
            for row in self._conn.execute(sql, args):
                row = (db.abspath(row[0]),) + row[1:]
                nut = model.Nut(values_nocheck=row)
                yield nut
        else:
            assert tmin is not None and tmax is not None
            # The SQL pre-selection above works on whole seconds only; the
            # exact (sub-second) interval checks are done here in Python.
            if tmin == tmax:
                # Time-instant query: closed-interval edge logics (see
                # docstring).
                for row in self._conn.execute(sql, args):
                    row = (db.abspath(row[0]),) + row[1:]
                    nut = model.Nut(values_nocheck=row)
                    if (nut.tmin <= tmin < nut.tmax) \
                            or (nut.tmin == nut.tmax and tmin == nut.tmin):

                        yield nut
            else:
                for row in self._conn.execute(sql, args):
                    row = (db.abspath(row[0]),) + row[1:]
                    nut = model.Nut(values_nocheck=row)
                    if (tmin < nut.tmax and nut.tmin < tmax) \
                            or (nut.tmin == nut.tmax
                                and tmin <= nut.tmin < tmax):

                        yield nut
1053 def get_nuts(self, *args, **kwargs):
1054 '''
1055 Get content entities matching given constraints.
1057 Like :py:meth:`iter_nuts` but returns results as a list.
1058 '''
1060 return list(self.iter_nuts(*args, **kwargs))
    def _split_nuts(
            self, kind, tmin=None, tmax=None, codes=None, path=None):
        '''
        Split nut index entries intersecting the given time range.

        Matching entries are replaced by entries covering only the parts
        outside of ``[tmin, tmax]``. Applied to both the selection's nut
        table and the main database's ('main.nuts').
        '''

        kind_id = to_kind_id(kind)
        tmin_seconds, tmin_offset = model.tsplit(tmin)
        tmax_seconds, tmax_offset = model.tsplit(tmax)

        # Name mapping so the same SQL templates can address the main
        # database's nut table.
        names_main_nuts = dict(self._names)
        names_main_nuts.update(db='main', nuts='nuts')

        db = self.get_database()

        def main_nuts(s):
            return s % names_main_nuts

        with self.transaction('split nuts') as cursor:
            # modify selection and main
            for sql_subst in [
                    self._sql, main_nuts]:

                cond = []
                args = []

                self._timerange_sql(tmin, tmax, kind, cond, args, False)

                if codes is not None:
                    self._codes_match_sql(kind_id, codes, cond, args)

                if path is not None:
                    cond.append('files.path == ?')
                    args.append(db.relpath(abspath(path)))

                sql = sql_subst('''
                    SELECT
                        %(db)s.%(nuts)s.nut_id,
                        %(db)s.%(nuts)s.tmin_seconds,
                        %(db)s.%(nuts)s.tmin_offset,
                        %(db)s.%(nuts)s.tmax_seconds,
                        %(db)s.%(nuts)s.tmax_offset,
                        kind_codes.deltat
                    FROM files
                    INNER JOIN %(db)s.%(nuts)s
                        ON files.file_id == %(db)s.%(nuts)s.file_id
                    INNER JOIN kind_codes
                        ON %(db)s.%(nuts)s.kind_codes_id == kind_codes.kind_codes_id
                    WHERE ''' + ' AND '.join(cond))  # noqa

                insert = []
                delete = []
                for row in cursor.execute(sql, args):
                    nut_id, nut_tmin_seconds, nut_tmin_offset, \
                        nut_tmax_seconds, nut_tmax_offset, nut_deltat = row

                    nut_tmin = model.tjoin(
                        nut_tmin_seconds, nut_tmin_offset)
                    nut_tmax = model.tjoin(
                        nut_tmax_seconds, nut_tmax_offset)

                    if nut_tmin < tmax and tmin < nut_tmax:
                        # Keep the leading remainder, if any.
                        if nut_tmin < tmin:
                            insert.append((
                                nut_tmin_seconds, nut_tmin_offset,
                                tmin_seconds, tmin_offset,
                                model.tscale_to_kscale(
                                    tmin_seconds - nut_tmin_seconds),
                                nut_id))

                        # Keep the trailing remainder, if any.
                        if tmax < nut_tmax:
                            insert.append((
                                tmax_seconds, tmax_offset,
                                nut_tmax_seconds, nut_tmax_offset,
                                model.tscale_to_kscale(
                                    nut_tmax_seconds - tmax_seconds),
                                nut_id))

                        delete.append((nut_id,))

                # The remainders inherit all other attributes from the
                # original entry; the original is removed afterwards.
                sql_add = '''
                    INSERT INTO %(db)s.%(nuts)s (
                            file_id, file_segment, file_element, kind_id,
                            kind_codes_id, tmin_seconds, tmin_offset,
                            tmax_seconds, tmax_offset, kscale )
                        SELECT
                            file_id, file_segment, file_element,
                            kind_id, kind_codes_id, ?, ?, ?, ?, ?
                        FROM %(db)s.%(nuts)s
                        WHERE nut_id == ?
                '''
                cursor.executemany(sql_subst(sql_add), insert)

                sql_delete = '''
                    DELETE FROM %(db)s.%(nuts)s WHERE nut_id == ?
                '''
                cursor.executemany(sql_subst(sql_delete), delete)
    def get_time_span(self, kinds=None):
        '''
        Get time interval over all content in selection.

        :param kinds:
            If not ``None``, restrict query to given content kinds.
        :type kind:
            list of str

        :complexity:
            O(1), independent of the number of nuts.

        :returns:
            ``(tmin, tmax)``, combined time interval of queried content kinds.
        '''

        # Times are stored split into (seconds, offset). The inner SELECT
        # finds the extreme seconds value; the outer query then picks the
        # extreme offset among rows sharing that seconds value, so the two
        # components can be rejoined into an exact time.
        sql_min = self._sql('''
            SELECT MIN(tmin_seconds), MIN(tmin_offset)
            FROM %(db)s.%(nuts)s
            WHERE kind_id == ?
                AND tmin_seconds == (
                    SELECT MIN(tmin_seconds)
                    FROM %(db)s.%(nuts)s
                    WHERE kind_id == ?)
        ''')

        sql_max = self._sql('''
            SELECT MAX(tmax_seconds), MAX(tmax_offset)
            FROM %(db)s.%(nuts)s
            WHERE kind_id == ?
                AND tmax_seconds == (
                    SELECT MAX(tmax_seconds)
                    FROM %(db)s.%(nuts)s
                    WHERE kind_id == ?)
        ''')

        gtmin = None
        gtmax = None

        # Convenience: allow a single kind to be passed as a plain string.
        if isinstance(kinds, str):
            kinds = [kinds]

        if kinds is None:
            kind_ids = model.g_content_kind_ids
        else:
            kind_ids = model.to_kind_ids(kinds)

        # Fold per-kind extrema into a global (gtmin, gtmax) span. tjoin
        # presumably returns None when the seconds component is NULL (no
        # rows for this kind) — TODO confirm in model.tjoin.
        for kind_id in kind_ids:
            for tmin_seconds, tmin_offset in self._conn.execute(
                    sql_min, (kind_id, kind_id)):

                tmin = model.tjoin(tmin_seconds, tmin_offset)
                if tmin is not None and (gtmin is None or tmin < gtmin):
                    gtmin = tmin

            for (tmax_seconds, tmax_offset) in self._conn.execute(
                    sql_max, (kind_id, kind_id)):

                tmax = model.tjoin(tmax_seconds, tmax_offset)
                if tmax is not None and (gtmax is None or tmax > gtmax):
                    gtmax = tmax

        return gtmin, gtmax
1219 def has(self, kinds):
1220 '''
1221 Check availability of given content kinds.
1223 :param kinds:
1224 Content kinds to query.
1225 :type kind:
1226 list of str
1228 :returns:
1229 ``True`` if any of the queried content kinds is available
1230 in the selection.
1231 '''
1232 self_tmin, self_tmax = self.get_time_span(kinds)
1234 return None not in (self_tmin, self_tmax)
1236 def get_deltat_span(self, kind):
1237 '''
1238 Get min and max sampling interval of all content of given kind.
1240 :param kind:
1241 Content kind
1242 :type kind:
1243 str
1245 :returns: ``(deltat_min, deltat_max)``
1246 '''
1248 deltats = [
1249 deltat for deltat in self.get_deltats(kind)
1250 if deltat is not None]
1252 if deltats:
1253 return min(deltats), max(deltats)
1254 else:
1255 return None, None
1257 def iter_kinds(self, codes=None):
1258 '''
1259 Iterate over content types available in selection.
1261 :param codes:
1262 If given, get kinds only for selected codes identifier.
1263 Only a single identifier may be given here and no pattern matching
1264 is done, currently.
1265 :type codes:
1266 :py:class:`~pyrocko.squirrel.model.Codes`
1268 :yields:
1269 Available content kinds as :py:class:`str`.
1271 :complexity:
1272 O(1), independent of number of nuts.
1273 '''
1275 return self._database._iter_kinds(
1276 codes=codes,
1277 kind_codes_count='%(db)s.%(kind_codes_count)s' % self._names)
1279 def iter_deltats(self, kind=None):
1280 '''
1281 Iterate over sampling intervals available in selection.
1283 :param kind:
1284 If given, get sampling intervals only for a given content type.
1285 :type kind:
1286 str
1288 :yields:
1289 :py:class:`float` values.
1291 :complexity:
1292 O(1), independent of number of nuts.
1293 '''
1294 return self._database._iter_deltats(
1295 kind=kind,
1296 kind_codes_count='%(db)s.%(kind_codes_count)s' % self._names)
1298 def iter_codes(self, kind=None):
1299 '''
1300 Iterate over content identifier code sequences available in selection.
1302 :param kind:
1303 If given, get codes only for a given content type.
1304 :type kind:
1305 str
1307 :yields:
1308 :py:class:`tuple` of :py:class:`str`
1310 :complexity:
1311 O(1), independent of number of nuts.
1312 '''
1313 return self._database._iter_codes(
1314 kind=kind,
1315 kind_codes_count='%(db)s.%(kind_codes_count)s' % self._names)
1317 def _iter_codes_info(self, kind=None, codes=None):
1318 '''
1319 Iterate over number of occurrences of any (kind, codes) combination.
1321 :param kind:
1322 If given, get counts only for selected content type.
1323 :type kind:
1324 str
1326 :yields:
1327 Tuples of the form ``(kind, codes, deltat, kind_codes_id, count)``.
1329 :complexity:
1330 O(1), independent of number of nuts.
1331 '''
1332 return self._database._iter_codes_info(
1333 kind=kind,
1334 codes=codes,
1335 kind_codes_count='%(db)s.%(kind_codes_count)s' % self._names)
1337 def get_kinds(self, codes=None):
1338 '''
1339 Get content types available in selection.
1341 :param codes:
1342 If given, get kinds only for selected codes identifier.
1343 Only a single identifier may be given here and no pattern matching
1344 is done, currently.
1345 :type codes:
1346 :py:class:`~pyrocko.squirrel.model.Codes`
1348 :returns:
1349 Sorted list of available content types.
1350 :rtype:
1351 py:class:`list` of :py:class:`str`
1353 :complexity:
1354 O(1), independent of number of nuts.
1356 '''
1357 return sorted(list(self.iter_kinds(codes=codes)))
1359 def get_deltats(self, kind=None):
1360 '''
1361 Get sampling intervals available in selection.
1363 :param kind:
1364 If given, get sampling intervals only for selected content type.
1365 :type kind:
1366 str
1368 :complexity:
1369 O(1), independent of number of nuts.
1371 :returns: Sorted list of available sampling intervals.
1372 '''
1373 return sorted(list(self.iter_deltats(kind=kind)))
1375 def get_codes(self, kind=None):
1376 '''
1377 Get identifier code sequences available in selection.
1379 :param kind:
1380 If given, get codes only for selected content type.
1381 :type kind:
1382 str
1384 :complexity:
1385 O(1), independent of number of nuts.
1387 :returns: Sorted list of available codes as tuples of strings.
1388 '''
1389 return sorted(list(self.iter_codes(kind=kind)))
1391 def get_counts(self, kind=None):
1392 '''
1393 Get number of occurrences of any (kind, codes) combination.
1395 :param kind:
1396 If given, get codes only for selected content type.
1397 :type kind:
1398 str
1400 :complexity:
1401 O(1), independent of number of nuts.
1403 :returns: ``dict`` with ``counts[kind][codes]`` or ``counts[codes]``
1404 if kind is not ``None``
1405 '''
1406 d = {}
1407 for kind_id, codes, _, _, count in self._iter_codes_info(kind=kind):
1408 if kind_id not in d:
1409 v = d[kind_id] = {}
1410 else:
1411 v = d[kind_id]
1413 if codes not in v:
1414 v[codes] = 0
1416 v[codes] += count
1418 if kind is not None:
1419 return d[to_kind_id(kind)]
1420 else:
1421 return dict((to_kind(kind_id), v) for (kind_id, v) in d.items())
    def glob_codes(self, kind, codes):
        '''
        Find codes matching given patterns.

        :param kind:
            Content kind to be queried.
        :type kind:
            str

        :param codes:
            List of code patterns to query.
        :type codes:
            :py:class:`list` of :py:class:`~pyrocko.squirrel.model.Codes`
            objects appropriate for the queried content type, or anything which
            can be converted to such objects.

        :returns:
            List of matches of the form ``[kind_codes_id, codes, deltat]``.
        '''

        kind_id = to_kind_id(kind)
        args = [kind_id]
        pats = codes_patterns_for_kind(kind_id, codes)

        if pats:
            # One GLOB test per pattern, OR-ed together. Patterns are bound
            # as parameters (via their safe_str form), not interpolated.
            codes_cond = 'AND ( %s ) ' % ' OR '.join(
                ('kind_codes.codes GLOB ?',) * len(pats))

            args.extend(pat.safe_str for pat in pats)
        else:
            # No patterns: match all codes of the given kind.
            codes_cond = ''

        sql = self._sql('''
            SELECT kind_codes_id, codes, deltat FROM kind_codes
            WHERE
                kind_id == ? ''' + codes_cond)

        return list(map(list, self._conn.execute(sql, args)))
1462 def update(self, constraint=None, **kwargs):
1463 '''
1464 Update or partially update channel and event inventories.
1466 :param constraint:
1467 Selection of times or areas to be brought up to date.
1468 :type constraint:
1469 :py:class:`~pyrocko.squirrel.client.base.Constraint`
1471 :param \\*\\*kwargs:
1472 Shortcut for setting ``constraint=Constraint(**kwargs)``.
1474 This function triggers all attached remote sources, to check for
1475 updates in the meta-data. The sources will only submit queries when
1476 their expiration date has passed, or if the selection spans into
1477 previously unseen times or areas.
1478 '''
1480 if constraint is None:
1481 constraint = client.Constraint(**kwargs)
1483 for source in self._sources:
1484 source.update_channel_inventory(self, constraint)
1485 source.update_event_inventory(self, constraint)
1487 def update_waveform_promises(self, constraint=None, **kwargs):
1488 '''
1489 Permit downloading of remote waveforms.
1491 :param constraint:
1492 Remote waveforms compatible with the given constraint are enabled
1493 for download.
1494 :type constraint:
1495 :py:class:`~pyrocko.squirrel.client.base.Constraint`
1497 :param \\*\\*kwargs:
1498 Shortcut for setting ``constraint=Constraint(**kwargs)``.
1500 Calling this method permits Squirrel to download waveforms from remote
1501 sources when processing subsequent waveform requests. This works by
1502 inserting so called waveform promises into the database. It will look
1503 into the available channels for each remote source and create a promise
1504 for each channel compatible with the given constraint. If the promise
1505 then matches in a waveform request, Squirrel tries to download the
1506 waveform. If the download is successful, the downloaded waveform is
1507 added to the Squirrel and the promise is deleted. If the download
1508 fails, the promise is kept if the reason of failure looks like being
1509 temporary, e.g. because of a network failure. If the cause of failure
1510 however seems to be permanent, the promise is deleted so that no
1511 further attempts are made to download a waveform which might not be
1512 available from that server at all. To force re-scheduling after a
1513 permanent failure, call :py:meth:`update_waveform_promises`
1514 yet another time.
1515 '''
1517 if constraint is None:
1518 constraint = client.Constraint(**kwargs)
1520 for source in self._sources:
1521 source.update_waveform_promises(self, constraint)
1523 def remove_waveform_promises(self, from_database='selection'):
1524 '''
1525 Remove waveform promises from live selection or global database.
1527 Calling this function removes all waveform promises provided by the
1528 attached sources.
1530 :param from_database:
1531 Remove from live selection ``'selection'`` or global database
1532 ``'global'``.
1533 '''
1534 for source in self._sources:
1535 source.remove_waveform_promises(self, from_database=from_database)
1537 def update_responses(self, constraint=None, **kwargs):
1538 if constraint is None:
1539 constraint = client.Constraint(**kwargs)
1541 for source in self._sources:
1542 source.update_response_inventory(self, constraint)
1544 def get_nfiles(self):
1545 '''
1546 Get number of files in selection.
1547 '''
1549 sql = self._sql('''SELECT COUNT(*) FROM %(db)s.%(file_states)s''')
1550 for row in self._conn.execute(sql):
1551 return row[0]
1553 def get_nnuts(self):
1554 '''
1555 Get number of nuts in selection.
1556 '''
1558 sql = self._sql('''SELECT COUNT(*) FROM %(db)s.%(nuts)s''')
1559 for row in self._conn.execute(sql):
1560 return row[0]
1562 def get_total_size(self):
1563 '''
1564 Get aggregated file size available in selection.
1565 '''
1567 sql = self._sql('''
1568 SELECT SUM(files.size) FROM %(db)s.%(file_states)s
1569 INNER JOIN files
1570 ON %(db)s.%(file_states)s.file_id = files.file_id
1571 ''')
1573 for row in self._conn.execute(sql):
1574 return row[0] or 0
1576 def get_stats(self):
1577 '''
1578 Get statistics on contents available through this selection.
1579 '''
1581 kinds = self.get_kinds()
1582 time_spans = {}
1583 for kind in kinds:
1584 time_spans[kind] = self.get_time_span([kind])
1586 return SquirrelStats(
1587 nfiles=self.get_nfiles(),
1588 nnuts=self.get_nnuts(),
1589 kinds=kinds,
1590 codes=self.get_codes(),
1591 total_size=self.get_total_size(),
1592 counts=self.get_counts(),
1593 time_spans=time_spans,
1594 sources=[s.describe() for s in self._sources],
1595 operators=[op.describe() for op in self._operators])
1597 @filldocs
1598 def check(
1599 self, obj=None, tmin=None, tmax=None, time=None, codes=None,
1600 ignore=[]):
1601 '''
1602 Check for common data/metadata problems.
1604 %(query_args)s
1606 :param ignore:
1607 Problem types to be ignored.
1608 :type ignore:
1609 :class:`list` of :class:`str`
1610 (:py:class:`~pyrocko.squirrel.check.SquirrelCheckProblemType`)
1612 :returns:
1613 :py:class:`~pyrocko.squirrel.check.SquirrelCheck` object
1614 containing the results of the check.
1616 See :py:func:`~pyrocko.squirrel.check.do_check`.
1617 '''
1619 from .check import do_check
1620 tmin, tmax, codes = self._get_selection_args(
1621 CHANNEL, obj, tmin, tmax, time, codes)
1623 return do_check(self, tmin=tmin, tmax=tmax, codes=codes, ignore=ignore)
    def get_content(
            self,
            nut,
            cache_id='default',
            accessor_id='default',
            show_progress=False,
            model='squirrel'):

        '''
        Get and possibly load full content for a given index entry from file.

        Loads the actual content objects (channel, station, waveform, ...) from
        file. For efficiency, sibling content (all stuff in the same file
        segment) will also be loaded as a side effect. The loaded contents are
        cached in the Squirrel object.
        '''

        content_cache = self._content_caches[cache_id]
        if not content_cache.has(nut):

            # Cache miss: (re-)load the whole file segment; all nuts from the
            # segment are put into the cache, not just the requested one.
            for nut_loaded in io.iload(
                    nut.file_path,
                    segment=nut.file_segment,
                    format=nut.file_format,
                    database=self._database,
                    update_selection=self,
                    show_progress=show_progress):

                content_cache.put(nut_loaded)

        try:
            return content_cache.get(nut, accessor_id, model)

        except KeyError:
            # Content vanished between indexing and loading (e.g. file
            # changed on disk). nut.key is presumably a 4-tuple matching the
            # four placeholders — TODO confirm against model.Nut.
            raise error.NotAvailable(
                'Unable to retrieve content: %s, %s, %s, %s' % nut.key)
1662 def advance_accessor(self, accessor_id='default', cache_id=None):
1663 '''
1664 Notify memory caches about consumer moving to a new data batch.
1666 :param accessor_id:
1667 Name of accessing consumer to be advanced.
1668 :type accessor_id:
1669 str
1671 :param cache_id:
1672 Name of cache to for which the accessor should be advanced. By
1673 default the named accessor is advanced in all registered caches.
1674 By default, two caches named ``'default'`` and ``'waveform'`` are
1675 available.
1676 :type cache_id:
1677 str
1679 See :py:class:`~pyrocko.squirrel.cache.ContentCache` for details on how
1680 Squirrel's memory caching works and can be tuned. Default behaviour is
1681 to release data when it has not been used in the latest data
1682 window/batch. If the accessor is never advanced, data is cached
1683 indefinitely - which is often desired e.g. for station meta-data.
1684 Methods for consecutive data traversal, like
1685 :py:meth:`chopper_waveforms` automatically advance and clear
1686 their accessor.
1687 '''
1688 for cache_ in (
1689 self._content_caches.keys()
1690 if cache_id is None
1691 else [cache_id]):
1693 self._content_caches[cache_].advance_accessor(accessor_id)
1695 def clear_accessor(self, accessor_id, cache_id=None):
1696 '''
1697 Notify memory caches about a consumer having finished.
1699 :param accessor_id:
1700 Name of accessor to be cleared.
1701 :type accessor_id:
1702 str
1704 :param cache_id:
1705 Name of cache for which the accessor should be cleared. By default
1706 the named accessor is cleared from all registered caches. By
1707 default, two caches named ``'default'`` and ``'waveform'`` are
1708 available.
1709 :type cache_id:
1710 str
1712 Calling this method clears all references to cache entries held by the
1713 named accessor. Cache entries are then freed if not referenced by any
1714 other accessor.
1715 '''
1717 for cache_ in (
1718 self._content_caches.keys()
1719 if cache_id is None
1720 else [cache_id]):
1722 self._content_caches[cache_].clear_accessor(accessor_id)
    def get_cache_stats(self, cache_id):
        # Report usage statistics of the named memory cache
        # (e.g. 'default' or 'waveform').
        return self._content_caches[cache_id].get_stats()
1727 @filldocs
1728 def get_stations(
1729 self, obj=None, tmin=None, tmax=None, time=None, codes=None,
1730 model='squirrel'):
1732 '''
1733 Get stations matching given constraints.
1735 %(query_args)s
1737 :param model:
1738 Select object model for returned values: ``'squirrel'`` to get
1739 Squirrel station objects or ``'pyrocko'`` to get Pyrocko station
1740 objects with channel information attached.
1741 :type model:
1742 str
1744 :returns:
1745 List of :py:class:`pyrocko.squirrel.Station
1746 <pyrocko.squirrel.model.Station>` objects by default or list of
1747 :py:class:`pyrocko.model.Station <pyrocko.model.station.Station>`
1748 objects if ``model='pyrocko'`` is requested.
1750 See :py:meth:`iter_nuts` for details on time span matching.
1751 '''
1753 if model == 'pyrocko':
1754 return self._get_pyrocko_stations(obj, tmin, tmax, time, codes)
1755 elif model in ('squirrel', 'stationxml', 'stationxml+'):
1756 args = self._get_selection_args(
1757 STATION, obj, tmin, tmax, time, codes)
1759 nuts = sorted(
1760 self.iter_nuts('station', *args), key=lambda nut: nut.dkey)
1762 return [self.get_content(nut, model=model) for nut in nuts]
1763 else:
1764 raise ValueError('Invalid station model: %s' % model)
1766 @filldocs
1767 def get_channels(
1768 self, obj=None, tmin=None, tmax=None, time=None, codes=None,
1769 model='squirrel'):
1771 '''
1772 Get channels matching given constraints.
1774 %(query_args)s
1776 :returns:
1777 List of :py:class:`~pyrocko.squirrel.model.Channel` objects.
1779 See :py:meth:`iter_nuts` for details on time span matching.
1780 '''
1782 args = self._get_selection_args(
1783 CHANNEL, obj, tmin, tmax, time, codes)
1785 nuts = sorted(
1786 self.iter_nuts('channel', *args), key=lambda nut: nut.dkey)
1788 return [self.get_content(nut, model=model) for nut in nuts]
    @filldocs
    def get_sensors(
            self, obj=None, tmin=None, tmax=None, time=None, codes=None):

        '''
        Get sensors matching given constraints.

        %(query_args)s

        :returns:
            List of :py:class:`~pyrocko.squirrel.model.Sensor` objects.

        See :py:meth:`iter_nuts` for details on time span matching.
        '''

        tmin, tmax, codes = self._get_selection_args(
            CHANNEL, obj, tmin, tmax, time, codes)

        if codes is not None:
            # Widen channel patterns: replace the last character (the
            # component letter) with '?', so all components of a sensor are
            # matched together; a full wildcard '*' is kept as is.
            codes = codes_patterns_list(
                (entry.replace(channel=entry.channel[:-1] + '?')
                 if entry.channel != '*' else entry)
                for entry in codes)

        nuts = sorted(
            self.iter_nuts(
                'channel', tmin, tmax, codes), key=lambda nut: nut.dkey)

        # Group matching channels into sensors, then keep only sensors
        # overlapping the queried time span.
        return [
            sensor for sensor in model.Sensor.from_channels(
                self.get_content(nut) for nut in nuts)
            if match_time_span(tmin, tmax, sensor)]
1823 @filldocs
1824 def get_responses(
1825 self, obj=None, tmin=None, tmax=None, time=None, codes=None,
1826 model='squirrel'):
1828 '''
1829 Get instrument responses matching given constraints.
1831 %(query_args)s
1833 :returns:
1834 List of :py:class:`~pyrocko.squirrel.model.Response` objects.
1836 See :py:meth:`iter_nuts` for details on time span matching.
1837 '''
1839 args = self._get_selection_args(
1840 RESPONSE, obj, tmin, tmax, time, codes)
1842 nuts = sorted(
1843 self.iter_nuts('response', *args), key=lambda nut: nut.dkey)
1845 return [self.get_content(nut, model=model) for nut in nuts]
    @filldocs
    def get_response(
            self, obj=None, tmin=None, tmax=None, time=None, codes=None,
            model='squirrel'):

        '''
        Get instrument response matching given constraints.

        %(query_args)s

        :returns:
            :py:class:`~pyrocko.squirrel.model.Response` object.

        Same as :py:meth:`get_responses` but returning exactly one response.
        Raises :py:exc:`~pyrocko.squirrel.error.NotAvailable` if zero or more
        than one is available.

        See :py:meth:`iter_nuts` for details on time span matching.
        '''

        # Query internally with 'stationxml+' even when the caller asked for
        # plain 'stationxml', so the squirrel-side response object is at hand
        # for the multi-match error summary below.
        if model == 'stationxml':
            model_ = 'stationxml+'
        else:
            model_ = model

        responses = self.get_responses(
            obj, tmin, tmax, time, codes, model=model_)
        if len(responses) == 0:
            raise error.NotAvailable(
                'No instrument response available (%s).'
                % self._get_selection_args_str(
                    RESPONSE, obj, tmin, tmax, time, codes))

        elif len(responses) > 1:
            # Extract squirrel response objects for the summary listing.
            # 'stationxml+' entries look like (squirrel_response, sx) pairs —
            # presumably; verify against get_responses' content model.
            if model_ == 'squirrel':
                resps_sq = responses
            elif model_ == 'stationxml+':
                resps_sq = [resp[0] for resp in responses]
            else:
                raise ValueError('Invalid response model: %s' % model)

            rinfo = ':\n' + '\n'.join(
                ' ' + resp.summary for resp in resps_sq)

            raise error.NotAvailable(
                'Multiple instrument responses matching given constraints '
                '(%s)%s' % (
                    self._get_selection_args_str(
                        RESPONSE, obj, tmin, tmax, time, codes), rinfo))

        # Exactly one match: unwrap the stationxml part if that is what the
        # caller originally requested.
        if model == 'stationxml':
            return responses[0][1]
        else:
            return responses[0]
1902 @filldocs
1903 def get_events(
1904 self, obj=None, tmin=None, tmax=None, time=None, codes=None):
1906 '''
1907 Get events matching given constraints.
1909 %(query_args)s
1911 :returns:
1912 List of :py:class:`~pyrocko.model.event.Event` objects.
1914 See :py:meth:`iter_nuts` for details on time span matching.
1915 '''
1917 args = self._get_selection_args(EVENT, obj, tmin, tmax, time, codes)
1918 nuts = sorted(
1919 self.iter_nuts('event', *args), key=lambda nut: nut.dkey)
1921 return [self.get_content(nut) for nut in nuts]
1923 def _redeem_promises(self, *args, order_only=False):
1925 def split_promise(order):
1926 self._split_nuts(
1927 'waveform_promise',
1928 order.tmin, order.tmax,
1929 codes=order.codes,
1930 path=order.source_id)
1932 tmin, tmax, _ = args
1934 waveforms = list(self.iter_nuts('waveform', *args))
1935 promises = list(self.iter_nuts('waveform_promise', *args))
1937 codes_to_avail = defaultdict(list)
1938 for nut in waveforms:
1939 codes_to_avail[nut.codes].append((nut.tmin, nut.tmax))
1941 def tts(x):
1942 if isinstance(x, tuple):
1943 return tuple(tts(e) for e in x)
1944 elif isinstance(x, list):
1945 return list(tts(e) for e in x)
1946 else:
1947 return util.time_to_str(x)
1949 orders = []
1950 for promise in promises:
1951 waveforms_avail = codes_to_avail[promise.codes]
1952 for block_tmin, block_tmax in blocks(
1953 max(tmin, promise.tmin),
1954 min(tmax, promise.tmax),
1955 promise.deltat):
1957 orders.append(
1958 WaveformOrder(
1959 source_id=promise.file_path,
1960 codes=promise.codes,
1961 tmin=block_tmin,
1962 tmax=block_tmax,
1963 deltat=promise.deltat,
1964 gaps=gaps(waveforms_avail, block_tmin, block_tmax)))
1966 orders_noop, orders = lpick(lambda order: order.gaps, orders)
1968 order_keys_noop = set(order_key(order) for order in orders_noop)
1969 if len(order_keys_noop) != 0 or len(orders_noop) != 0:
1970 logger.info(
1971 'Waveform orders already satisified with cached/local data: '
1972 '%i (%i)' % (len(order_keys_noop), len(orders_noop)))
1974 for order in orders_noop:
1975 split_promise(order)
1977 if order_only:
1978 if orders:
1979 self._pending_orders.extend(orders)
1980 logger.info(
1981 'Enqueuing %i waveform order%s.'
1982 % len_plural(orders))
1983 return
1984 else:
1985 if self._pending_orders:
1986 orders.extend(self._pending_orders)
1987 logger.info(
1988 'Adding %i previously enqueued order%s.'
1989 % len_plural(self._pending_orders))
1991 self._pending_orders = []
1993 source_ids = []
1994 sources = {}
1995 for source in self._sources:
1996 if isinstance(source, fdsn.FDSNSource):
1997 source_ids.append(source._source_id)
1998 sources[source._source_id] = source
2000 source_priority = dict(
2001 (source_id, i) for (i, source_id) in enumerate(source_ids))
2003 order_groups = defaultdict(list)
2004 for order in orders:
2005 order_groups[order_key(order)].append(order)
2007 for k, order_group in order_groups.items():
2008 order_group.sort(
2009 key=lambda order: source_priority[order.source_id])
2011 n_order_groups = len(order_groups)
2013 if len(order_groups) != 0 or len(orders) != 0:
2014 logger.info(
2015 'Waveform orders standing for download: %i (%i)'
2016 % (len(order_groups), len(orders)))
2018 task = make_task('Waveform orders processed', n_order_groups)
2019 else:
2020 task = None
2022 def release_order_group(order):
2023 okey = order_key(order)
2024 for followup in order_groups[okey]:
2025 split_promise(followup)
2027 del order_groups[okey]
2029 if task:
2030 task.update(n_order_groups - len(order_groups))
2032 def noop(order):
2033 pass
2035 def success(order):
2036 release_order_group(order)
2037 split_promise(order)
2039 def batch_add(paths):
2040 self.add(paths)
2042 calls = queue.Queue()
2044 def enqueue(f):
2045 def wrapper(*args):
2046 calls.put((f, args))
2048 return wrapper
2050 while order_groups:
2052 orders_now = []
2053 empty = []
2054 for k, order_group in order_groups.items():
2055 try:
2056 orders_now.append(order_group.pop(0))
2057 except IndexError:
2058 empty.append(k)
2060 for k in empty:
2061 del order_groups[k]
2063 by_source_id = defaultdict(list)
2064 for order in orders_now:
2065 by_source_id[order.source_id].append(order)
2067 threads = []
2068 for source_id in by_source_id:
2069 def download():
2070 try:
2071 sources[source_id].download_waveforms(
2072 by_source_id[source_id],
2073 success=enqueue(success),
2074 error_permanent=enqueue(split_promise),
2075 error_temporary=noop,
2076 batch_add=enqueue(batch_add))
2078 finally:
2079 calls.put(None)
2081 thread = threading.Thread(target=download)
2082 thread.start()
2083 threads.append(thread)
2085 ndone = 0
2086 while ndone < len(threads):
2087 ret = calls.get()
2088 if ret is None:
2089 ndone += 1
2090 else:
2091 ret[0](*ret[1])
2093 for thread in threads:
2094 thread.join()
2096 if task:
2097 task.update(n_order_groups - len(order_groups))
2099 if task:
2100 task.done()
2102 @filldocs
2103 def get_waveform_nuts(
2104 self, obj=None, tmin=None, tmax=None, time=None, codes=None,
2105 order_only=False):
2107 '''
2108 Get waveform content entities matching given constraints.
2110 %(query_args)s
2112 Like :py:meth:`get_nuts` with ``kind='waveform'`` but additionally
2113 resolves matching waveform promises (downloads waveforms from remote
2114 sources).
2116 See :py:meth:`iter_nuts` for details on time span matching.
2117 '''
2119 args = self._get_selection_args(WAVEFORM, obj, tmin, tmax, time, codes)
2120 self._redeem_promises(*args, order_only=order_only)
2121 return sorted(
2122 self.iter_nuts('waveform', *args), key=lambda nut: nut.dkey)
2124 @filldocs
2125 def have_waveforms(
2126 self, obj=None, tmin=None, tmax=None, time=None, codes=None):
2128 '''
2129 Check if any waveforms or waveform promises are available for given
2130 constraints.
2132 %(query_args)s
2133 '''
2135 args = self._get_selection_args(WAVEFORM, obj, tmin, tmax, time, codes)
2136 return bool(list(
2137 self.iter_nuts('waveform', *args, limit=1))) \
2138 or bool(list(
2139 self.iter_nuts('waveform_promise', *args, limit=1)))
    @filldocs
    def get_waveforms(
            self, obj=None, tmin=None, tmax=None, time=None, codes=None,
            uncut=False, want_incomplete=True, degap=True, maxgap=5,
            maxlap=None, snap=None, include_last=False, load_data=True,
            accessor_id='default', operator_params=None, order_only=False):

        '''
        Get waveforms matching given constraints.

        %(query_args)s

        :param uncut:
            Set to ``True``, to disable cutting traces to [``tmin``, ``tmax``]
            and to disable degapping/deoverlapping. Returns untouched traces as
            they are read from file segment. File segments are always read in
            their entirety.
        :type uncut:
            bool

        :param want_incomplete:
            If ``True``, gappy/incomplete traces are included in the result.
        :type want_incomplete:
            bool

        :param degap:
            If ``True``, connect traces and remove gaps and overlaps.
        :type degap:
            bool

        :param maxgap:
            Maximum gap size in samples which is filled with interpolated
            samples when ``degap`` is ``True``.
        :type maxgap:
            int

        :param maxlap:
            Maximum overlap size in samples which is removed when ``degap`` is
            ``True``.
        :type maxlap:
            int

        :param snap:
            Rounding functions used when computing sample index from time
            instance, for trace start and trace end, respectively. By default,
            ``(round, round)`` is used.
        :type snap:
            tuple of 2 callables

        :param include_last:
            If ``True``, add one more sample to the returned traces (the sample
            which would be the first sample of a query with ``tmin`` set to the
            current value of ``tmax``).
        :type include_last:
            bool

        :param load_data:
            If ``True``, waveform data samples are read from files (or cache).
            If ``False``, meta-information-only traces are returned (dummy
            traces with no data samples).
        :type load_data:
            bool

        :param accessor_id:
            Name of consumer on who's behalf data is accessed. Used in cache
            management (see :py:mod:`~pyrocko.squirrel.cache`). Used as a key
            to distinguish different points of extraction for the decision of
            when to release cached waveform data. Should be used when data is
            alternately extracted from more than one region / selection.
        :type accessor_id:
            str

        See :py:meth:`iter_nuts` for details on time span matching.

        Loaded data is kept in memory (at least) until
        :py:meth:`clear_accessor` has been called or
        :py:meth:`advance_accessor` has been called two consecutive times
        without data being accessed between the two calls (by this accessor).
        Data may still be further kept in the memory cache if held alive by
        consumers with a different ``accessor_id``.
        '''

        tmin, tmax, codes = self._get_selection_args(
            WAVEFORM, obj, tmin, tmax, time, codes)

        self_tmin, self_tmax = self.get_time_span(
            ['waveform', 'waveform_promise'])

        if None in (self_tmin, self_tmax):
            logger.warning(
                'No waveforms available.')
            return []

        # Fall back to the full available time span for unset bounds.
        tmin = tmin if tmin is not None else self_tmin
        tmax = tmax if tmax is not None else self_tmax

        if codes is not None and len(codes) == 1:
            # TODO: fix for multiple / mixed codes
            # Operator channels (derived/combined traces) are handled by the
            # operator itself instead of the plain extraction below.
            operator = self.get_operator(codes[0])
            if operator is not None:
                return operator.get_waveforms(
                    self, codes[0],
                    tmin=tmin, tmax=tmax,
                    uncut=uncut, want_incomplete=want_incomplete, degap=degap,
                    maxgap=maxgap, maxlap=maxlap, snap=snap,
                    include_last=include_last, load_data=load_data,
                    accessor_id=accessor_id, params=operator_params)

        nuts = self.get_waveform_nuts(
            obj, tmin, tmax, time, codes, order_only=order_only)

        if order_only:
            return []

        if load_data:
            traces = [
                self.get_content(nut, 'waveform', accessor_id) for nut in nuts]

        else:
            # Meta-only dummy traces, built from index info without file I/O.
            traces = [
                trace.Trace(**nut.trace_kwargs) for nut in nuts]

        if uncut:
            return traces

        if snap is None:
            snap = (round, round)

        chopped = []
        for tr in traces:
            if not load_data and tr.ydata is not None:
                # Trace came from cache with data attached although meta-only
                # was requested; strip a copy, leave the cached one intact.
                tr = tr.copy(data=False)
                tr.ydata = None

            try:
                chopped.append(tr.chop(
                    tmin, tmax,
                    inplace=False,
                    snap=snap,
                    include_last=include_last))

            except trace.NoData:
                pass

        processed = self._process_chopped(
            chopped, degap, maxgap, maxlap, want_incomplete, tmin, tmax)

        return processed
    @filldocs
    def chopper_waveforms(
            self, obj=None, tmin=None, tmax=None, time=None, codes=None,
            tinc=None, tpad=0.,
            want_incomplete=True, snap_window=False,
            degap=True, maxgap=5, maxlap=None,
            snap=None, include_last=False, load_data=True,
            accessor_id=None, clear_accessor=True, operator_params=None,
            grouping=None):

        '''
        Iterate window-wise over waveform archive.

        %(query_args)s

        :param tinc:
            Time increment (window shift time) (default uses ``tmax-tmin``).
        :type tinc:
            timestamp

        :param tpad:
            Padding time appended on either side of the data window (window
            overlap is ``2*tpad``).
        :type tpad:
            timestamp

        :param want_incomplete:
            If ``True``, gappy/incomplete traces are included in the result.
        :type want_incomplete:
            bool

        :param snap_window:
            If ``True``, start time windows at multiples of tinc with respect
            to system time zero.
        :type snap_window:
            bool

        :param degap:
            If ``True``, connect traces and remove gaps and overlaps.
        :type degap:
            bool

        :param maxgap:
            Maximum gap size in samples which is filled with interpolated
            samples when ``degap`` is ``True``.
        :type maxgap:
            int

        :param maxlap:
            Maximum overlap size in samples which is removed when ``degap`` is
            ``True``.
        :type maxlap:
            int

        :param snap:
            Rounding functions used when computing sample index from time
            instance, for trace start and trace end, respectively. By default,
            ``(round, round)`` is used.
        :type snap:
            tuple of 2 callables

        :param include_last:
            If ``True``, add one more sample to the returned traces (the
            sample which would be the first sample of a query with ``tmin``
            set to the current value of ``tmax``).
        :type include_last:
            bool

        :param load_data:
            If ``True``, waveform data samples are read from files (or cache).
            If ``False``, meta-information-only traces are returned (dummy
            traces with no data samples).
        :type load_data:
            bool

        :param accessor_id:
            Name of consumer on whose behalf data is accessed. Used in cache
            management (see :py:mod:`~pyrocko.squirrel.cache`). Used as a key
            to distinguish different points of extraction for the decision of
            when to release cached waveform data. Should be used when data is
            alternately extracted from more than one region / selection.
        :type accessor_id:
            str

        :param clear_accessor:
            If ``True`` (default), :py:meth:`clear_accessor` is called when the
            chopper finishes. Set to ``False`` to keep loaded waveforms in
            memory when the generator returns.
        :type clear_accessor:
            bool

        :param grouping:
            By default, traversal over the data is over time and all matching
            traces of a time window are yielded. Using this option, it is
            possible to traverse the data first by group (e.g. station or
            network) and second by time. This can reduce the number of traces
            in each batch and thus reduce the memory footprint of the process.
        :type grouping:
            :py:class:`~pyrocko.squirrel.operator.Grouping`

        :yields:
            A list of :py:class:`~pyrocko.trace.Trace` objects for every
            extracted time window.

        See :py:meth:`iter_nuts` for details on time span matching.
        '''

        tmin, tmax, codes = self._get_selection_args(
            WAVEFORM, obj, tmin, tmax, time, codes)

        self_tmin, self_tmax = self.get_time_span(
            ['waveform', 'waveform_promise'])

        if None in (self_tmin, self_tmax):
            logger.warning(
                'Content has undefined time span. No waveforms and no '
                'waveform promises?')
            return

        if snap_window and tinc is not None:
            # Align the overall query span to multiples of tinc (with respect
            # to system time zero).
            tmin = tmin if tmin is not None else self_tmin
            tmax = tmax if tmax is not None else self_tmax
            tmin = math.floor(tmin / tinc) * tinc
            tmax = math.ceil(tmax / tinc) * tinc
        else:
            # Default span is shrunk by tpad so that padded window queries
            # stay within the available data.
            tmin = tmin if tmin is not None else self_tmin + tpad
            tmax = tmax if tmax is not None else self_tmax - tpad

        # Default: one single window covering the whole span.
        tinc = tinc if tinc is not None else tmax - tmin

        try:
            if accessor_id is None:
                accessor_id = 'chopper%i' % self._n_choppers_active

            self._n_choppers_active += 1

            # eps guards against a spurious extra window caused by floating
            # point rounding when (tmax - tmin) is a multiple of tinc.
            eps = tinc * 1e-6
            if tinc != 0.0:
                nwin = int(((tmax - eps) - tmin) / tinc) + 1
            else:
                nwin = 1

            if grouping is None:
                codes_list = [codes]
            else:
                # Build an operator which partitions the available waveform
                # codes into groups; iterate group-wise below.
                operator = Operator(
                    filtering=CodesPatternFiltering(codes=codes),
                    grouping=grouping)

                available = set(self.get_codes(kind='waveform'))
                available.update(self.get_codes(kind='waveform_promise'))
                operator.update_mappings(sorted(available))

                codes_list = [
                    codes_patterns_list(scl)
                    for scl in operator.iter_in_codes()]

            ngroups = len(codes_list)
            for igroup, scl in enumerate(codes_list):
                for iwin in range(nwin):
                    # Last window is clipped to tmax.
                    wmin, wmax = tmin+iwin*tinc, min(tmin+(iwin+1)*tinc, tmax)

                    chopped = self.get_waveforms(
                        tmin=wmin-tpad,
                        tmax=wmax+tpad,
                        codes=scl,
                        snap=snap,
                        include_last=include_last,
                        load_data=load_data,
                        want_incomplete=want_incomplete,
                        degap=degap,
                        maxgap=maxgap,
                        maxlap=maxlap,
                        accessor_id=accessor_id,
                        operator_params=operator_params)

                    self.advance_accessor(accessor_id)

                    yield Batch(
                        tmin=wmin,
                        tmax=wmax,
                        i=iwin,
                        n=nwin,
                        igroup=igroup,
                        ngroups=ngroups,
                        traces=chopped)

        finally:
            # Always release the chopper slot, even on early generator close;
            # optionally also drop waveforms cached on behalf of accessor_id.
            self._n_choppers_active -= 1
            if clear_accessor:
                self.clear_accessor(accessor_id, 'waveform')
2482 def _process_chopped(
2483 self, chopped, degap, maxgap, maxlap, want_incomplete, tmin, tmax):
2485 chopped.sort(key=lambda a: a.full_id)
2486 if degap:
2487 chopped = trace.degapper(chopped, maxgap=maxgap, maxlap=maxlap)
2489 if not want_incomplete:
2490 chopped_weeded = []
2491 for tr in chopped:
2492 emin = tr.tmin - tmin
2493 emax = tr.tmax + tr.deltat - tmax
2494 if (abs(emin) <= 0.5*tr.deltat and abs(emax) <= 0.5*tr.deltat):
2495 chopped_weeded.append(tr)
2497 elif degap:
2498 if (0. < emin <= 5. * tr.deltat
2499 and -5. * tr.deltat <= emax < 0.):
2501 tr.extend(tmin, tmax-tr.deltat, fillmethod='repeat')
2502 chopped_weeded.append(tr)
2504 chopped = chopped_weeded
2506 return chopped
2508 def _get_pyrocko_stations(
2509 self, obj=None, tmin=None, tmax=None, time=None, codes=None):
2511 from pyrocko import model as pmodel
2513 if codes is not None:
2514 codes = codes_patterns_for_kind(STATION, codes)
2516 by_nsl = defaultdict(lambda: (list(), list()))
2517 for station in self.get_stations(obj, tmin, tmax, time, codes):
2518 sargs = station._get_pyrocko_station_args()
2519 by_nsl[station.codes.nsl][0].append(sargs)
2521 if codes is not None:
2522 codes = [model.CodesNSLCE(c) for c in codes]
2524 for channel in self.get_channels(obj, tmin, tmax, time, codes):
2525 sargs = channel._get_pyrocko_station_args()
2526 sargs_list, channels_list = by_nsl[channel.codes.nsl]
2527 sargs_list.append(sargs)
2528 channels_list.append(channel)
2530 pstations = []
2531 nsls = list(by_nsl.keys())
2532 nsls.sort()
2533 for nsl in nsls:
2534 sargs_list, channels_list = by_nsl[nsl]
2535 sargs = util.consistency_merge(
2536 [('',) + x for x in sargs_list])
2538 by_c = defaultdict(list)
2539 for ch in channels_list:
2540 by_c[ch.codes.channel].append(ch._get_pyrocko_channel_args())
2542 chas = list(by_c.keys())
2543 chas.sort()
2544 pchannels = []
2545 for cha in chas:
2546 list_of_cargs = by_c[cha]
2547 cargs = util.consistency_merge(
2548 [('',) + x for x in list_of_cargs])
2549 pchannels.append(pmodel.Channel(*cargs))
2551 pstations.append(
2552 pmodel.Station(*sargs, channels=pchannels))
2554 return pstations
2556 @property
2557 def pile(self):
2559 '''
2560 Emulates the older :py:class:`pyrocko.pile.Pile` interface.
2562 This property exposes a :py:class:`pyrocko.squirrel.pile.Pile` object,
2563 which emulates most of the older :py:class:`pyrocko.pile.Pile` methods
2564 but uses the fluffy power of the Squirrel under the hood.
2566 This interface can be used as a drop-in replacement for piles which are
2567 used in existing scripts and programs for efficient waveform data
2568 access. The Squirrel-based pile scales better for large datasets. Newer
2569 scripts should use Squirrel's native methods to avoid the emulation
2570 overhead.
2571 '''
2572 from . import pile
2574 if self._pile is None:
2575 self._pile = pile.Pile(self)
2577 return self._pile
    def snuffle(self):
        '''
        Look at dataset in Snuffler.

        Delegates to the pile emulation interface (see :py:attr:`pile`).
        '''
        self.pile.snuffle()
2585 def _gather_codes_keys(self, kind, gather, selector):
2586 return set(
2587 gather(codes)
2588 for codes in self.iter_codes(kind)
2589 if selector is None or selector(codes))
2591 def __str__(self):
2592 return str(self.get_stats())
    def get_coverage(
            self, kind, tmin=None, tmax=None, codes=None, limit=None):

        '''
        Get coverage information.

        Get information about strips of gapless data coverage.

        :param kind:
            Content kind to be queried.
        :type kind:
            str

        :param tmin:
            Start time of query interval.
        :type tmin:
            timestamp

        :param tmax:
            End time of query interval.
        :type tmax:
            timestamp

        :param codes:
            If given, restrict query to given content codes patterns.
        :type codes:
            :py:class:`list` of :py:class:`~pyrocko.squirrel.model.Codes`
            objects appropriate for the queried content type, or anything which
            can be converted to such objects.

        :param limit:
            Limit query to return only up to a given maximum number of entries
            per matching time series (without setting this option, very gappy
            data could cause the query to execute for a very long time).
        :type limit:
            int

        :returns:
            Information about time spans covered by the requested time series
            data.
        :rtype:
            :py:class:`list` of :py:class:`Coverage` objects
        '''

        # Times are stored split into (seconds, sub-second offset) parts.
        tmin_seconds, tmin_offset = model.tsplit(tmin)
        tmax_seconds, tmax_offset = model.tsplit(tmax)
        kind_id = to_kind_id(kind)

        codes_info = list(self._iter_codes_info(kind=kind))

        # kdata_all entries: (pattern, kind_codes_id, codes_entry, deltat).
        kdata_all = []
        if codes is None:
            for _, codes_entry, deltat, kind_codes_id, _ in codes_info:
                kdata_all.append(
                    (codes_entry, kind_codes_id, codes_entry, deltat))

        else:
            for codes_entry in codes:
                pattern = to_codes(kind_id, codes_entry)
                for _, codes_entry, deltat, kind_codes_id, _ in codes_info:
                    if model.match_codes(pattern, codes_entry):
                        kdata_all.append(
                            (pattern, kind_codes_id, codes_entry, deltat))

        kind_codes_ids = [x[1] for x in kdata_all]

        # Number of nuts covering exactly tmin, per (codes, deltat); used as
        # the initial count of each coverage profile below.
        counts_at_tmin = {}
        if tmin is not None:
            for nut in self.iter_nuts(
                    kind, tmin, tmin, kind_codes_ids=kind_codes_ids):

                k = nut.codes, nut.deltat
                if k not in counts_at_tmin:
                    counts_at_tmin[k] = 0

                counts_at_tmin[k] += 1

        coverages = []
        for pattern, kind_codes_id, codes_entry, deltat in kdata_all:
            # entry: [pattern, codes, deltat, tmin_total, tmax_total, changes]
            entry = [pattern, codes_entry, deltat, None, None, []]

            # First/last coverage change time for this time series (total
            # extent), obtained via one ASC and one DESC single-row query.
            for i, order in [(0, 'ASC'), (1, 'DESC')]:
                sql = self._sql('''
                    SELECT
                        time_seconds,
                        time_offset
                    FROM %(db)s.%(coverage)s
                    WHERE
                        kind_codes_id == ?
                    ORDER BY
                        kind_codes_id ''' + order + ''',
                        time_seconds ''' + order + ''',
                        time_offset ''' + order + '''
                    LIMIT 1
                ''')

                for row in self._conn.execute(sql, [kind_codes_id]):
                    entry[3+i] = model.tjoin(row[0], row[1])

            # No coverage information at all for this series: skip it.
            if None in entry[3:5]:
                continue

            args = [kind_codes_id]

            sql_time = ''
            if tmin is not None:
                # intentionally < because (== tmin) is queried from nuts
                sql_time += ' AND ( ? < time_seconds ' \
                    'OR ( ? == time_seconds AND ? < time_offset ) ) '
                args.extend([tmin_seconds, tmin_seconds, tmin_offset])

            if tmax is not None:
                sql_time += ' AND ( time_seconds < ? ' \
                    'OR ( ? == time_seconds AND time_offset <= ? ) ) '
                args.extend([tmax_seconds, tmax_seconds, tmax_offset])

            sql_limit = ''
            if limit is not None:
                sql_limit = ' LIMIT ?'
                args.append(limit)

            # Coverage-change events (time, step) within the query interval.
            sql = self._sql('''
                SELECT
                    time_seconds,
                    time_offset,
                    step
                FROM %(db)s.%(coverage)s
                WHERE
                    kind_codes_id == ?
                    ''' + sql_time + '''
                ORDER BY
                    kind_codes_id,
                    time_seconds,
                    time_offset
            ''' + sql_limit)

            rows = list(self._conn.execute(sql, args))

            if limit is not None and len(rows) == limit:
                # Truncated result: mark changes as unavailable rather than
                # returning a misleading partial profile.
                entry[-1] = None
            else:
                # Integrate the step changes into a running count profile.
                counts = counts_at_tmin.get((codes_entry, deltat), 0)
                tlast = None
                if tmin is not None:
                    entry[-1].append((tmin, counts))
                    tlast = tmin

                for row in rows:
                    t = model.tjoin(row[0], row[1])
                    counts += row[2]
                    entry[-1].append((t, counts))
                    tlast = t

                if tmax is not None and (tlast is None or tlast != tmax):
                    entry[-1].append((tmax, counts))

            coverages.append(model.Coverage.from_values(entry + [kind_id]))

        return coverages
    def get_stationxml(
            self, obj=None, tmin=None, tmax=None, time=None, codes=None,
            level='response'):

        '''
        Get station/channel/response metadata in StationXML representation.

        %(query_args)s

        :param level:
            Detail level of the result: one of ``'network'``, ``'station'``,
            ``'channel'`` or ``'response'``.
        :type level:
            str

        :returns:
            :py:class:`~pyrocko.io.stationxml.FDSNStationXML` object.

        :raises:
            :py:class:`~pyrocko.io.stationxml.Inconsistencies` when station,
            channel or response epochs overlap or do not match up.
        '''

        if level not in ('network', 'station', 'channel', 'response'):
            raise ValueError('Invalid level: %s' % level)

        tmin, tmax, codes = self._get_selection_args(
            CHANNEL, obj, tmin, tmax, time, codes)

        filtering = CodesPatternFiltering(codes=codes)

        # Distinct (net, sta, loc, cha) tuples of all matching channels.
        nslcs = list(set(
            codes.nslc for codes in
            filtering.filter(self.get_codes(kind='channel'))))

        from pyrocko.io import stationxml as sx

        # Walk the nested network -> station -> location -> channel tree,
        # descending only as deep as the requested level demands.
        networks = []
        for net, stas in prefix_tree(nslcs):
            network = sx.Network(code=net)
            networks.append(network)

            if level not in ('station', 'channel', 'response'):
                continue

            for sta, locs in stas:
                stations = self.get_stations(
                    tmin=tmin,
                    tmax=tmax,
                    codes=(net, sta, '*'),
                    model='stationxml')

                errors = sx.check_overlaps(
                    'Station', (net, sta), stations)

                if errors:
                    raise sx.Inconsistencies(
                        'Inconsistencies found:\n %s'
                        % '\n '.join(errors))

                network.station_list.extend(stations)

                if level not in ('channel', 'response'):
                    continue

                for loc, chas in locs:
                    for cha, _ in chas:
                        channels = self.get_channels(
                            tmin=tmin,
                            tmax=tmax,
                            codes=(net, sta, loc, cha),
                            model='stationxml')

                        errors = sx.check_overlaps(
                            'Channel', (net, sta, loc, cha), channels)

                        if errors:
                            raise sx.Inconsistencies(
                                'Inconsistencies found:\n %s'
                                % '\n '.join(errors))

                        for channel in channels:
                            # Attach each channel epoch to the station epoch
                            # containing it.
                            station = sx.find_containing(stations, channel)
                            if station is not None:
                                station.channel_list.append(channel)
                            else:
                                raise sx.Inconsistencies(
                                    'No station or station epoch found for '
                                    'channel: %s' % '.'.join(
                                        (net, sta, loc, cha)))

                            if level != 'response':
                                continue

                            response_sq, response_sx = self.get_response(
                                codes=(net, sta, loc, cha),
                                tmin=channel.start_date,
                                tmax=channel.end_date,
                                model='stationxml+')

                            # Response epoch must match the channel epoch
                            # (open-ended times compare equal).
                            if not (
                                    sx.eq_open(
                                        channel.start_date, response_sq.tmin)
                                    and sx.eq_open(
                                        channel.end_date, response_sq.tmax)):

                                raise sx.Inconsistencies(
                                    'Response time span does not match '
                                    'channel time span: %s' % '.'.join(
                                        (net, sta, loc, cha)))

                            channel.response = response_sx

        return sx.FDSNStationXML(
            source='Generated by Pyrocko Squirrel.',
            network_list=networks)
2860 def add_operator(self, op):
2861 self._operators.append(op)
2863 def update_operator_mappings(self):
2864 available = self.get_codes(kind=('channel'))
2866 for operator in self._operators:
2867 operator.update_mappings(available, self._operator_registry)
2869 def iter_operator_mappings(self):
2870 for operator in self._operators:
2871 for in_codes, out_codes in operator.iter_mappings():
2872 yield operator, in_codes, out_codes
2874 def get_operator_mappings(self):
2875 return list(self.iter_operator_mappings())
2877 def get_operator(self, codes):
2878 try:
2879 return self._operator_registry[codes][0]
2880 except KeyError:
2881 return None
2883 def get_operator_group(self, codes):
2884 try:
2885 return self._operator_registry[codes]
2886 except KeyError:
2887 return None, (None, None, None)
2889 def iter_operator_codes(self):
2890 for _, _, out_codes in self.iter_operator_mappings():
2891 for codes in out_codes:
2892 yield codes
2894 def get_operator_codes(self):
2895 return list(self.iter_operator_codes())
2897 def print_tables(self, table_names=None, stream=None):
2898 '''
2899 Dump raw database tables in textual form (for debugging purposes).
2901 :param table_names:
2902 Names of tables to be dumped or ``None`` to dump all.
2903 :type table_names:
2904 :py:class:`list` of :py:class:`str`
2906 :param stream:
2907 Open file or ``None`` to dump to standard output.
2908 '''
2910 if stream is None:
2911 stream = sys.stdout
2913 if isinstance(table_names, str):
2914 table_names = [table_names]
2916 if table_names is None:
2917 table_names = [
2918 'selection_file_states',
2919 'selection_nuts',
2920 'selection_kind_codes_count',
2921 'files', 'nuts', 'kind_codes', 'kind_codes_count']
2923 m = {
2924 'selection_file_states': '%(db)s.%(file_states)s',
2925 'selection_nuts': '%(db)s.%(nuts)s',
2926 'selection_kind_codes_count': '%(db)s.%(kind_codes_count)s',
2927 'files': 'files',
2928 'nuts': 'nuts',
2929 'kind_codes': 'kind_codes',
2930 'kind_codes_count': 'kind_codes_count'}
2932 for table_name in table_names:
2933 self._database.print_table(
2934 m[table_name] % self._names, stream=stream)
class SquirrelStats(Object):
    '''
    Container to hold statistics about contents available from a Squirrel.

    See also :py:meth:`Squirrel.get_stats`.
    '''

    nfiles = Int.T(
        help='Number of files in selection.')
    nnuts = Int.T(
        help='Number of index nuts in selection.')
    codes = List.T(
        Tuple.T(content_t=String.T()),
        help='Available code sequences in selection, e.g. '
             '(agency, network, station, location) for station nuts.')
    kinds = List.T(
        String.T(),
        help='Available content types in selection.')
    total_size = Int.T(
        # Fixed typo in help text: 'is selection' -> 'in selection'.
        help='Aggregated file size of files in selection.')
    counts = Dict.T(
        String.T(), Dict.T(Tuple.T(content_t=String.T()), Int.T()),
        help='Breakdown of how many nuts of any content type and code '
             'sequence are available in selection, ``counts[kind][codes]``.')
    time_spans = Dict.T(
        String.T(), Tuple.T(content_t=Timestamp.T()),
        help='Time spans by content type.')
    sources = List.T(
        String.T(),
        help='Descriptions of attached sources.')
    operators = List.T(
        String.T(),
        help='Descriptions of attached operators.')

    def __str__(self):
        # Total number of nuts per content kind.
        kind_counts = dict(
            (kind, sum(self.counts[kind].values())) for kind in self.kinds)

        scodes = model.codes_to_str_abbreviated(self.codes)

        ssources = '<none>' if not self.sources else '\n' + '\n'.join(
            ' ' + s for s in self.sources)

        soperators = '<none>' if not self.operators else '\n' + '\n'.join(
            ' ' + s for s in self.operators)

        def stime(t):
            # Open-ended span boundaries are rendered as '<none>'.
            return util.tts(t) if t is not None and t not in (
                model.g_tmin, model.g_tmax) else '<none>'

        def stable(rows):
            # Column-aligned text table.
            ns = [max(len(w) for w in col) for col in zip(*rows)]
            return '\n'.join(
                ' '.join(w.ljust(n) for n, w in zip(ns, row))
                for row in rows)

        def indent(s):
            return '\n'.join(' '+line for line in s.splitlines())

        stspans = '<none>' if not self.kinds else '\n' + indent(stable([(
            kind + ':',
            str(kind_counts[kind]),
            stime(self.time_spans[kind][0]),
            '-',
            stime(self.time_spans[kind][1])) for kind in sorted(self.kinds)]))

        s = '''
Number of files: %i
Total size of known files: %s
Number of index nuts: %i
Available content kinds: %s
Available codes: %s
Sources: %s
Operators: %s''' % (
            self.nfiles,
            util.human_bytesize(self.total_size),
            self.nnuts,
            stspans, scodes, ssources, soperators)

        return s.lstrip()
# Public API of this module.
__all__ = [
    'Squirrel',
    'SquirrelStats',
]