Coverage for /usr/local/lib/python3.11/dist-packages/pyrocko/squirrel/base.py: 85%
868 statements
« prev ^ index » next coverage.py v6.5.0, created at 2023-10-04 09:52 +0000
« prev ^ index » next coverage.py v6.5.0, created at 2023-10-04 09:52 +0000
1# http://pyrocko.org - GPLv3
2#
3# The Pyrocko Developers, 21st Century
4# ---|P------/S----------~Lg----------
6'''
7Squirrel main classes.
8'''
10import sys
11import os
12import time
13import math
14import logging
15import threading
16import queue
17from collections import defaultdict
19from pyrocko.guts import Object, Int, List, Tuple, String, Timestamp, Dict
20from pyrocko import util, trace
21from pyrocko.progress import progress
22from pyrocko.plot import nice_time_tick_inc_approx_secs
24from . import model, io, cache, dataset
26from .model import to_kind_id, WaveformOrder, to_kind, to_codes, \
27 STATION, CHANNEL, RESPONSE, EVENT, WAVEFORM, codes_patterns_list, \
28 codes_patterns_for_kind
29from .client import fdsn, catalog
30from .selection import Selection, filldocs
31from .database import abspath
32from .operators.base import Operator, CodesPatternFiltering
33from . import client, environment, error
# Module logger; 'psq' is the common prefix of Squirrel-related loggers.
logger = logging.getLogger('psq.base')

# Prefix under which classes of this module are registered with the guts
# serialization framework (see pyrocko.guts imports above).
guts_prefix = 'squirrel'
def nonef(f, xs):
    '''
    Apply ``f`` to the non-``None`` items of ``xs``.

    Returns ``None`` when ``xs`` is empty or contains only ``None`` values.
    '''
    filtered = [x for x in xs if x is not None]
    return f(filtered) if filtered else None
def make_task(*args):
    '''
    Create a progress task, bound to this module's logger.
    '''
    kwargs = {'logger': logger}
    return progress.task(*args, **kwargs)
def lpick(condition, seq):
    '''
    Partition ``seq`` into two lists based on ``condition``.

    :returns:
        Tuple ``(false_items, true_items)``: items for which
        ``condition(item)`` is falsy go into the first list, all others
        into the second.
    '''
    false_items = []
    true_items = []
    for item in seq:
        if condition(item):
            true_items.append(item)
        else:
            false_items.append(item)

    return false_items, true_items
def len_plural(obj):
    '''
    Return ``(len(obj), suffix)`` where suffix is ``'s'`` unless length is 1.

    Convenience for building messages like ``'%i file%s' % len_plural(xs)``.
    '''
    n = len(obj)
    if n == 1:
        return n, ''

    return n, 's'
def blocks(tmin, tmax, deltat, nsamples_block=100000):
    '''
    Yield consecutive time-block intervals covering ``[tmin, tmax]``.

    The block length is a "nice" time increment close to
    ``deltat * nsamples_block``; blocks are aligned to integer multiples of
    that length. Yields ``(tmin_block, tmax_block)`` tuples.
    '''
    tblock = nice_time_tick_inc_approx_secs(
        util.to_time_float(deltat * nsamples_block))

    iblock_min = int(math.floor(tmin / tblock))
    iblock_max = int(math.ceil(tmax / tblock))
    for iblock in range(iblock_min, iblock_max):
        t0 = iblock * tblock
        yield t0, t0 + tblock
def gaps(avail, tmin, tmax):
    '''
    Compute sub-intervals of ``[tmin, tmax]`` not covered by ``avail``.

    :param avail:
        List of ``(tmin_a, tmax_a)`` intervals of availability.
    :returns:
        List of ``(tmin_gap, tmax_gap)`` uncovered intervals.
    '''
    assert tmin < tmax

    # Sweep line: +1 events open coverage, -1 events close it. The query
    # span itself contributes a closing event at tmin and an opening event
    # at tmax, so gaps touching the edges are detected as well.
    events = [(tmax, 1), (tmin, -1)]
    for (tmin_a, tmax_a) in avail:
        assert tmin_a < tmax_a
        events.append((tmin_a, 1))
        events.append((tmax_a, -1))

    events.sort()

    uncovered = []
    level = 1
    tmin_g = None
    for t, step in events:
        if level == 1 and step == -1:
            # Coverage just dropped to zero: a gap may start here.
            tmin_g = t
        elif level == 0 and step == 1 and tmin_g is not None:
            # Coverage resumes: close the gap, unless it is empty.
            if tmin_g != t:
                uncovered.append((tmin_g, t))

        level += step

    return uncovered
def order_key(order):
    '''
    Key function to group or sort orders by codes and time span.
    '''
    return tuple(
        getattr(order, name) for name in ('codes', 'tmin', 'tmax'))
103def _is_exact(pat):
104 return not ('*' in pat or '?' in pat or ']' in pat or '[' in pat)
def prefix_tree(tups):
    '''
    Build a prefix tree from a list of equal-length tuples.

    :returns:
        Nested list of ``(value, children)`` pairs, sorted by value at
        each level. Leaves carry empty children lists.
    '''
    if not tups:
        return []

    # Leaf level: one (value, []) node per remaining 1-tuple.
    if len(tups[0]) == 1:
        return sorted((tup[0], []) for tup in tups)

    groups = defaultdict(list)
    for tup in tups:
        groups[tup[0]].append(tup[1:])

    return [
        (value, prefix_tree(groups[value]))
        for value in sorted(groups.keys())]
def match_time_span(tmin, tmax, obj):
    '''
    Check if the query span ``[tmin, tmax]`` intersects ``obj``'s time span.

    ``None`` bounds (on either side) are treated as unbounded. The query's
    lower edge is compared half-open against ``obj.tmax`` (``tmin <
    obj.tmax``), the upper edge closed against ``obj.tmin``.
    '''
    if obj.tmin is not None and tmax is not None and obj.tmin > tmax:
        return False

    if tmin is not None and obj.tmax is not None and tmin >= obj.tmax:
        return False

    return True
class Batch(object):
    '''
    Batch of waveforms from window-wise data extraction.

    One such object is yielded for every time window processed by the
    :py:meth:`Squirrel.chopper_waveforms` method, bundling the extracted
    traces with the window's position in the overall sequence.

    *Attributes:*

    .. py:attribute:: tmin

        Start of this time window.

    .. py:attribute:: tmax

        End of this time window.

    .. py:attribute:: i

        Index of this time window in sequence.

    .. py:attribute:: n

        Total number of time windows in sequence.

    .. py:attribute:: igroup

        Index of this time window's sequence group.

    .. py:attribute:: ngroups

        Total number of sequence groups.

    .. py:attribute:: traces

        Extracted waveforms for this time window.
    '''

    def __init__(self, tmin, tmax, i, n, igroup, ngroups, traces):
        self.tmin, self.tmax = tmin, tmax
        self.i, self.n = i, n
        self.igroup, self.ngroups = igroup, ngroups
        self.traces = traces
178class Squirrel(Selection):
179 '''
180 Prompt, lazy, indexing, caching, dynamic seismological dataset access.
182 :param env:
183 Squirrel environment instance or directory path to use as starting
184 point for its detection. By default, the current directory is used as
185 starting point. When searching for a usable environment the directory
186 ``'.squirrel'`` or ``'squirrel'`` in the current (or starting point)
187 directory is used if it exists, otherwise the parent directories are
 188 searched upwards for the existence of such a directory. If no such
189 directory is found, the user's global Squirrel environment
190 ``'$HOME/.pyrocko/squirrel'`` is used.
191 :type env:
192 :py:class:`~pyrocko.squirrel.environment.Environment` or
193 :py:class:`str`
195 :param database:
196 Database instance or path to database. By default the
197 database found in the detected Squirrel environment is used.
198 :type database:
199 :py:class:`~pyrocko.squirrel.database.Database` or :py:class:`str`
201 :param cache_path:
202 Directory path to use for data caching. By default, the ``'cache'``
203 directory in the detected Squirrel environment is used.
204 :type cache_path:
205 :py:class:`str`
207 :param persistent:
208 If given a name, create a persistent selection.
209 :type persistent:
210 :py:class:`str`
212 This is the central class of the Squirrel framework. It provides a unified
213 interface to query and access seismic waveforms, station meta-data and
214 event information from local file collections and remote data sources. For
215 prompt responses, a profound database setup is used under the hood. To
216 speed up assemblage of ad-hoc data selections, files are indexed on first
217 use and the extracted meta-data is remembered in the database for
218 subsequent accesses. Bulk data is lazily loaded from disk and remote
219 sources, just when requested. Once loaded, data is cached in memory to
220 expedite typical access patterns. Files and data sources can be dynamically
221 added to and removed from the Squirrel selection at runtime.
223 Queries are restricted to the contents of the files currently added to the
224 Squirrel selection (usually a subset of the file meta-information
225 collection in the database). This list of files is referred to here as the
226 "selection". By default, temporary tables are created in the attached
227 database to hold the names of the files in the selection as well as various
228 indices and counters. These tables are only visible inside the application
229 which created them and are deleted when the database connection is closed
230 or the application exits. To create a selection which is not deleted at
231 exit, supply a name to the ``persistent`` argument of the Squirrel
232 constructor. Persistent selections are shared among applications using the
233 same database.
235 **Method summary**
237 Some of the methods are implemented in :py:class:`Squirrel`'s base class
238 :py:class:`~pyrocko.squirrel.selection.Selection`.
240 .. autosummary::
242 ~Squirrel.add
243 ~Squirrel.add_source
244 ~Squirrel.add_fdsn
245 ~Squirrel.add_catalog
246 ~Squirrel.add_dataset
247 ~Squirrel.add_virtual
248 ~Squirrel.update
249 ~Squirrel.update_waveform_promises
250 ~Squirrel.advance_accessor
251 ~Squirrel.clear_accessor
252 ~Squirrel.reload
253 ~pyrocko.squirrel.selection.Selection.iter_paths
254 ~Squirrel.iter_nuts
255 ~Squirrel.iter_kinds
256 ~Squirrel.iter_deltats
257 ~Squirrel.iter_codes
258 ~pyrocko.squirrel.selection.Selection.get_paths
259 ~Squirrel.get_nuts
260 ~Squirrel.get_kinds
261 ~Squirrel.get_deltats
262 ~Squirrel.get_codes
263 ~Squirrel.get_counts
264 ~Squirrel.get_time_span
265 ~Squirrel.get_deltat_span
266 ~Squirrel.get_nfiles
267 ~Squirrel.get_nnuts
268 ~Squirrel.get_total_size
269 ~Squirrel.get_stats
270 ~Squirrel.get_content
271 ~Squirrel.get_stations
272 ~Squirrel.get_channels
273 ~Squirrel.get_responses
274 ~Squirrel.get_events
275 ~Squirrel.get_waveform_nuts
276 ~Squirrel.get_waveforms
277 ~Squirrel.chopper_waveforms
278 ~Squirrel.get_coverage
279 ~Squirrel.pile
280 ~Squirrel.snuffle
281 ~Squirrel.glob_codes
282 ~pyrocko.squirrel.selection.Selection.get_database
283 ~Squirrel.print_tables
284 '''
    def __init__(
            self, env=None, database=None, cache_path=None, persistent=None):

        '''
        Set up the Squirrel instance, its database tables and caches.

        Parameters are documented on the class.
        '''

        # Resolve environment from a starting-point path (or CWD) unless an
        # Environment instance is given directly.
        if not isinstance(env, environment.Environment):
            env = environment.get_environment(env)

        # Fall back to environment-provided defaults for unset arguments.
        if database is None:
            database = env.expand_path(env.database_path)

        if cache_path is None:
            cache_path = env.expand_path(env.cache_path)

        if persistent is None:
            persistent = env.persistent

        Selection.__init__(
            self, database=database, persistent=persistent)

        self.get_database().set_basepath(os.path.dirname(env.get_basepath()))

        # Loaded content is cached in memory, waveforms separately from all
        # other content kinds.
        self._content_caches = {
            'waveform': cache.ContentCache(),
            'default': cache.ContentCache()}

        self._cache_path = cache_path

        # Remote sources and registered operators.
        self._sources = []
        self._operators = []
        self._operator_registry = {}

        self._pending_orders = []

        self._pile = None
        self._n_choppers_active = 0

        self.downloads_enabled = True

        # Names of the per-selection tables held in the attached database;
        # self.name is the (possibly persistent) selection name.
        self._names.update({
            'nuts': self.name + '_nuts',
            'kind_codes_count': self.name + '_kind_codes_count',
            'coverage': self.name + '_coverage'})

        with self.transaction('create tables') as cursor:
            self._create_tables_squirrel(cursor)
    def _create_tables_squirrel(self, cursor):
        '''
        Create per-selection tables, indices and triggers (idempotent).

        The ``nuts`` table holds one row per indexed content piece visible
        in this selection, ``kind_codes_count`` aggregates nut counts per
        (kind, codes) combination, and ``coverage`` tracks time-coverage
        change-points per kind_codes entry. Triggers keep the aggregate
        tables in sync with insertions/deletions in ``nuts`` and with file
        removals/updates in the main database.
        '''

        cursor.execute(self._register_table(self._sql(
            '''
                CREATE TABLE IF NOT EXISTS %(db)s.%(nuts)s (
                    nut_id integer PRIMARY KEY,
                    file_id integer,
                    file_segment integer,
                    file_element integer,
                    kind_id integer,
                    kind_codes_id integer,
                    tmin_seconds integer,
                    tmin_offset integer,
                    tmax_seconds integer,
                    tmax_offset integer,
                    kscale integer)
            ''')))

        cursor.execute(self._register_table(self._sql(
            '''
                CREATE TABLE IF NOT EXISTS %(db)s.%(kind_codes_count)s (
                    kind_codes_id integer PRIMARY KEY,
                    count integer)
            ''')))

        # One nut per (file, segment, element) triple.
        cursor.execute(self._sql(
            '''
                CREATE UNIQUE INDEX IF NOT EXISTS %(db)s.%(nuts)s_file_element
                    ON %(nuts)s (file_id, file_segment, file_element)
            '''))

        cursor.execute(self._sql(
            '''
                CREATE INDEX IF NOT EXISTS %(db)s.%(nuts)s_index_file_id
                ON %(nuts)s (file_id)
            '''))

        # Time-range lookup indices; the kscale index supports the binned
        # time-span queries built in _timerange_sql.
        cursor.execute(self._sql(
            '''
                CREATE INDEX IF NOT EXISTS %(db)s.%(nuts)s_index_tmin_seconds
                ON %(nuts)s (kind_id, tmin_seconds)
            '''))

        cursor.execute(self._sql(
            '''
                CREATE INDEX IF NOT EXISTS %(db)s.%(nuts)s_index_tmax_seconds
                ON %(nuts)s (kind_id, tmax_seconds)
            '''))

        cursor.execute(self._sql(
            '''
                CREATE INDEX IF NOT EXISTS %(db)s.%(nuts)s_index_kscale
                ON %(nuts)s (kind_id, kscale, tmin_seconds)
            '''))

        # Drop nuts when their file is deleted from the main database.
        cursor.execute(self._sql(
            '''
                CREATE TRIGGER IF NOT EXISTS %(db)s.%(nuts)s_delete_nuts
                BEFORE DELETE ON main.files FOR EACH ROW
                BEGIN
                  DELETE FROM %(nuts)s WHERE file_id == old.file_id;
                END
            '''))

        # trigger only on size to make silent update of mtime possible
        cursor.execute(self._sql(
            '''
                CREATE TRIGGER IF NOT EXISTS %(db)s.%(nuts)s_delete_nuts2
                BEFORE UPDATE OF size ON main.files FOR EACH ROW
                BEGIN
                  DELETE FROM %(nuts)s WHERE file_id == old.file_id;
                END
            '''))

        # Drop nuts when their file leaves this selection.
        cursor.execute(self._sql(
            '''
                CREATE TRIGGER IF NOT EXISTS
                    %(db)s.%(file_states)s_delete_files
                BEFORE DELETE ON %(db)s.%(file_states)s FOR EACH ROW
                BEGIN
                    DELETE FROM %(nuts)s WHERE file_id == old.file_id;
                END
            '''))

        # Maintain per-(kind, codes) nut counts on insert/delete.
        cursor.execute(self._sql(
            '''
                CREATE TRIGGER IF NOT EXISTS %(db)s.%(nuts)s_inc_kind_codes
                BEFORE INSERT ON %(nuts)s FOR EACH ROW
                BEGIN
                    INSERT OR IGNORE INTO %(kind_codes_count)s VALUES
                    (new.kind_codes_id, 0);
                    UPDATE %(kind_codes_count)s
                    SET count = count + 1
                    WHERE new.kind_codes_id
                         == %(kind_codes_count)s.kind_codes_id;
                END
            '''))

        cursor.execute(self._sql(
            '''
                CREATE TRIGGER IF NOT EXISTS %(db)s.%(nuts)s_dec_kind_codes
                BEFORE DELETE ON %(nuts)s FOR EACH ROW
                BEGIN
                    UPDATE %(kind_codes_count)s
                    SET count = count - 1
                    WHERE old.kind_codes_id
                        == %(kind_codes_count)s.kind_codes_id;
                END
            '''))

        # Coverage table: for each kind_codes entry, a sorted set of time
        # points with a 'step' counter; step +1 at span starts, -1 at span
        # ends, so the running sum gives the number of overlapping spans.
        cursor.execute(self._register_table(self._sql(
            '''
                CREATE TABLE IF NOT EXISTS %(db)s.%(coverage)s (
                    kind_codes_id integer,
                    time_seconds integer,
                    time_offset integer,
                    step integer)
            ''')))

        cursor.execute(self._sql(
            '''
                CREATE UNIQUE INDEX IF NOT EXISTS %(db)s.%(coverage)s_time
                    ON %(coverage)s (kind_codes_id, time_seconds, time_offset)
            '''))

        # Update coverage change-points when a nut is inserted; change-points
        # whose step returns to 0 are removed again.
        cursor.execute(self._sql(
            '''
                CREATE TRIGGER IF NOT EXISTS %(db)s.%(nuts)s_add_coverage
                AFTER INSERT ON %(nuts)s FOR EACH ROW
                BEGIN
                    INSERT OR IGNORE INTO %(coverage)s VALUES
                    (new.kind_codes_id, new.tmin_seconds, new.tmin_offset, 0)
                    ;
                    UPDATE %(coverage)s
                    SET step = step + 1
                    WHERE new.kind_codes_id == %(coverage)s.kind_codes_id
                        AND new.tmin_seconds == %(coverage)s.time_seconds
                        AND new.tmin_offset == %(coverage)s.time_offset
                    ;
                    INSERT OR IGNORE INTO %(coverage)s VALUES
                    (new.kind_codes_id, new.tmax_seconds, new.tmax_offset, 0)
                    ;
                    UPDATE %(coverage)s
                    SET step = step - 1
                    WHERE new.kind_codes_id == %(coverage)s.kind_codes_id
                        AND new.tmax_seconds == %(coverage)s.time_seconds
                        AND new.tmax_offset == %(coverage)s.time_offset
                    ;
                    DELETE FROM %(coverage)s
                        WHERE new.kind_codes_id == %(coverage)s.kind_codes_id
                            AND new.tmin_seconds == %(coverage)s.time_seconds
                            AND new.tmin_offset == %(coverage)s.time_offset
                            AND step == 0
                    ;
                    DELETE FROM %(coverage)s
                        WHERE new.kind_codes_id == %(coverage)s.kind_codes_id
                            AND new.tmax_seconds == %(coverage)s.time_seconds
                            AND new.tmax_offset == %(coverage)s.time_offset
                            AND step == 0
                    ;
                END
            '''))

        # Inverse bookkeeping when a nut is deleted.
        cursor.execute(self._sql(
            '''
                CREATE TRIGGER IF NOT EXISTS %(db)s.%(nuts)s_remove_coverage
                BEFORE DELETE ON %(nuts)s FOR EACH ROW
                BEGIN
                    INSERT OR IGNORE INTO %(coverage)s VALUES
                    (old.kind_codes_id, old.tmin_seconds, old.tmin_offset, 0)
                    ;
                    UPDATE %(coverage)s
                    SET step = step - 1
                    WHERE old.kind_codes_id == %(coverage)s.kind_codes_id
                        AND old.tmin_seconds == %(coverage)s.time_seconds
                        AND old.tmin_offset == %(coverage)s.time_offset
                    ;
                    INSERT OR IGNORE INTO %(coverage)s VALUES
                    (old.kind_codes_id, old.tmax_seconds, old.tmax_offset, 0)
                    ;
                    UPDATE %(coverage)s
                    SET step = step + 1
                    WHERE old.kind_codes_id == %(coverage)s.kind_codes_id
                        AND old.tmax_seconds == %(coverage)s.time_seconds
                        AND old.tmax_offset == %(coverage)s.time_offset
                    ;
                    DELETE FROM %(coverage)s
                        WHERE old.kind_codes_id == %(coverage)s.kind_codes_id
                            AND old.tmin_seconds == %(coverage)s.time_seconds
                            AND old.tmin_offset == %(coverage)s.time_offset
                            AND step == 0
                    ;
                    DELETE FROM %(coverage)s
                        WHERE old.kind_codes_id == %(coverage)s.kind_codes_id
                            AND old.tmax_seconds == %(coverage)s.time_seconds
                            AND old.tmax_offset == %(coverage)s.time_offset
                            AND step == 0
                    ;
                END
            '''))
    def _delete(self):
        '''Delete database tables associated with this Squirrel.'''

        with self.transaction('delete tables') as cursor:
            # Each line of this script is formatted and executed as one
            # statement. The coverage objects use IF EXISTS because they
            # were added in a later schema revision and may be absent.
            for s in '''
                    DROP TRIGGER %(db)s.%(nuts)s_delete_nuts;
                    DROP TRIGGER %(db)s.%(nuts)s_delete_nuts2;
                    DROP TRIGGER %(db)s.%(file_states)s_delete_files;
                    DROP TRIGGER %(db)s.%(nuts)s_inc_kind_codes;
                    DROP TRIGGER %(db)s.%(nuts)s_dec_kind_codes;
                    DROP TABLE %(db)s.%(nuts)s;
                    DROP TABLE %(db)s.%(kind_codes_count)s;
                    DROP TRIGGER IF EXISTS %(db)s.%(nuts)s_add_coverage;
                    DROP TRIGGER IF EXISTS %(db)s.%(nuts)s_remove_coverage;
                    DROP TABLE IF EXISTS %(db)s.%(coverage)s;
                    '''.strip().splitlines():

                cursor.execute(self._sql(s))

        Selection._delete(self)
    @filldocs
    def add(self,
            paths,
            kinds=None,
            format='detect',
            include=None,
            exclude=None,
            check=True):

        '''
        Add files to the selection.

        :param paths:
            Iterator yielding paths to files or directories to be added to the
            selection. Recurses into directories. If given a ``str``, it
            is treated as a single path to be added.
        :type paths:
            :py:class:`list` of :py:class:`str`

        :param kinds:
            Content types to be made available through the Squirrel selection.
            By default, all known content types are accepted.
        :type kinds:
            :py:class:`list` of :py:class:`str`

        :param format:
            File format identifier or ``'detect'`` to enable auto-detection
            (available: %(file_formats)s).
        :type format:
            str

        :param include:
            If not ``None``, files are only included if their paths match the
            given regular expression pattern.
        :type include:
            str

        :param exclude:
            If not ``None``, files are only included if their paths do not
            match the given regular expression pattern.
        :type exclude:
            str

        :param check:
            If ``True``, all file modification times are checked to see if
            cached information has to be updated (slow). If ``False``, only
            previously unknown files are indexed and cached information is used
            for known files, regardless of file state (fast, corresponds to
            Squirrel's ``--optimistic`` mode). File deletions will go
            undetected in the latter case.
        :type check:
            bool

        :Complexity:
            O(log N)
        '''

        if isinstance(kinds, str):
            kinds = (kinds,)

        if isinstance(paths, str):
            paths = [paths]

        kind_mask = model.to_kind_mask(kinds)

        with progress.view():
            # Virtual paths are passed through untouched; real paths are
            # expanded/filtered by iter_select_files.
            Selection.add(
                self, util.iter_select_files(
                    paths,
                    show_progress=False,
                    include=include,
                    exclude=exclude,
                    pass_through=lambda path: path.startswith('virtual:')
                ), kind_mask, format)

        # Index new/modified files, then refresh the selection's nut table.
        self._load(check)
        self._update_nuts()
    def reload(self):
        '''
        Check for modifications and reindex modified files.

        Based on file modification times.
        '''

        # Force mtime re-checking for all files in the selection, reindex
        # whatever changed, then refresh the selection's nut table.
        self._set_file_states_force_check()
        self._load(check=True)
        self._update_nuts()
    def add_virtual(self, nuts, virtual_paths=None):
        '''
        Add content which is not backed by files.

        :param nuts:
            Content pieces to be added.
        :type nuts:
            iterator yielding :py:class:`~pyrocko.squirrel.model.Nut` objects

        :param virtual_paths:
            List of virtual paths to prevent creating a temporary list of the
            nuts while aggregating the file paths for the selection.
        :type virtual_paths:
            :py:class:`list` of :py:class:`str`

        Stores to the main database and the selection.
        '''

        if isinstance(virtual_paths, str):
            virtual_paths = [virtual_paths]

        if virtual_paths is None:
            # Paths must be collected from the nuts themselves, so the nut
            # iterator has to be realized as a list first.
            if not isinstance(nuts, list):
                nuts = list(nuts)
            virtual_paths = set(nut.file_path for nut in nuts)

        # Register paths in the selection, index the nuts in the main
        # database, then pull them into the selection's nut table.
        Selection.add(self, virtual_paths)
        self.get_database().dig(nuts)
        self._update_nuts()
    def add_volatile(self, nuts):
        '''
        Add in-memory content to be cleaned up via the volatile mechanism.

        Like :py:meth:`add_virtual`, but additionally registers the nuts
        with the virtual I/O backend and remembers their virtual paths in
        ``self._volatile_paths`` (presumably for later removal — the
        cleanup itself is not visible in this block).
        '''
        if not isinstance(nuts, list):
            nuts = list(nuts)

        paths = list(set(nut.file_path for nut in nuts))
        io.backends.virtual.add_nuts(nuts)
        self.add_virtual(nuts, paths)
        self._volatile_paths.extend(paths)
    def add_volatile_waveforms(self, traces):
        '''
        Add in-memory waveforms which will be removed when the app closes.

        :param traces:
            Waveforms to be added; each must expose ``tmin``, ``tmax``,
            ``deltat``, ``codes`` and ``data_len()``
            (:py:class:`pyrocko.trace.Trace` interface).

        :returns:
            The generated virtual path under which the waveforms were
            registered.
        '''

        # All traces share one randomly named virtual "file"; each trace
        # becomes one segment of it.
        name = model.random_name()

        path = 'virtual:volatile:%s' % name

        nuts = []
        for itr, tr in enumerate(traces):
            assert tr.tmin <= tr.tmax
            tmin_seconds, tmin_offset = model.tsplit(tr.tmin)
            # End time is derived from sample count, not tr.tmax.
            tmax_seconds, tmax_offset = model.tsplit(
                tr.tmin + tr.data_len()*tr.deltat)

            nuts.append(model.Nut(
                file_path=path,
                file_format='virtual',
                file_segment=itr,
                file_element=0,
                file_mtime=0,
                codes=tr.codes,
                tmin_seconds=tmin_seconds,
                tmin_offset=tmin_offset,
                tmax_seconds=tmax_seconds,
                tmax_offset=tmax_offset,
                deltat=tr.deltat,
                kind_id=to_kind_id('waveform'),
                content=tr))

        self.add_volatile(nuts)
        return path
715 def _load(self, check):
716 for _ in io.iload(
717 self,
718 content=[],
719 skip_unchanged=True,
720 check=check):
721 pass
    def _update_nuts(self, transaction=None):
        '''
        Pull nuts of freshly indexed files into the selection's nut table.

        Copies rows from the main database's ``nuts`` table for all files
        in this selection whose state is not yet "known" (``!= 2``) and
        whose content kind is enabled in the file's kind mask.
        '''
        transaction = transaction or self.transaction('update nuts')
        with make_task('Aggregating selection') as task, \
                transaction as cursor:

            # Let SQLite report progress from within the long-running
            # INSERT ... SELECT statement.
            self._conn.set_progress_handler(task.update, 100000)
            nrows = cursor.execute(self._sql(
                '''
                    INSERT INTO %(db)s.%(nuts)s
                    SELECT NULL,
                        nuts.file_id, nuts.file_segment, nuts.file_element,
                        nuts.kind_id, nuts.kind_codes_id,
                        nuts.tmin_seconds, nuts.tmin_offset,
                        nuts.tmax_seconds, nuts.tmax_offset,
                        nuts.kscale
                    FROM %(db)s.%(file_states)s
                    INNER JOIN nuts
                        ON %(db)s.%(file_states)s.file_id == nuts.file_id
                    INNER JOIN kind_codes
                        ON nuts.kind_codes_id ==
                           kind_codes.kind_codes_id
                    WHERE %(db)s.%(file_states)s.file_state != 2
                        AND (((1 << kind_codes.kind_id)
                            & %(db)s.%(file_states)s.kind_mask) != 0)
                ''')).rowcount

            task.update(nrows)
            self._set_file_states_known(transaction)
            self._conn.set_progress_handler(None, 0)
    def add_source(self, source, check=True):
        '''
        Add remote resource.

        :param source:
            Remote data access client instance.
        :type source:
            subclass of :py:class:`~pyrocko.squirrel.client.base.Source`

        :param check:
            Forwarded to the source's ``setup`` call — presumably controls
            file modification-time checking as in :py:meth:`add`; confirm
            against the source implementation.
        :type check:
            bool
        '''

        # Register first, then let the source hook itself up to this
        # Squirrel instance.
        self._sources.append(source)
        source.setup(self, check=check)
766 def add_fdsn(self, *args, **kwargs):
767 '''
768 Add FDSN site for transparent remote data access.
770 Arguments are passed to
771 :py:class:`~pyrocko.squirrel.client.fdsn.FDSNSource`.
772 '''
774 self.add_source(fdsn.FDSNSource(*args, **kwargs))
776 def add_catalog(self, *args, **kwargs):
777 '''
778 Add online catalog for transparent event data access.
780 Arguments are passed to
781 :py:class:`~pyrocko.squirrel.client.catalog.CatalogSource`.
782 '''
784 self.add_source(catalog.CatalogSource(*args, **kwargs))
    def add_dataset(self, ds, check=True):
        '''
        Read dataset description from file and add its contents.

        :param ds:
            Path to dataset description file or dataset description
            object. See :py:mod:`~pyrocko.squirrel.dataset`.
        :type ds:
            :py:class:`str` or :py:class:`~pyrocko.squirrel.dataset.Dataset`

        :param check:
            If ``True``, all file modification times are checked to see if
            cached information has to be updated (slow). If ``False``, only
            previously unknown files are indexed and cached information is used
            for known files, regardless of file state (fast, corresponds to
            Squirrel's ``--optimistic`` mode). File deletions will go
            undetected in the latter case.
        :type check:
            bool
        '''
        if isinstance(ds, str):
            ds = dataset.read_dataset(ds)

        ds.setup(self, check=check)
811 def _get_selection_args(
812 self, kind_id,
813 obj=None, tmin=None, tmax=None, time=None, codes=None):
815 if codes is not None:
816 codes = codes_patterns_for_kind(kind_id, codes)
818 if time is not None:
819 tmin = time
820 tmax = time
822 if obj is not None:
823 tmin = tmin if tmin is not None else obj.tmin
824 tmax = tmax if tmax is not None else obj.tmax
825 codes = codes if codes is not None else codes_patterns_for_kind(
826 kind_id, obj.codes)
828 return tmin, tmax, codes
830 def _get_selection_args_str(self, *args, **kwargs):
832 tmin, tmax, codes = self._get_selection_args(*args, **kwargs)
833 return 'tmin: %s, tmax: %s, codes: %s' % (
834 util.time_to_str(tmin) if tmin is not None else 'none',
835 util.time_to_str(tmax) if tmax is not None else 'none',
836 ','.join(str(entry) for entry in codes))
838 def _selection_args_to_kwargs(
839 self, obj=None, tmin=None, tmax=None, time=None, codes=None):
841 return dict(obj=obj, tmin=tmin, tmax=tmax, time=time, codes=codes)
    def _timerange_sql(self, tmin, tmax, kind, cond, args, naiv):
        '''
        Append SQL conditions selecting nuts intersecting ``[tmin, tmax]``.

        Conditions are appended to ``cond`` and their bind values to
        ``args`` (both modified in place). With ``naiv=True`` a plain
        ``tmin_seconds <= tmax`` test is used (slow, for testing).
        Otherwise the kscale scheme is exploited: nuts are binned by the
        magnitude of their time span (``kscale``), so within each bin the
        ``tmin_seconds`` of an intersecting nut is confined to a bounded
        interval, which is resolved via the (kind_id, kscale,
        tmin_seconds) index.
        '''

        tmin_seconds, tmin_offset = model.tsplit(tmin)
        tmax_seconds, tmax_offset = model.tsplit(tmax)
        if naiv:
            cond.append('%(db)s.%(nuts)s.tmin_seconds <= ?')
            args.append(tmax_seconds)
        else:
            tscale_edges = model.tscale_edges
            tmin_cond = []
            for kscale in range(tscale_edges.size + 1):
                if kscale != tscale_edges.size:
                    # Bounded bin: span length < tscale, so an intersecting
                    # nut's tmin_seconds lies within tscale of the query.
                    tscale = int(tscale_edges[kscale])
                    tmin_cond.append('''
                        (%(db)s.%(nuts)s.kind_id = ?
                         AND %(db)s.%(nuts)s.kscale == ?
                         AND %(db)s.%(nuts)s.tmin_seconds BETWEEN ? AND ?)
                    ''')
                    args.extend(
                        (to_kind_id(kind), kscale,
                         tmin_seconds - tscale - 1, tmax_seconds + 1))

                else:
                    # Last bin is unbounded: only an upper limit applies.
                    tmin_cond.append('''
                        (%(db)s.%(nuts)s.kind_id == ?
                         AND %(db)s.%(nuts)s.kscale == ?
                         AND %(db)s.%(nuts)s.tmin_seconds <= ?)
                    ''')

                    args.extend(
                        (to_kind_id(kind), kscale, tmax_seconds + 1))
            if tmin_cond:
                cond.append(' ( ' + ' OR '.join(tmin_cond) + ' ) ')

        cond.append('%(db)s.%(nuts)s.tmax_seconds >= ?')
        args.append(tmin_seconds)
    def _codes_match_sql(self, positive, kind_id, codes, cond, args):
        '''
        Append SQL conditions matching (or excluding) codes patterns.

        Exact patterns are combined into one ``IN`` expression, wildcard
        patterns each get a ``GLOB`` term. With ``positive=False`` the
        combined condition is negated, turning it into an exclusion.
        ``cond`` and ``args`` are modified in place.
        '''
        pats = codes_patterns_for_kind(kind_id, codes)
        if pats is None:
            return

        pats_exact = []
        pats_nonexact = []
        for pat in pats:
            spat = pat.safe_str
            (pats_exact if _is_exact(spat) else pats_nonexact).append(spat)

        codes_cond = []
        if pats_exact:
            codes_cond.append(' ( kind_codes.codes IN ( %s ) ) ' % ', '.join(
                '?'*len(pats_exact)))

            args.extend(pats_exact)

        if pats_nonexact:
            codes_cond.append(' ( %s ) ' % ' OR '.join(
                ('kind_codes.codes GLOB ?',) * len(pats_nonexact)))

            args.extend(pats_nonexact)

        if codes_cond:
            cond.append('%s ( %s )' % (
                'NOT' if not positive else '',
                ' OR '.join(codes_cond)))
    def iter_nuts(
            self, kind=None, tmin=None, tmax=None, codes=None,
            codes_exclude=None, sample_rate_min=None, sample_rate_max=None,
            naiv=False, kind_codes_ids=None, path=None, limit=None):

        '''
        Iterate over content entities matching given constraints.

        :param kind:
            Content kind (or kinds) to extract.
        :type kind:
            :py:class:`str`, :py:class:`list` of :py:class:`str`

        :param tmin:
            Start time of query interval.
        :type tmin:
            :py:func:`~pyrocko.util.get_time_float`

        :param tmax:
            End time of query interval.
        :type tmax:
            :py:func:`~pyrocko.util.get_time_float`

        :param codes:
            List of code patterns to query.
        :type codes:
            :py:class:`list` of :py:class:`~pyrocko.squirrel.model.Codes`
            objects appropriate for the queried content type, or anything which
            can be converted to such objects.

        :param codes_exclude:
            List of code patterns to exclude from the result.

        :param sample_rate_min:
            Minimum sample rate [Hz] of content to match.

        :param sample_rate_max:
            Maximum sample rate [Hz] of content to match.

        :param naiv:
            Bypass time span lookup through indices (slow, for testing).
        :type naiv:
            :py:class:`bool`

        :param kind_codes_ids:
            Kind-codes IDs of contents to be retrieved (internal use).
        :type kind_codes_ids:
            :py:class:`list` of :py:class:`int`

        :param path:
            If given, restrict query to contents of this file path.

        :param limit:
            If given, maximum number of entities to yield.

        :yields:
            :py:class:`~pyrocko.squirrel.model.Nut` objects representing the
            intersecting content.

        :complexity:
            O(log N) for the time selection part due to heavy use of database
            indices.

        Query time span is treated as a half-open interval ``[tmin, tmax)``.
        However, if ``tmin`` equals ``tmax``, the edge logics are modified to
        closed-interval so that content intersecting with the time instant ``t
        = tmin = tmax`` is returned (otherwise nothing would be returned as
        ``[t, t)`` never matches anything).

        Time spans of content entities to be matched are also treated as half
        open intervals, e.g. content span ``[0, 1)`` is matched by query span
        ``[0, 1)`` but not by ``[-1, 0)`` or ``[1, 2)``. Also here, logics are
        modified to closed-interval when the content time span is an empty
        interval, i.e. to indicate a time instant. E.g. time instant 0 is
        matched by ``[0, 1)`` but not by ``[-1, 0)`` or ``[1, 2)``.
        '''

        if not isinstance(kind, str):
            if kind is None:
                kind = model.g_content_kinds
            for kind_ in kind:
                # NOTE(review): the recursive per-kind call forwards only
                # tmin, tmax and codes — codes_exclude, sample_rate_min/max,
                # naiv, kind_codes_ids, path and limit are silently dropped
                # when querying multiple kinds. Confirm whether intentional.
                for nut in self.iter_nuts(kind_, tmin, tmax, codes):
                    yield nut

            return

        kind_id = to_kind_id(kind)

        # Accumulate WHERE-clause fragments and their bind values.
        cond = []
        args = []
        if tmin is not None or tmax is not None:
            assert kind is not None
            # Open-ended bounds are filled in from the selection's own span.
            if tmin is None:
                tmin = self.get_time_span()[0]
            if tmax is None:
                tmax = self.get_time_span()[1] + 1.0

            self._timerange_sql(tmin, tmax, kind, cond, args, naiv)

        cond.append('kind_codes.kind_id == ?')
        args.append(kind_id)

        if codes is not None:
            self._codes_match_sql(True, kind_id, codes, cond, args)

        if codes_exclude is not None:
            self._codes_match_sql(False, kind_id, codes_exclude, cond, args)

        if sample_rate_min is not None:
            cond.append('kind_codes.deltat <= ?')
            args.append(1.0/sample_rate_min)

        if sample_rate_max is not None:
            cond.append('? <= kind_codes.deltat')
            args.append(1.0/sample_rate_max)

        if kind_codes_ids is not None:
            cond.append(
                ' ( kind_codes.kind_codes_id IN ( %s ) ) ' % ', '.join(
                    '?'*len(kind_codes_ids)))

            args.extend(kind_codes_ids)

        db = self.get_database()
        if path is not None:
            # Paths are stored relative to the database's basepath.
            cond.append('files.path == ?')
            args.append(db.relpath(abspath(path)))

        sql = ('''
            SELECT
                files.path,
                files.format,
                files.mtime,
                files.size,
                %(db)s.%(nuts)s.file_segment,
                %(db)s.%(nuts)s.file_element,
                kind_codes.kind_id,
                kind_codes.codes,
                %(db)s.%(nuts)s.tmin_seconds,
                %(db)s.%(nuts)s.tmin_offset,
                %(db)s.%(nuts)s.tmax_seconds,
                %(db)s.%(nuts)s.tmax_offset,
                kind_codes.deltat
            FROM files
            INNER JOIN %(db)s.%(nuts)s
                ON files.file_id == %(db)s.%(nuts)s.file_id
            INNER JOIN kind_codes
                ON %(db)s.%(nuts)s.kind_codes_id == kind_codes.kind_codes_id
            ''')

        if cond:
            sql += ''' WHERE ''' + ' AND '.join(cond)

        if limit is not None:
            sql += ''' LIMIT %i''' % limit

        sql = self._sql(sql)
        if tmin is None and tmax is None:
            for row in self._conn.execute(sql, args):
                row = (db.abspath(row[0]),) + row[1:]
                nut = model.Nut(values_nocheck=row)
                yield nut
        else:
            assert tmin is not None and tmax is not None
            # The SQL pre-selection is coarse (integer seconds); exact
            # interval matching with the edge rules from the docstring is
            # done here in Python.
            if tmin == tmax:
                # Query span collapsed to an instant: closed-interval logic.
                for row in self._conn.execute(sql, args):
                    row = (db.abspath(row[0]),) + row[1:]
                    nut = model.Nut(values_nocheck=row)
                    if (nut.tmin <= tmin < nut.tmax) \
                            or (nut.tmin == nut.tmax and tmin == nut.tmin):

                        yield nut
            else:
                for row in self._conn.execute(sql, args):
                    row = (db.abspath(row[0]),) + row[1:]
                    nut = model.Nut(values_nocheck=row)
                    if (tmin < nut.tmax and nut.tmin < tmax) \
                            or (nut.tmin == nut.tmax
                                and tmin <= nut.tmin < tmax):

                        yield nut
1076 def get_nuts(self, *args, **kwargs):
1077 '''
1078 Get content entities matching given constraints.
1080 Like :py:meth:`iter_nuts` but returns results as a list.
1081 '''
1083 return list(self.iter_nuts(*args, **kwargs))
    def _split_nuts(
            self, kind, tmin=None, tmax=None, codes=None, path=None):
        '''
        Cut nut entries intersecting ``[tmin, tmax]`` out of the tables.

        Matching nuts are deleted; where a nut sticks out of the given
        span, truncated left/right remainder entries are inserted in its
        place. Applied to both the selection's nut table and the main
        database's ``nuts`` table.
        '''

        kind_id = to_kind_id(kind)
        tmin_seconds, tmin_offset = model.tsplit(tmin)
        tmax_seconds, tmax_offset = model.tsplit(tmax)

        # Name mapping to address the main database's nuts table with the
        # same SQL templates used for the selection's table.
        names_main_nuts = dict(self._names)
        names_main_nuts.update(db='main', nuts='nuts')

        db = self.get_database()

        def main_nuts(s):
            return s % names_main_nuts

        with self.transaction('split nuts') as cursor:
            # modify selection and main
            for sql_subst in [
                    self._sql, main_nuts]:

                cond = []
                args = []

                self._timerange_sql(tmin, tmax, kind, cond, args, False)

                if codes is not None:
                    self._codes_match_sql(True, kind_id, codes, cond, args)

                if path is not None:
                    cond.append('files.path == ?')
                    args.append(db.relpath(abspath(path)))

                sql = sql_subst('''
                    SELECT
                        %(db)s.%(nuts)s.nut_id,
                        %(db)s.%(nuts)s.tmin_seconds,
                        %(db)s.%(nuts)s.tmin_offset,
                        %(db)s.%(nuts)s.tmax_seconds,
                        %(db)s.%(nuts)s.tmax_offset,
                        kind_codes.deltat
                    FROM files
                    INNER JOIN %(db)s.%(nuts)s
                        ON files.file_id == %(db)s.%(nuts)s.file_id
                    INNER JOIN kind_codes
                        ON %(db)s.%(nuts)s.kind_codes_id == kind_codes.kind_codes_id
                    WHERE ''' + ' AND '.join(cond))  # noqa

                insert = []
                delete = []
                for row in cursor.execute(sql, args):
                    # nut_deltat is selected but currently unused here.
                    nut_id, nut_tmin_seconds, nut_tmin_offset, \
                        nut_tmax_seconds, nut_tmax_offset, nut_deltat = row

                    nut_tmin = model.tjoin(
                        nut_tmin_seconds, nut_tmin_offset)
                    nut_tmax = model.tjoin(
                        nut_tmax_seconds, nut_tmax_offset)

                    if nut_tmin < tmax and tmin < nut_tmax:
                        # Left-hand remainder, before the cut span.
                        if nut_tmin < tmin:
                            insert.append((
                                nut_tmin_seconds, nut_tmin_offset,
                                tmin_seconds, tmin_offset,
                                model.tscale_to_kscale(
                                    tmin_seconds - nut_tmin_seconds),
                                nut_id))

                        # Right-hand remainder, after the cut span.
                        if tmax < nut_tmax:
                            insert.append((
                                tmax_seconds, tmax_offset,
                                nut_tmax_seconds, nut_tmax_offset,
                                model.tscale_to_kscale(
                                    nut_tmax_seconds - tmax_seconds),
                                nut_id))

                        delete.append((nut_id,))

                # Remainders are cloned from the original row (with new time
                # bounds and kscale) before the original is deleted.
                sql_add = '''
                    INSERT INTO %(db)s.%(nuts)s (
                            file_id, file_segment, file_element, kind_id,
                            kind_codes_id, tmin_seconds, tmin_offset,
                            tmax_seconds, tmax_offset, kscale )
                        SELECT
                            file_id, file_segment, file_element,
                            kind_id, kind_codes_id, ?, ?, ?, ?, ?
                        FROM %(db)s.%(nuts)s
                        WHERE nut_id == ?
                '''
                cursor.executemany(sql_subst(sql_add), insert)

                sql_delete = '''
                    DELETE FROM %(db)s.%(nuts)s WHERE nut_id == ?
                '''
                cursor.executemany(sql_subst(sql_delete), delete)
    def get_time_span(self, kinds=None, tight=True, dummy_limits=True):
        '''
        Get time interval over all content in selection.

        :param kinds:
            If not ``None``, restrict query to given content kinds.
        :type kinds:
            list of str

        :param tight:
            If ``True``, contents with open time bounds (stored as the
            global dummy limits) are ignored when combining, so the span
            of the regularly bounded contents is returned. If ``False``,
            a single open bound makes the corresponding combined bound
            undetermined.
        :type tight:
            bool

        :param dummy_limits:
            If ``True``, report undetermined bounds as the global dummy
            limits instead of ``None``.
        :type dummy_limits:
            bool

        :complexity:
            O(1), independent of the number of nuts.

        :returns:
            ``(tmin, tmax)``, combined time interval of queried content kinds.
        '''

        # Two-stage lookup because times are (seconds, offset) pairs: find
        # the extreme integer seconds first, then the extreme offset among
        # the entries sharing that seconds value.
        sql_min = self._sql('''
            SELECT MIN(tmin_seconds), MIN(tmin_offset)
            FROM %(db)s.%(nuts)s
            WHERE kind_id == ?
                AND tmin_seconds == (
                    SELECT MIN(tmin_seconds)
                    FROM %(db)s.%(nuts)s
                    WHERE kind_id == ?)
        ''')

        sql_max = self._sql('''
            SELECT MAX(tmax_seconds), MAX(tmax_offset)
            FROM %(db)s.%(nuts)s
            WHERE kind_id == ?
                AND tmax_seconds == (
                    SELECT MAX(tmax_seconds)
                    FROM %(db)s.%(nuts)s
                    WHERE kind_id == ?)
        ''')

        gtmin = None
        gtmax = None

        if isinstance(kinds, str):
            kinds = [kinds]

        if kinds is None:
            kind_ids = model.g_content_kind_ids
        else:
            kind_ids = model.to_kind_ids(kinds)

        tmins = []
        tmaxs = []
        for kind_id in kind_ids:
            for tmin_seconds, tmin_offset in self._conn.execute(
                    sql_min, (kind_id, kind_id)):
                tmins.append(model.tjoin(tmin_seconds, tmin_offset))

            for (tmax_seconds, tmax_offset) in self._conn.execute(
                    sql_max, (kind_id, kind_id)):
                tmaxs.append(model.tjoin(tmax_seconds, tmax_offset))

        # Global dummy limits in the index represent open bounds: map them
        # to None before combining.
        tmins = [tmin if tmin != model.g_tmin else None for tmin in tmins]
        tmaxs = [tmax if tmax != model.g_tmax else None for tmax in tmaxs]

        if tight:
            gtmin = nonef(min, tmins)
            gtmax = nonef(max, tmaxs)
        else:
            gtmin = None if None in tmins else nonef(min, tmins)
            gtmax = None if None in tmaxs else nonef(max, tmaxs)

        if dummy_limits:
            if gtmin is None:
                gtmin = model.g_tmin
            if gtmax is None:
                gtmax = model.g_tmax

        return gtmin, gtmax
1256 def has(self, kinds):
1257 '''
1258 Check availability of given content kinds.
1260 :param kinds:
1261 Content kinds to query.
1262 :type kind:
1263 list of str
1265 :returns:
1266 ``True`` if any of the queried content kinds is available
1267 in the selection.
1268 '''
1269 self_tmin, self_tmax = self.get_time_span(
1270 kinds, dummy_limits=False)
1272 return None not in (self_tmin, self_tmax)
1274 def get_deltat_span(self, kind):
1275 '''
1276 Get min and max sampling interval of all content of given kind.
1278 :param kind:
1279 Content kind
1280 :type kind:
1281 str
1283 :returns: ``(deltat_min, deltat_max)``
1284 '''
1286 deltats = [
1287 deltat for deltat in self.get_deltats(kind)
1288 if deltat is not None]
1290 if deltats:
1291 return min(deltats), max(deltats)
1292 else:
1293 return None, None
1295 def iter_kinds(self, codes=None):
1296 '''
1297 Iterate over content types available in selection.
1299 :param codes:
1300 If given, get kinds only for selected codes identifier.
1301 Only a single identifier may be given here and no pattern matching
1302 is done, currently.
1303 :type codes:
1304 :py:class:`~pyrocko.squirrel.model.Codes`
1306 :yields:
1307 Available content kinds as :py:class:`str`.
1309 :complexity:
1310 O(1), independent of number of nuts.
1311 '''
1313 return self._database._iter_kinds(
1314 codes=codes,
1315 kind_codes_count='%(db)s.%(kind_codes_count)s' % self._names)
1317 def iter_deltats(self, kind=None):
1318 '''
1319 Iterate over sampling intervals available in selection.
1321 :param kind:
1322 If given, get sampling intervals only for a given content type.
1323 :type kind:
1324 str
1326 :yields:
1327 :py:class:`float` values.
1329 :complexity:
1330 O(1), independent of number of nuts.
1331 '''
1332 return self._database._iter_deltats(
1333 kind=kind,
1334 kind_codes_count='%(db)s.%(kind_codes_count)s' % self._names)
1336 def iter_codes(self, kind=None):
1337 '''
1338 Iterate over content identifier code sequences available in selection.
1340 :param kind:
1341 If given, get codes only for a given content type.
1342 :type kind:
1343 str
1345 :yields:
1346 :py:class:`tuple` of :py:class:`str`
1348 :complexity:
1349 O(1), independent of number of nuts.
1350 '''
1351 return self._database._iter_codes(
1352 kind=kind,
1353 kind_codes_count='%(db)s.%(kind_codes_count)s' % self._names)
1355 def _iter_codes_info(self, kind=None, codes=None):
1356 '''
1357 Iterate over number of occurrences of any (kind, codes) combination.
1359 :param kind:
1360 If given, get counts only for selected content type.
1361 :type kind:
1362 str
1364 :yields:
1365 Tuples of the form ``(kind, codes, deltat, kind_codes_id, count)``.
1367 :complexity:
1368 O(1), independent of number of nuts.
1369 '''
1370 return self._database._iter_codes_info(
1371 kind=kind,
1372 codes=codes,
1373 kind_codes_count='%(db)s.%(kind_codes_count)s' % self._names)
1375 def get_kinds(self, codes=None):
1376 '''
1377 Get content types available in selection.
1379 :param codes:
1380 If given, get kinds only for selected codes identifier.
1381 Only a single identifier may be given here and no pattern matching
1382 is done, currently.
1383 :type codes:
1384 :py:class:`~pyrocko.squirrel.model.Codes`
1386 :returns:
1387 Sorted list of available content types.
1388 :rtype:
1389 py:class:`list` of :py:class:`str`
1391 :complexity:
1392 O(1), independent of number of nuts.
1394 '''
1395 return sorted(list(self.iter_kinds(codes=codes)))
1397 def get_deltats(self, kind=None):
1398 '''
1399 Get sampling intervals available in selection.
1401 :param kind:
1402 If given, get sampling intervals only for selected content type.
1403 :type kind:
1404 str
1406 :complexity:
1407 O(1), independent of number of nuts.
1409 :returns: Sorted list of available sampling intervals.
1410 '''
1411 return sorted(list(self.iter_deltats(kind=kind)))
1413 def get_codes(self, kind=None):
1414 '''
1415 Get identifier code sequences available in selection.
1417 :param kind:
1418 If given, get codes only for selected content type.
1419 :type kind:
1420 str
1422 :complexity:
1423 O(1), independent of number of nuts.
1425 :returns: Sorted list of available codes as tuples of strings.
1426 '''
1427 return sorted(list(self.iter_codes(kind=kind)))
1429 def get_counts(self, kind=None):
1430 '''
1431 Get number of occurrences of any (kind, codes) combination.
1433 :param kind:
1434 If given, get codes only for selected content type.
1435 :type kind:
1436 str
1438 :complexity:
1439 O(1), independent of number of nuts.
1441 :returns: ``dict`` with ``counts[kind][codes]`` or ``counts[codes]``
1442 if kind is not ``None``
1443 '''
1444 d = {}
1445 for kind_id, codes, _, _, count in self._iter_codes_info(kind=kind):
1446 if kind_id not in d:
1447 v = d[kind_id] = {}
1448 else:
1449 v = d[kind_id]
1451 if codes not in v:
1452 v[codes] = 0
1454 v[codes] += count
1456 if kind is not None:
1457 return d[to_kind_id(kind)]
1458 else:
1459 return dict((to_kind(kind_id), v) for (kind_id, v) in d.items())
1461 def glob_codes(self, kind, codes):
1462 '''
1463 Find codes matching given patterns.
1465 :param kind:
1466 Content kind to be queried.
1467 :type kind:
1468 str
1470 :param codes:
1471 List of code patterns to query.
1472 :type codes:
1473 :py:class:`list` of :py:class:`~pyrocko.squirrel.model.Codes`
1474 objects appropriate for the queried content type, or anything which
1475 can be converted to such objects.
1477 :returns:
1478 List of matches of the form ``[kind_codes_id, codes, deltat]``.
1479 '''
1481 kind_id = to_kind_id(kind)
1482 args = [kind_id]
1483 pats = codes_patterns_for_kind(kind_id, codes)
1485 if pats:
1486 codes_cond = 'AND ( %s ) ' % ' OR '.join(
1487 ('kind_codes.codes GLOB ?',) * len(pats))
1489 args.extend(pat.safe_str for pat in pats)
1490 else:
1491 codes_cond = ''
1493 sql = self._sql('''
1494 SELECT kind_codes_id, codes, deltat FROM kind_codes
1495 WHERE
1496 kind_id == ? ''' + codes_cond)
1498 return list(map(list, self._conn.execute(sql, args)))
1500 def update(self, constraint=None, **kwargs):
1501 '''
1502 Update or partially update channel and event inventories.
1504 :param constraint:
1505 Selection of times or areas to be brought up to date.
1506 :type constraint:
1507 :py:class:`~pyrocko.squirrel.client.base.Constraint`
1509 :param \\*\\*kwargs:
1510 Shortcut for setting ``constraint=Constraint(**kwargs)``.
1512 This function triggers all attached remote sources, to check for
1513 updates in the meta-data. The sources will only submit queries when
1514 their expiration date has passed, or if the selection spans into
1515 previously unseen times or areas.
1516 '''
1518 if constraint is None:
1519 constraint = client.Constraint(**kwargs)
1521 for source in self._sources:
1522 source.update_channel_inventory(self, constraint)
1523 source.update_event_inventory(self, constraint)
1525 def update_waveform_promises(self, constraint=None, **kwargs):
1526 '''
1527 Permit downloading of remote waveforms.
1529 :param constraint:
1530 Remote waveforms compatible with the given constraint are enabled
1531 for download.
1532 :type constraint:
1533 :py:class:`~pyrocko.squirrel.client.base.Constraint`
1535 :param \\*\\*kwargs:
1536 Shortcut for setting ``constraint=Constraint(**kwargs)``.
1538 Calling this method permits Squirrel to download waveforms from remote
1539 sources when processing subsequent waveform requests. This works by
1540 inserting so called waveform promises into the database. It will look
1541 into the available channels for each remote source and create a promise
1542 for each channel compatible with the given constraint. If the promise
1543 then matches in a waveform request, Squirrel tries to download the
1544 waveform. If the download is successful, the downloaded waveform is
1545 added to the Squirrel and the promise is deleted. If the download
1546 fails, the promise is kept if the reason of failure looks like being
1547 temporary, e.g. because of a network failure. If the cause of failure
1548 however seems to be permanent, the promise is deleted so that no
1549 further attempts are made to download a waveform which might not be
1550 available from that server at all. To force re-scheduling after a
1551 permanent failure, call :py:meth:`update_waveform_promises`
1552 yet another time.
1553 '''
1555 if constraint is None:
1556 constraint = client.Constraint(**kwargs)
1558 for source in self._sources:
1559 source.update_waveform_promises(self, constraint)
1561 def remove_waveform_promises(self, from_database='selection'):
1562 '''
1563 Remove waveform promises from live selection or global database.
1565 Calling this function removes all waveform promises provided by the
1566 attached sources.
1568 :param from_database:
1569 Remove from live selection ``'selection'`` or global database
1570 ``'global'``.
1571 '''
1572 for source in self._sources:
1573 source.remove_waveform_promises(self, from_database=from_database)
1575 def update_responses(self, constraint=None, **kwargs):
1576 if constraint is None:
1577 constraint = client.Constraint(**kwargs)
1579 for source in self._sources:
1580 source.update_response_inventory(self, constraint)
1582 def get_nfiles(self):
1583 '''
1584 Get number of files in selection.
1585 '''
1587 sql = self._sql('''SELECT COUNT(*) FROM %(db)s.%(file_states)s''')
1588 for row in self._conn.execute(sql):
1589 return row[0]
1591 def get_nnuts(self):
1592 '''
1593 Get number of nuts in selection.
1594 '''
1596 sql = self._sql('''SELECT COUNT(*) FROM %(db)s.%(nuts)s''')
1597 for row in self._conn.execute(sql):
1598 return row[0]
1600 def get_total_size(self):
1601 '''
1602 Get aggregated file size available in selection.
1603 '''
1605 sql = self._sql('''
1606 SELECT SUM(files.size) FROM %(db)s.%(file_states)s
1607 INNER JOIN files
1608 ON %(db)s.%(file_states)s.file_id = files.file_id
1609 ''')
1611 for row in self._conn.execute(sql):
1612 return row[0] or 0
1614 def get_stats(self):
1615 '''
1616 Get statistics on contents available through this selection.
1617 '''
1619 kinds = self.get_kinds()
1620 time_spans = {}
1621 for kind in kinds:
1622 time_spans[kind] = self.get_time_span([kind])
1624 return SquirrelStats(
1625 nfiles=self.get_nfiles(),
1626 nnuts=self.get_nnuts(),
1627 kinds=kinds,
1628 codes=self.get_codes(),
1629 total_size=self.get_total_size(),
1630 counts=self.get_counts(),
1631 time_spans=time_spans,
1632 sources=[s.describe() for s in self._sources],
1633 operators=[op.describe() for op in self._operators])
1635 @filldocs
1636 def check(
1637 self, obj=None, tmin=None, tmax=None, time=None, codes=None,
1638 ignore=[]):
1639 '''
1640 Check for common data/metadata problems.
1642 %(query_args)s
1644 :param ignore:
1645 Problem types to be ignored.
1646 :type ignore:
1647 :class:`list` of :class:`str`
1648 (:py:class:`~pyrocko.squirrel.check.SquirrelCheckProblemType`)
1650 :returns:
1651 :py:class:`~pyrocko.squirrel.check.SquirrelCheck` object
1652 containing the results of the check.
1654 See :py:func:`~pyrocko.squirrel.check.do_check`.
1655 '''
1657 from .check import do_check
1658 tmin, tmax, codes = self._get_selection_args(
1659 CHANNEL, obj, tmin, tmax, time, codes)
1661 return do_check(self, tmin=tmin, tmax=tmax, codes=codes, ignore=ignore)
    def get_content(
            self,
            nut,
            cache_id='default',
            accessor_id='default',
            show_progress=False,
            model='squirrel'):

        '''
        Get and possibly load full content for a given index entry from file.

        Loads the actual content objects (channel, station, waveform, ...) from
        file. For efficiency, sibling content (all stuff in the same file
        segment) will also be loaded as a side effect. The loaded contents are
        cached in the Squirrel object.

        :raises:
            :py:exc:`~pyrocko.squirrel.error.NotAvailable` if the content
            cannot be retrieved even after (re)loading the file segment.
        '''

        content_cache = self._content_caches[cache_id]
        if not content_cache.has(nut):

            # Load the whole file segment; this also puts the requested
            # nut's siblings into the cache.
            for nut_loaded in io.iload(
                    nut.file_path,
                    segment=nut.file_segment,
                    format=nut.file_format,
                    database=self._database,
                    update_selection=self,
                    show_progress=show_progress):

                content_cache.put(nut_loaded)

        try:
            return content_cache.get(nut, accessor_id, model)

        except KeyError:
            # NOTE(review): presumably reached when the file content no
            # longer matches the index (e.g. file changed on disk) — confirm.
            raise error.NotAvailable(
                'Unable to retrieve content: %s, %s, %s, %s' % nut.key)
1700 def advance_accessor(self, accessor_id='default', cache_id=None):
1701 '''
1702 Notify memory caches about consumer moving to a new data batch.
1704 :param accessor_id:
1705 Name of accessing consumer to be advanced.
1706 :type accessor_id:
1707 str
1709 :param cache_id:
1710 Name of cache to for which the accessor should be advanced. By
1711 default the named accessor is advanced in all registered caches.
1712 By default, two caches named ``'default'`` and ``'waveform'`` are
1713 available.
1714 :type cache_id:
1715 str
1717 See :py:class:`~pyrocko.squirrel.cache.ContentCache` for details on how
1718 Squirrel's memory caching works and can be tuned. Default behaviour is
1719 to release data when it has not been used in the latest data
1720 window/batch. If the accessor is never advanced, data is cached
1721 indefinitely - which is often desired e.g. for station meta-data.
1722 Methods for consecutive data traversal, like
1723 :py:meth:`chopper_waveforms` automatically advance and clear
1724 their accessor.
1725 '''
1726 for cache_ in (
1727 self._content_caches.keys()
1728 if cache_id is None
1729 else [cache_id]):
1731 self._content_caches[cache_].advance_accessor(accessor_id)
1733 def clear_accessor(self, accessor_id, cache_id=None):
1734 '''
1735 Notify memory caches about a consumer having finished.
1737 :param accessor_id:
1738 Name of accessor to be cleared.
1739 :type accessor_id:
1740 str
1742 :param cache_id:
1743 Name of cache for which the accessor should be cleared. By default
1744 the named accessor is cleared from all registered caches. By
1745 default, two caches named ``'default'`` and ``'waveform'`` are
1746 available.
1747 :type cache_id:
1748 str
1750 Calling this method clears all references to cache entries held by the
1751 named accessor. Cache entries are then freed if not referenced by any
1752 other accessor.
1753 '''
1755 for cache_ in (
1756 self._content_caches.keys()
1757 if cache_id is None
1758 else [cache_id]):
1760 self._content_caches[cache_].clear_accessor(accessor_id)
1762 def get_cache_stats(self, cache_id):
1763 return self._content_caches[cache_id].get_stats()
1765 @filldocs
1766 def get_stations(
1767 self, obj=None, tmin=None, tmax=None, time=None, codes=None,
1768 model='squirrel', on_error='raise'):
1770 '''
1771 Get stations matching given constraints.
1773 %(query_args)s
1775 :param model:
1776 Select object model for returned values: ``'squirrel'`` to get
1777 Squirrel station objects or ``'pyrocko'`` to get Pyrocko station
1778 objects with channel information attached.
1779 :type model:
1780 str
1782 :returns:
1783 List of :py:class:`pyrocko.squirrel.Station
1784 <pyrocko.squirrel.model.Station>` objects by default or list of
1785 :py:class:`pyrocko.model.Station <pyrocko.model.station.Station>`
1786 objects if ``model='pyrocko'`` is requested.
1788 See :py:meth:`iter_nuts` for details on time span matching.
1789 '''
1791 if model == 'pyrocko':
1792 return self._get_pyrocko_stations(
1793 obj, tmin, tmax, time, codes, on_error=on_error)
1794 elif model in ('squirrel', 'stationxml', 'stationxml+'):
1795 args = self._get_selection_args(
1796 STATION, obj, tmin, tmax, time, codes)
1798 nuts = sorted(
1799 self.iter_nuts('station', *args), key=lambda nut: nut.dkey)
1801 return [self.get_content(nut, model=model) for nut in nuts]
1802 else:
1803 raise ValueError('Invalid station model: %s' % model)
1805 @filldocs
1806 def get_channels(
1807 self, obj=None, tmin=None, tmax=None, time=None, codes=None,
1808 model='squirrel'):
1810 '''
1811 Get channels matching given constraints.
1813 %(query_args)s
1815 :returns:
1816 List of :py:class:`~pyrocko.squirrel.model.Channel` objects.
1818 See :py:meth:`iter_nuts` for details on time span matching.
1819 '''
1821 args = self._get_selection_args(
1822 CHANNEL, obj, tmin, tmax, time, codes)
1824 nuts = sorted(
1825 self.iter_nuts('channel', *args), key=lambda nut: nut.dkey)
1827 return [self.get_content(nut, model=model) for nut in nuts]
1829 @filldocs
1830 def get_sensors(
1831 self, obj=None, tmin=None, tmax=None, time=None, codes=None):
1833 '''
1834 Get sensors matching given constraints.
1836 %(query_args)s
1838 :returns:
1839 List of :py:class:`~pyrocko.squirrel.model.Sensor` objects.
1841 See :py:meth:`iter_nuts` for details on time span matching.
1842 '''
1844 tmin, tmax, codes = self._get_selection_args(
1845 CHANNEL, obj, tmin, tmax, time, codes)
1847 if codes is not None:
1848 codes = codes_patterns_list(
1849 (entry.replace(channel=entry.channel[:-1] + '?')
1850 if entry.channel != '*' else entry)
1851 for entry in codes)
1853 nuts = sorted(
1854 self.iter_nuts(
1855 'channel', tmin, tmax, codes), key=lambda nut: nut.dkey)
1857 return [
1858 sensor for sensor in model.Sensor.from_channels(
1859 self.get_content(nut) for nut in nuts)
1860 if match_time_span(tmin, tmax, sensor)]
1862 @filldocs
1863 def get_responses(
1864 self, obj=None, tmin=None, tmax=None, time=None, codes=None,
1865 model='squirrel'):
1867 '''
1868 Get instrument responses matching given constraints.
1870 %(query_args)s
1872 :param model:
1873 Select data model for returned objects. Choices: ``'squirrel'``,
1874 ``'stationxml'``, ``'stationxml+'``. See return value description.
1875 :type model:
1876 str
1878 :returns:
1879 List of :py:class:`~pyrocko.squirrel.model.Response` if ``model ==
1880 'squirrel'`` or list of
1881 :py:class:`~pyrocko.io.stationxml.FDSNStationXML`
1882 if ``model == 'stationxml'`` or list of
1883 (:py:class:`~pyrocko.squirrel.model.Response`,
1884 :py:class:`~pyrocko.io.stationxml.FDSNStationXML`) if ``model ==
1885 'stationxml+'``.
1887 See :py:meth:`iter_nuts` for details on time span matching.
1888 '''
1890 args = self._get_selection_args(
1891 RESPONSE, obj, tmin, tmax, time, codes)
1893 nuts = sorted(
1894 self.iter_nuts('response', *args), key=lambda nut: nut.dkey)
1896 return [self.get_content(nut, model=model) for nut in nuts]
1898 @filldocs
1899 def get_response(
1900 self, obj=None, tmin=None, tmax=None, time=None, codes=None,
1901 model='squirrel', on_duplicate='raise'):
1903 '''
1904 Get instrument response matching given constraints.
1906 %(query_args)s
1908 :param model:
1909 Select data model for returned object. Choices: ``'squirrel'``,
1910 ``'stationxml'``, ``'stationxml+'``. See return value description.
1911 :type model:
1912 str
1914 :param on_duplicate:
1915 Determines how duplicates/multiple matching responses are handled.
1916 Choices: ``'raise'`` - raise
1917 :py:exc:`~pyrocko.squirrel.error.Duplicate`, ``'warn'`` - emit a
1918 warning and return first match, ``'ignore'`` - silently return
1919 first match.
1920 :type on_duplicate:
1921 str
1923 :returns:
1924 :py:class:`~pyrocko.squirrel.model.Response` if
1925 ``model == 'squirrel'`` or
1926 :py:class:`~pyrocko.io.stationxml.FDSNStationXML` if ``model ==
1927 'stationxml'`` or
1928 (:py:class:`~pyrocko.squirrel.model.Response`,
1929 :py:class:`~pyrocko.io.stationxml.FDSNStationXML`) if ``model ==
1930 'stationxml+'``.
1932 Same as :py:meth:`get_responses` but returning exactly one response.
1933 Raises :py:exc:`~pyrocko.squirrel.error.NotAvailable` if none is
1934 available. Duplicates are handled according to the ``on_duplicate``
1935 argument.
1937 See :py:meth:`iter_nuts` for details on time span matching.
1938 '''
1940 if model == 'stationxml':
1941 model_ = 'stationxml+'
1942 else:
1943 model_ = model
1945 responses = self.get_responses(
1946 obj, tmin, tmax, time, codes, model=model_)
1947 if len(responses) == 0:
1948 raise error.NotAvailable(
1949 'No instrument response available (%s).'
1950 % self._get_selection_args_str(
1951 RESPONSE, obj, tmin, tmax, time, codes))
1953 elif len(responses) > 1:
1955 if on_duplicate in ('raise', 'warn'):
1956 if model_ == 'squirrel':
1957 resps_sq = responses
1958 elif model_ == 'stationxml+':
1959 resps_sq = [resp[0] for resp in responses]
1960 else:
1961 raise ValueError('Invalid response model: %s' % model)
1963 rinfo = ':\n' + '\n'.join(
1964 ' ' + resp.summary for resp in resps_sq)
1966 message = \
1967 'Multiple instrument responses matching given ' \
1968 'constraints (%s)%s%s' % (
1969 self._get_selection_args_str(
1970 RESPONSE, obj, tmin, tmax, time, codes),
1971 ' -> using first' if on_duplicate == 'warn' else '',
1972 rinfo)
1974 if on_duplicate == 'raise':
1975 raise error.Duplicate(message)
1977 elif on_duplicate == 'warn':
1978 logger.warning(message)
1980 elif on_duplicate == 'ignore':
1981 pass
1983 else:
1984 ValueError(
1985 'Invalid argument for on_duplicate: %s' % on_duplicate)
1987 if model == 'stationxml':
1988 return responses[0][1]
1989 else:
1990 return responses[0]
1992 @filldocs
1993 def get_events(
1994 self, obj=None, tmin=None, tmax=None, time=None, codes=None):
1996 '''
1997 Get events matching given constraints.
1999 %(query_args)s
2001 :returns:
2002 List of :py:class:`~pyrocko.model.event.Event` objects.
2004 See :py:meth:`iter_nuts` for details on time span matching.
2005 '''
2007 args = self._get_selection_args(EVENT, obj, tmin, tmax, time, codes)
2008 nuts = sorted(
2009 self.iter_nuts('event', *args), key=lambda nut: nut.dkey)
2011 return [self.get_content(nut) for nut in nuts]
2013 def _redeem_promises(self, *args, order_only=False):
2015 def split_promise(order, tmax=None):
2016 self._split_nuts(
2017 'waveform_promise',
2018 order.tmin, tmax if tmax is not None else order.tmax,
2019 codes=order.codes,
2020 path=order.source_id)
2022 tmin, tmax = args[:2]
2024 waveforms = list(self.iter_nuts('waveform', *args))
2025 promises = list(self.iter_nuts('waveform_promise', *args))
2027 codes_to_avail = defaultdict(list)
2028 for nut in waveforms:
2029 codes_to_avail[nut.codes].append((nut.tmin, nut.tmax))
2031 def tts(x):
2032 if isinstance(x, tuple):
2033 return tuple(tts(e) for e in x)
2034 elif isinstance(x, list):
2035 return list(tts(e) for e in x)
2036 else:
2037 return util.time_to_str(x)
2039 now = time.time()
2040 orders = []
2041 for promise in promises:
2042 waveforms_avail = codes_to_avail[promise.codes]
2043 for block_tmin, block_tmax in blocks(
2044 max(tmin, promise.tmin),
2045 min(tmax, promise.tmax),
2046 promise.deltat):
2048 if block_tmin > now:
2049 continue
2051 orders.append(
2052 WaveformOrder(
2053 source_id=promise.file_path,
2054 codes=promise.codes,
2055 tmin=block_tmin,
2056 tmax=block_tmax,
2057 deltat=promise.deltat,
2058 gaps=gaps(waveforms_avail, block_tmin, block_tmax),
2059 time_created=now))
2061 orders_noop, orders = lpick(lambda order: order.gaps, orders)
2063 order_keys_noop = set(order_key(order) for order in orders_noop)
2064 if len(order_keys_noop) != 0 or len(orders_noop) != 0:
2065 logger.info(
2066 'Waveform orders already satisified with cached/local data: '
2067 '%i (%i)' % (len(order_keys_noop), len(orders_noop)))
2069 for order in orders_noop:
2070 split_promise(order)
2072 if order_only:
2073 if orders:
2074 self._pending_orders.extend(orders)
2075 logger.info(
2076 'Enqueuing %i waveform order%s.'
2077 % len_plural(orders))
2078 return
2079 else:
2080 if self._pending_orders:
2081 orders.extend(self._pending_orders)
2082 logger.info(
2083 'Adding %i previously enqueued order%s.'
2084 % len_plural(self._pending_orders))
2086 self._pending_orders = []
2088 source_ids = []
2089 sources = {}
2090 for source in self._sources:
2091 if isinstance(source, fdsn.FDSNSource):
2092 source_ids.append(source._source_id)
2093 sources[source._source_id] = source
2095 source_priority = dict(
2096 (source_id, i) for (i, source_id) in enumerate(source_ids))
2098 order_groups = defaultdict(list)
2099 for order in orders:
2100 order_groups[order_key(order)].append(order)
2102 for k, order_group in order_groups.items():
2103 order_group.sort(
2104 key=lambda order: source_priority[order.source_id])
2106 n_order_groups = len(order_groups)
2108 if len(order_groups) != 0 or len(orders) != 0:
2109 logger.info(
2110 'Waveform orders standing for download: %i (%i)'
2111 % (len(order_groups), len(orders)))
2113 task = make_task('Waveform orders processed', n_order_groups)
2114 else:
2115 task = None
2117 def release_order_group(order):
2118 okey = order_key(order)
2119 for followup in order_groups[okey]:
2120 if followup is not order:
2121 split_promise(followup)
2123 del order_groups[okey]
2125 if task:
2126 task.update(n_order_groups - len(order_groups))
2128 def noop(order):
2129 pass
2131 def success(order, trs):
2132 release_order_group(order)
2133 if order.is_near_real_time():
2134 if not trs:
2135 return # keep promise when no data received at real time
2136 else:
2137 tmax = max(tr.tmax+tr.deltat for tr in trs)
2138 tmax = order.tmin \
2139 + round((tmax - order.tmin) / order.deltat) \
2140 * order.deltat
2141 split_promise(order, tmax)
2142 else:
2143 split_promise(order)
2145 def batch_add(paths):
2146 self.add(paths)
2148 calls = queue.Queue()
2150 def enqueue(f):
2151 def wrapper(*args):
2152 calls.put((f, args))
2154 return wrapper
2156 while order_groups:
2158 orders_now = []
2159 empty = []
2160 for k, order_group in order_groups.items():
2161 try:
2162 orders_now.append(order_group.pop(0))
2163 except IndexError:
2164 empty.append(k)
2166 for k in empty:
2167 del order_groups[k]
2169 by_source_id = defaultdict(list)
2170 for order in orders_now:
2171 by_source_id[order.source_id].append(order)
2173 threads = []
2174 for source_id in by_source_id:
2175 def download():
2176 try:
2177 sources[source_id].download_waveforms(
2178 by_source_id[source_id],
2179 success=enqueue(success),
2180 error_permanent=enqueue(split_promise),
2181 error_temporary=noop,
2182 batch_add=enqueue(batch_add))
2184 finally:
2185 calls.put(None)
2187 thread = threading.Thread(target=download)
2188 thread.start()
2189 threads.append(thread)
2191 ndone = 0
2192 while ndone < len(threads):
2193 ret = calls.get()
2194 if ret is None:
2195 ndone += 1
2196 else:
2197 ret[0](*ret[1])
2199 for thread in threads:
2200 thread.join()
2202 if task:
2203 task.update(n_order_groups - len(order_groups))
2205 if task:
2206 task.done()
2208 @filldocs
2209 def get_waveform_nuts(
2210 self, obj=None, tmin=None, tmax=None, time=None, codes=None,
2211 codes_exclude=None, sample_rate_min=None, sample_rate_max=None,
2212 order_only=False):
2214 '''
2215 Get waveform content entities matching given constraints.
2217 %(query_args)s
2219 Like :py:meth:`get_nuts` with ``kind='waveform'`` but additionally
2220 resolves matching waveform promises (downloads waveforms from remote
2221 sources).
2223 See :py:meth:`iter_nuts` for details on time span matching.
2224 '''
2226 args = self._get_selection_args(WAVEFORM, obj, tmin, tmax, time, codes)
2228 if self.downloads_enabled:
2229 self._redeem_promises(
2230 *args,
2231 codes_exclude,
2232 sample_rate_min,
2233 sample_rate_max,
2234 order_only=order_only)
2236 nuts = sorted(
2237 self.iter_nuts('waveform', *args), key=lambda nut: nut.dkey)
2239 return nuts
2241 @filldocs
2242 def have_waveforms(
2243 self, obj=None, tmin=None, tmax=None, time=None, codes=None):
2245 '''
2246 Check if any waveforms or waveform promises are available for given
2247 constraints.
2249 %(query_args)s
2250 '''
2252 args = self._get_selection_args(WAVEFORM, obj, tmin, tmax, time, codes)
2253 return bool(list(
2254 self.iter_nuts('waveform', *args, limit=1))) \
2255 or (self.downloads_enabled and bool(list(
2256 self.iter_nuts('waveform_promise', *args, limit=1))))
    @filldocs
    def get_waveforms(
            self, obj=None, tmin=None, tmax=None, time=None, codes=None,
            codes_exclude=None, sample_rate_min=None, sample_rate_max=None,
            uncut=False, want_incomplete=True, degap=True,
            maxgap=5, maxlap=None, snap=None, include_last=False,
            load_data=True, accessor_id='default', operator_params=None,
            order_only=False, channel_priorities=None):

        '''
        Get waveforms matching given constraints.

        %(query_args)s

        :param sample_rate_min:
            Consider only waveforms with a sampling rate equal to or greater
            than the given value [Hz].
        :type sample_rate_min:
            float

        :param sample_rate_max:
            Consider only waveforms with a sampling rate equal to or less than
            the given value [Hz].
        :type sample_rate_max:
            float

        :param uncut:
            Set to ``True``, to disable cutting traces to [``tmin``, ``tmax``]
            and to disable degapping/deoverlapping. Returns untouched traces as
            they are read from file segment. File segments are always read in
            their entirety.
        :type uncut:
            bool

        :param want_incomplete:
            If ``True``, gappy/incomplete traces are included in the result.
        :type want_incomplete:
            bool

        :param degap:
            If ``True``, connect traces and remove gaps and overlaps.
        :type degap:
            bool

        :param maxgap:
            Maximum gap size in samples which is filled with interpolated
            samples when ``degap`` is ``True``.
        :type maxgap:
            int

        :param maxlap:
            Maximum overlap size in samples which is removed when ``degap`` is
            ``True``.
        :type maxlap:
            int

        :param snap:
            Rounding functions used when computing sample index from time
            instance, for trace start and trace end, respectively. By default,
            ``(round, round)`` is used.
        :type snap:
            :py:class:`tuple` of 2 callables

        :param include_last:
            If ``True``, add one more sample to the returned traces (the sample
            which would be the first sample of a query with ``tmin`` set to the
            current value of ``tmax``).
        :type include_last:
            bool

        :param load_data:
            If ``True``, waveform data samples are read from files (or cache).
            If ``False``, meta-information-only traces are returned (dummy
            traces with no data samples).
        :type load_data:
            bool

        :param accessor_id:
            Name of consumer on whose behalf data is accessed. Used in cache
            management (see :py:mod:`~pyrocko.squirrel.cache`). Used as a key
            to distinguish different points of extraction for the decision of
            when to release cached waveform data. Should be used when data is
            alternately extracted from more than one region / selection.
        :type accessor_id:
            str

        :param channel_priorities:
            List of band/instrument code combinations to try. For example,
            giving ``['HH', 'BH']`` would first try to get ``HH?`` channels and
            then fallback to ``BH?`` if these are not available. The first
            matching waveforms are returned. Use in combination with
            ``sample_rate_min`` and ``sample_rate_max`` to constrain the sample
            rate.
        :type channel_priorities:
            :py:class:`list` of :py:class:`str`

        See :py:meth:`iter_nuts` for details on time span matching.

        Loaded data is kept in memory (at least) until
        :py:meth:`clear_accessor` has been called or
        :py:meth:`advance_accessor` has been called two consecutive times
        without data being accessed between the two calls (by this accessor).
        Data may still be further kept in the memory cache if held alive by
        consumers with a different ``accessor_id``.
        '''

        tmin, tmax, codes = self._get_selection_args(
            WAVEFORM, obj, tmin, tmax, time, codes)

        # Channel priorities are handled by a helper which calls back into
        # this method once per band/instrument code group.
        if channel_priorities is not None:
            return self._get_waveforms_prioritized(
                tmin=tmin, tmax=tmax, codes=codes, codes_exclude=codes_exclude,
                sample_rate_min=sample_rate_min,
                sample_rate_max=sample_rate_max,
                uncut=uncut, want_incomplete=want_incomplete, degap=degap,
                maxgap=maxgap, maxlap=maxlap, snap=snap,
                include_last=include_last, load_data=load_data,
                accessor_id=accessor_id, operator_params=operator_params,
                order_only=order_only, channel_priorities=channel_priorities)

        # Promises (remote data placeholders) only count when downloads are
        # enabled.
        kinds = ['waveform']
        if self.downloads_enabled:
            kinds.append('waveform_promise')

        # Substitute open query bounds with the overall available time span.
        self_tmin, self_tmax = self.get_time_span(kinds)

        if None in (self_tmin, self_tmax):
            logger.warning(
                'No waveforms available.')
            return []

        tmin = tmin if tmin is not None else self_tmin
        tmax = tmax if tmax is not None else self_tmax

        # If an operator is registered for exactly these codes, delegate
        # waveform assembly to it.
        if codes is not None and len(codes) == 1:
            # TODO: fix for multiple / mixed codes
            operator = self.get_operator(codes[0])
            if operator is not None:
                return operator.get_waveforms(
                    self, codes[0],
                    tmin=tmin, tmax=tmax,
                    uncut=uncut, want_incomplete=want_incomplete, degap=degap,
                    maxgap=maxgap, maxlap=maxlap, snap=snap,
                    include_last=include_last, load_data=load_data,
                    accessor_id=accessor_id, params=operator_params)

        # This resolves matching waveform promises (may trigger downloads).
        nuts = self.get_waveform_nuts(
            obj, tmin, tmax, time, codes, codes_exclude, sample_rate_min,
            sample_rate_max, order_only=order_only)

        if order_only:
            return []

        if load_data:
            traces = [
                self.get_content(nut, 'waveform', accessor_id) for nut in nuts]

        else:
            # Meta-information-only dummy traces, no samples read.
            traces = [
                trace.Trace(**nut.trace_kwargs) for nut in nuts]

        if uncut:
            return traces

        if snap is None:
            snap = (round, round)

        chopped = []
        for tr in traces:
            # Cached traces may still carry data; strip it when the caller
            # asked for meta-information only.
            if not load_data and tr.ydata is not None:
                tr = tr.copy(data=False)
                tr.ydata = None

            try:
                chopped.append(tr.chop(
                    tmin, tmax,
                    inplace=False,
                    snap=snap,
                    include_last=include_last))

            except trace.NoData:
                # Trace does not intersect the requested span after snapping.
                pass

        processed = self._process_chopped(
            chopped, degap, maxgap, maxlap, want_incomplete, tmin, tmax)

        return processed
    def _get_waveforms_prioritized(
            self, tmin=None, tmax=None, codes=None, codes_exclude=None,
            channel_priorities=None, **kwargs):

        '''
        Fetch waveforms trying band/instrument code groups in priority order.

        For each two-character prefix in ``channel_priorities`` (e.g.
        ``'HH'``), query waveforms with the channel pattern ``<prefix>?``.
        Codes already satisfied by a higher-priority group are excluded from
        subsequent queries, so only the first matching waveforms per channel
        are returned. Remaining keyword arguments are passed through to
        :py:meth:`get_waveforms`.
        '''

        trs_all = []
        codes_have = set()
        for channel in channel_priorities:
            assert len(channel) == 2
            if codes is not None:
                codes_now = [
                    codes_.replace(channel=channel+'?') for codes_ in codes]
            else:
                # NOTE(review): this branch passes a single codes object
                # rather than a list — presumably get_waveforms normalizes
                # scalar codes arguments; confirm against
                # _get_selection_args.
                codes_now = model.CodesNSLCE('*', '*', '*', channel+'?')

            # Exclude codes already found at a higher priority, mapped onto
            # the current band/instrument prefix (same component letter).
            codes_exclude_now = list(set(
                codes_.replace(channel=channel+codes_.channel[-1])
                for codes_ in codes_have))

            if codes_exclude:
                codes_exclude_now.extend(codes_exclude)

            trs = self.get_waveforms(
                tmin=tmin,
                tmax=tmax,
                codes=codes_now,
                codes_exclude=codes_exclude_now,
                **kwargs)

            codes_have.update(set(tr.codes for tr in trs))
            trs_all.extend(trs)

        return trs_all
    @filldocs
    def chopper_waveforms(
            self, obj=None, tmin=None, tmax=None, time=None, codes=None,
            codes_exclude=None, sample_rate_min=None, sample_rate_max=None,
            tinc=None, tpad=0.,
            want_incomplete=True, snap_window=False,
            degap=True, maxgap=5, maxlap=None,
            snap=None, include_last=False, load_data=True,
            accessor_id=None, clear_accessor=True, operator_params=None,
            grouping=None, channel_priorities=None):

        '''
        Iterate window-wise over waveform archive.

        %(query_args)s

        :param tinc:
            Time increment (window shift time) (default uses ``tmax-tmin``).
        :type tinc:
            :py:func:`~pyrocko.util.get_time_float`

        :param tpad:
            Padding time appended on either side of the data window (window
            overlap is ``2*tpad``).
        :type tpad:
            :py:func:`~pyrocko.util.get_time_float`

        :param want_incomplete:
            If ``True``, gappy/incomplete traces are included in the result.
        :type want_incomplete:
            bool

        :param snap_window:
            If ``True``, start time windows at multiples of tinc with respect
            to system time zero.
        :type snap_window:
            bool

        :param degap:
            If ``True``, connect traces and remove gaps and overlaps.
        :type degap:
            bool

        :param maxgap:
            Maximum gap size in samples which is filled with interpolated
            samples when ``degap`` is ``True``.
        :type maxgap:
            int

        :param maxlap:
            Maximum overlap size in samples which is removed when ``degap`` is
            ``True``.
        :type maxlap:
            int

        :param snap:
            Rounding functions used when computing sample index from time
            instance, for trace start and trace end, respectively. By default,
            ``(round, round)`` is used.
        :type snap:
            :py:class:`tuple` of 2 callables

        :param include_last:
            If ``True``, add one more sample to the returned traces (the sample
            which would be the first sample of a query with ``tmin`` set to the
            current value of ``tmax``).
        :type include_last:
            bool

        :param load_data:
            If ``True``, waveform data samples are read from files (or cache).
            If ``False``, meta-information-only traces are returned (dummy
            traces with no data samples).
        :type load_data:
            bool

        :param accessor_id:
            Name of consumer on whose behalf data is accessed. Used in cache
            management (see :py:mod:`~pyrocko.squirrel.cache`). Used as a key
            to distinguish different points of extraction for the decision of
            when to release cached waveform data. Should be used when data is
            alternately extracted from more than one region / selection.
        :type accessor_id:
            str

        :param clear_accessor:
            If ``True`` (default), :py:meth:`clear_accessor` is called when the
            chopper finishes. Set to ``False`` to keep loaded waveforms in
            memory when the generator returns.
        :type clear_accessor:
            bool

        :param grouping:
            By default, traversal over the data is over time and all matching
            traces of a time window are yielded. Using this option, it is
            possible to traverse the data first by group (e.g. station or
            network) and second by time. This can reduce the number of traces
            in each batch and thus reduce the memory footprint of the process.
        :type grouping:
            :py:class:`~pyrocko.squirrel.operators.base.Grouping`

        :yields:
            A list of :py:class:`~pyrocko.trace.Trace` objects for every
            extracted time window.

        See :py:meth:`iter_nuts` for details on time span matching.
        '''

        tmin, tmax, codes = self._get_selection_args(
            WAVEFORM, obj, tmin, tmax, time, codes)

        kinds = ['waveform']
        if self.downloads_enabled:
            kinds.append('waveform_promise')

        self_tmin, self_tmax = self.get_time_span(kinds)

        if None in (self_tmin, self_tmax):
            logger.warning(
                'Content has undefined time span. No waveforms and no '
                'waveform promises?')
            return

        if snap_window and tinc is not None:
            # Align window boundaries to multiples of tinc (system time zero).
            tmin = tmin if tmin is not None else self_tmin
            tmax = tmax if tmax is not None else self_tmax
            tmin = math.floor(tmin / tinc) * tinc
            tmax = math.ceil(tmax / tinc) * tinc
        else:
            # Shrink open bounds by tpad so padded windows stay within the
            # available time span.
            tmin = tmin if tmin is not None else self_tmin + tpad
            tmax = tmax if tmax is not None else self_tmax - tpad

        tinc = tinc if tinc is not None else tmax - tmin

        try:
            if accessor_id is None:
                accessor_id = 'chopper%i' % self._n_choppers_active

            self._n_choppers_active += 1

            # eps guards against an extra window when the span is an exact
            # multiple of tinc (floating point rounding).
            eps = tinc * 1e-6
            if tinc != 0.0:
                nwin = int(((tmax - eps) - tmin) / tinc) + 1
            else:
                nwin = 1

            if grouping is None:
                codes_list = [codes]
            else:
                # Build per-group codes lists from the requested grouping,
                # restricted to codes actually available.
                operator = Operator(
                    filtering=CodesPatternFiltering(codes=codes),
                    grouping=grouping)

                available = set(self.get_codes(kind='waveform'))
                if self.downloads_enabled:
                    available.update(self.get_codes(kind='waveform_promise'))
                operator.update_mappings(sorted(available))

                codes_list = [
                    codes_patterns_list(scl)
                    for scl in operator.iter_in_codes()]

            ngroups = len(codes_list)
            for igroup, scl in enumerate(codes_list):
                for iwin in range(nwin):
                    # Last window is clipped to tmax.
                    wmin, wmax = tmin+iwin*tinc, min(tmin+(iwin+1)*tinc, tmax)

                    chopped = self.get_waveforms(
                        tmin=wmin-tpad,
                        tmax=wmax+tpad,
                        codes=scl,
                        codes_exclude=codes_exclude,
                        sample_rate_min=sample_rate_min,
                        sample_rate_max=sample_rate_max,
                        snap=snap,
                        include_last=include_last,
                        load_data=load_data,
                        want_incomplete=want_incomplete,
                        degap=degap,
                        maxgap=maxgap,
                        maxlap=maxlap,
                        accessor_id=accessor_id,
                        operator_params=operator_params,
                        channel_priorities=channel_priorities)

                    # Tell the cache this accessor is done with the previous
                    # window's data.
                    self.advance_accessor(accessor_id)

                    yield Batch(
                        tmin=wmin,
                        tmax=wmax,
                        i=iwin,
                        n=nwin,
                        igroup=igroup,
                        ngroups=ngroups,
                        traces=chopped)

        finally:
            # Runs also when the consumer abandons the generator.
            self._n_choppers_active -= 1
            if clear_accessor:
                self.clear_accessor(accessor_id, 'waveform')
2680 def _process_chopped(
2681 self, chopped, degap, maxgap, maxlap, want_incomplete, tmin, tmax):
2683 chopped.sort(key=lambda a: a.full_id)
2684 if degap:
2685 chopped = trace.degapper(chopped, maxgap=maxgap, maxlap=maxlap)
2687 if not want_incomplete:
2688 chopped_weeded = []
2689 for tr in chopped:
2690 emin = tr.tmin - tmin
2691 emax = tr.tmax + tr.deltat - tmax
2692 if (abs(emin) <= 0.5*tr.deltat and abs(emax) <= 0.5*tr.deltat):
2693 chopped_weeded.append(tr)
2695 elif degap:
2696 if (0. < emin <= 5. * tr.deltat
2697 and -5. * tr.deltat <= emax < 0.):
2699 tr.extend(tmin, tmax-tr.deltat, fillmethod='repeat')
2700 chopped_weeded.append(tr)
2702 chopped = chopped_weeded
2704 return chopped
    def _get_pyrocko_stations(
            self, obj=None, tmin=None, tmax=None, time=None, codes=None,
            on_error='raise'):

        '''
        Get station information converted to classic
        :py:class:`pyrocko.model.Station` objects.

        Station attributes gathered from station and channel entries of the
        same NSL codes are merged with :py:func:`pyrocko.util.
        consistency_merge`; ``on_error`` controls how inconsistencies are
        handled there.
        '''

        from pyrocko import model as pmodel

        if codes is not None:
            codes = codes_patterns_for_kind(STATION, codes)

        # Per NSL: (list of station-arg tuples, list of channel objects).
        by_nsl = defaultdict(lambda: (list(), list()))
        for station in self.get_stations(obj, tmin, tmax, time, codes):
            sargs = station._get_pyrocko_station_args()
            by_nsl[station.codes.nsl][0].append(sargs)

        # Widen station patterns to channel (NSLCE) patterns for the channel
        # query below.
        if codes is not None:
            codes = [model.CodesNSLCE(c) for c in codes]

        for channel in self.get_channels(obj, tmin, tmax, time, codes):
            sargs = channel._get_pyrocko_station_args()
            sargs_list, channels_list = by_nsl[channel.codes.nsl]
            sargs_list.append(sargs)
            channels_list.append(channel)

        pstations = []
        nsls = list(by_nsl.keys())
        nsls.sort()
        for nsl in nsls:
            sargs_list, channels_list = by_nsl[nsl]
            # The ('',) prefix supplies the merge key expected by
            # consistency_merge.
            sargs = util.consistency_merge(
                [('',) + x for x in sargs_list],
                error=on_error)

            by_c = defaultdict(list)
            for ch in channels_list:
                by_c[ch.codes.channel].append(ch._get_pyrocko_channel_args())

            chas = list(by_c.keys())
            chas.sort()
            pchannels = []
            for cha in chas:
                list_of_cargs = by_c[cha]
                cargs = util.consistency_merge(
                    [('',) + x for x in list_of_cargs],
                    error=on_error)
                pchannels.append(pmodel.Channel(*cargs))

            pstations.append(
                pmodel.Station(*sargs, channels=pchannels))

        return pstations
2757 @property
2758 def pile(self):
2760 '''
2761 Emulates the older :py:class:`pyrocko.pile.Pile` interface.
2763 This property exposes a :py:class:`pyrocko.squirrel.pile.Pile` object,
2764 which emulates most of the older :py:class:`pyrocko.pile.Pile` methods
2765 but uses the fluffy power of the Squirrel under the hood.
2767 This interface can be used as a drop-in replacement for piles which are
2768 used in existing scripts and programs for efficient waveform data
2769 access. The Squirrel-based pile scales better for large datasets. Newer
2770 scripts should use Squirrel's native methods to avoid the emulation
2771 overhead.
2772 '''
2773 from . import pile
2775 if self._pile is None:
2776 self._pile = pile.Pile(self)
2778 return self._pile
2780 def snuffle(self, **kwargs):
2781 '''
2782 Look at dataset in Snuffler.
2783 '''
2784 self.pile.snuffle(**kwargs)
2786 def _gather_codes_keys(self, kind, gather, selector):
2787 return set(
2788 gather(codes)
2789 for codes in self.iter_codes(kind)
2790 if selector is None or selector(codes))
2792 def __str__(self):
2793 return str(self.get_stats())
    def get_coverage(
            self, kind, tmin=None, tmax=None, codes=None, limit=None):

        '''
        Get coverage information.

        Get information about strips of gapless data coverage.

        :param kind:
            Content kind to be queried.
        :type kind:
            str

        :param tmin:
            Start time of query interval.
        :type tmin:
            :py:func:`~pyrocko.util.get_time_float`

        :param tmax:
            End time of query interval.
        :type tmax:
            :py:func:`~pyrocko.util.get_time_float`

        :param codes:
            If given, restrict query to given content codes patterns.
        :type codes:
            :py:class:`list` of :py:class:`~pyrocko.squirrel.model.Codes`
            objects appropriate for the queried content type, or anything which
            can be converted to such objects.

        :param limit:
            Limit query to return only up to a given maximum number of entries
            per matching time series (without setting this option, very gappy
            data could cause the query to execute for a very long time).
        :type limit:
            int

        :returns:
            Information about time spans covered by the requested time series
            data.
        :rtype:
            :py:class:`list` of :py:class:`~pyrocko.squirrel.model.Coverage`
        '''

        # Times are stored split into integer seconds and a sub-second offset.
        tmin_seconds, tmin_offset = model.tsplit(tmin)
        tmax_seconds, tmax_offset = model.tsplit(tmax)
        kind_id = to_kind_id(kind)

        codes_info = list(self._iter_codes_info(kind=kind))

        # kdata_all entries: (pattern, kind_codes_id, codes_entry, deltat).
        kdata_all = []
        if codes is None:
            for _, codes_entry, deltat, kind_codes_id, _ in codes_info:
                kdata_all.append(
                    (codes_entry, kind_codes_id, codes_entry, deltat))

        else:
            for codes_entry in codes:
                pattern = to_codes(kind_id, codes_entry)
                for _, codes_entry, deltat, kind_codes_id, _ in codes_info:
                    if model.match_codes(pattern, codes_entry):
                        kdata_all.append(
                            (pattern, kind_codes_id, codes_entry, deltat))

        kind_codes_ids = [x[1] for x in kdata_all]

        # Number of traces alive exactly at tmin, per (codes, deltat); used
        # to seed the running counts below.
        counts_at_tmin = {}
        if tmin is not None:
            for nut in self.iter_nuts(
                    kind, tmin, tmin, kind_codes_ids=kind_codes_ids):

                k = nut.codes, nut.deltat
                if k not in counts_at_tmin:
                    counts_at_tmin[k] = 0

                counts_at_tmin[k] += 1

        coverages = []
        for pattern, kind_codes_id, codes_entry, deltat in kdata_all:
            # entry layout: [pattern, codes, deltat, tmin, tmax, changes]
            entry = [pattern, codes_entry, deltat, None, None, []]

            # First/last coverage change time for this series.
            for i, order in [(0, 'ASC'), (1, 'DESC')]:
                sql = self._sql('''
                    SELECT
                        time_seconds,
                        time_offset
                    FROM %(db)s.%(coverage)s
                    WHERE
                        kind_codes_id == ?
                    ORDER BY
                        kind_codes_id ''' + order + ''',
                        time_seconds ''' + order + ''',
                        time_offset ''' + order + '''
                    LIMIT 1
                ''')

                for row in self._conn.execute(sql, [kind_codes_id]):
                    entry[3+i] = model.tjoin(row[0], row[1])

            # No coverage information at all for this series.
            if None in entry[3:5]:
                continue

            args = [kind_codes_id]

            sql_time = ''
            if tmin is not None:
                # intentionally < because (== tmin) is queried from nuts
                sql_time += ' AND ( ? < time_seconds ' \
                    'OR ( ? == time_seconds AND ? < time_offset ) ) '
                args.extend([tmin_seconds, tmin_seconds, tmin_offset])

            if tmax is not None:
                sql_time += ' AND ( time_seconds < ? ' \
                    'OR ( ? == time_seconds AND time_offset <= ? ) ) '
                args.extend([tmax_seconds, tmax_seconds, tmax_offset])

            sql_limit = ''
            if limit is not None:
                sql_limit = ' LIMIT ?'
                args.append(limit)

            sql = self._sql('''
                SELECT
                    time_seconds,
                    time_offset,
                    step
                FROM %(db)s.%(coverage)s
                WHERE
                    kind_codes_id == ?
                    ''' + sql_time + '''
                ORDER BY
                    kind_codes_id,
                    time_seconds,
                    time_offset
            ''' + sql_limit)

            rows = list(self._conn.execute(sql, args))

            if limit is not None and len(rows) == limit:
                # Too gappy: signal truncation with changes set to None.
                entry[-1] = None
            else:
                # Accumulate (time, running count) change points, bracketed
                # by the query interval bounds when given.
                counts = counts_at_tmin.get((codes_entry, deltat), 0)
                tlast = None
                if tmin is not None:
                    entry[-1].append((tmin, counts))
                    tlast = tmin

                for row in rows:
                    t = model.tjoin(row[0], row[1])
                    counts += row[2]
                    entry[-1].append((t, counts))
                    tlast = t

                if tmax is not None and (tlast is None or tlast != tmax):
                    entry[-1].append((tmax, counts))

            coverages.append(model.Coverage.from_values(entry + [kind_id]))

        return coverages
2954 def get_stationxml(
2955 self, obj=None, tmin=None, tmax=None, time=None, codes=None,
2956 level='response', on_error='raise'):
2958 '''
2959 Get station/channel/response metadata in StationXML representation.
2961 %(query_args)s
2963 :returns:
2964 :py:class:`~pyrocko.io.stationxml.FDSNStationXML` object.
2965 '''
2967 if level not in ('network', 'station', 'channel', 'response'):
2968 raise ValueError('Invalid level: %s' % level)
2970 tmin, tmax, codes = self._get_selection_args(
2971 CHANNEL, obj, tmin, tmax, time, codes)
2973 def tts(t):
2974 if t is None:
2975 return '<none>'
2976 else:
2977 return util.tts(t, format='%Y-%m-%d %H:%M:%S')
2979 if on_error == 'ignore':
2980 def handle_error(exc):
2981 pass
2983 elif on_error == 'warn':
2984 def handle_error(exc):
2985 logger.warning(str(exc))
2987 elif on_error == 'raise':
2988 def handle_error(exc):
2989 raise exc
2991 def use_first(node_type_name, codes, k, group):
2992 if on_error == 'warn':
2993 logger.warning(
2994 'Duplicates for %s %s, %s - %s -> using first' % (
2995 node_type_name,
2996 '.'.join(codes),
2997 tts(k[0]), tts(k[1])))
2999 return group[0]
3001 def deduplicate(node_type_name, codes, nodes):
3002 groups = defaultdict(list)
3003 for node in nodes:
3004 k = (node.start_date, node.end_date)
3005 groups[k].append(node)
3007 return [
3008 use_first(node_type_name, codes, k, group)
3009 for (k, group) in groups.items()]
3011 filtering = CodesPatternFiltering(codes=codes)
3013 nslcs = list(set(
3014 codes.nslc for codes in
3015 filtering.filter(self.get_codes(kind='channel'))))
3017 from pyrocko.io import stationxml as sx
3019 networks = []
3020 for net, stas in prefix_tree(nslcs):
3021 network = sx.Network(code=net)
3022 networks.append(network)
3024 if level not in ('station', 'channel', 'response'):
3025 continue
3027 for sta, locs in stas:
3028 stations = self.get_stations(
3029 tmin=tmin,
3030 tmax=tmax,
3031 codes=(net, sta, '*'),
3032 model='stationxml')
3034 if on_error != 'raise':
3035 stations = deduplicate(
3036 'Station', (net, sta), stations)
3038 errors = sx.check_overlaps(
3039 'Station', (net, sta), stations)
3041 if errors:
3042 handle_error(error.Duplicate(
3043 'Overlapping/duplicate station info:\n %s'
3044 % '\n '.join(errors)))
3046 network.station_list.extend(stations)
3048 if level not in ('channel', 'response'):
3049 continue
3051 for loc, chas in locs:
3052 for cha, _ in chas:
3053 channels = self.get_channels(
3054 tmin=tmin,
3055 tmax=tmax,
3056 codes=(net, sta, loc, cha),
3057 model='stationxml')
3059 if on_error != 'raise':
3060 channels = deduplicate(
3061 'Channel', (net, sta, loc, cha), channels)
3063 errors = sx.check_overlaps(
3064 'Channel', (net, sta, loc, cha), channels)
3066 if errors:
3067 handle_error(error.Duplicate(
3068 'Overlapping/duplicate channel info:\n %s'
3069 % '\n '.join(errors)))
3071 for channel in channels:
3072 station = sx.find_containing(stations, channel)
3073 if station is not None:
3074 station.channel_list.append(channel)
3075 else:
3076 handle_error(error.NotAvailable(
3077 'No station or station epoch found '
3078 'for channel: %s' % '.'.join(
3079 (net, sta, loc, cha))))
3081 continue
3083 if level != 'response':
3084 continue
3086 try:
3087 response_sq, response_sx = self.get_response(
3088 codes=(net, sta, loc, cha),
3089 tmin=channel.start_date,
3090 tmax=channel.end_date,
3091 model='stationxml+',
3092 on_duplicate=on_error)
3094 except error.NotAvailable as e:
3095 handle_error(e)
3096 continue
3098 if not (
3099 sx.eq_open(
3100 channel.start_date, response_sq.tmin)
3101 and sx.eq_open(
3102 channel.end_date, response_sq.tmax)):
3104 handle_error(error.Inconsistencies(
3105 'Response time span does not match '
3106 'channel time span: %s' % '.'.join(
3107 (net, sta, loc, cha))))
3109 channel.response = response_sx
3111 return sx.FDSNStationXML(
3112 source='Generated by Pyrocko Squirrel.',
3113 network_list=networks)
    def add_operator(self, op):
        '''
        Register an operator with this Squirrel instance.

        The operator's code mappings are not computed here; call
        :py:meth:`update_operator_mappings` afterwards.
        '''
        self._operators.append(op)
3118 def update_operator_mappings(self):
3119 available = self.get_codes(kind=('channel'))
3121 for operator in self._operators:
3122 operator.update_mappings(available, self._operator_registry)
3124 def iter_operator_mappings(self):
3125 for operator in self._operators:
3126 for in_codes, out_codes in operator.iter_mappings():
3127 yield operator, in_codes, out_codes
3129 def get_operator_mappings(self):
3130 return list(self.iter_operator_mappings())
3132 def get_operator(self, codes):
3133 try:
3134 return self._operator_registry[codes][0]
3135 except KeyError:
3136 return None
3138 def get_operator_group(self, codes):
3139 try:
3140 return self._operator_registry[codes]
3141 except KeyError:
3142 return None, (None, None, None)
3144 def iter_operator_codes(self):
3145 for _, _, out_codes in self.iter_operator_mappings():
3146 for codes in out_codes:
3147 yield codes
3149 def get_operator_codes(self):
3150 return list(self.iter_operator_codes())
3152 def print_tables(self, table_names=None, stream=None):
3153 '''
3154 Dump raw database tables in textual form (for debugging purposes).
3156 :param table_names:
3157 Names of tables to be dumped or ``None`` to dump all.
3158 :type table_names:
3159 :py:class:`list` of :py:class:`str`
3161 :param stream:
3162 Open file or ``None`` to dump to standard output.
3163 '''
3165 if stream is None:
3166 stream = sys.stdout
3168 if isinstance(table_names, str):
3169 table_names = [table_names]
3171 if table_names is None:
3172 table_names = [
3173 'selection_file_states',
3174 'selection_nuts',
3175 'selection_kind_codes_count',
3176 'files', 'nuts', 'kind_codes', 'kind_codes_count']
3178 m = {
3179 'selection_file_states': '%(db)s.%(file_states)s',
3180 'selection_nuts': '%(db)s.%(nuts)s',
3181 'selection_kind_codes_count': '%(db)s.%(kind_codes_count)s',
3182 'files': 'files',
3183 'nuts': 'nuts',
3184 'kind_codes': 'kind_codes',
3185 'kind_codes_count': 'kind_codes_count'}
3187 for table_name in table_names:
3188 self._database.print_table(
3189 m[table_name] % self._names, stream=stream)
class SquirrelStats(Object):
    '''
    Container to hold statistics about contents available from a Squirrel.

    See also :py:meth:`Squirrel.get_stats`.
    '''

    nfiles = Int.T(
        help='Number of files in selection.')
    nnuts = Int.T(
        help='Number of index nuts in selection.')
    codes = List.T(
        Tuple.T(content_t=String.T()),
        help='Available code sequences in selection, e.g. '
             '(agency, network, station, location) for stations nuts.')
    kinds = List.T(
        String.T(),
        help='Available content types in selection.')
    total_size = Int.T(
        # Fix: help text said 'files is selection'.
        help='Aggregated file size of files in selection.')
    counts = Dict.T(
        String.T(), Dict.T(Tuple.T(content_t=String.T()), Int.T()),
        help='Breakdown of how many nuts of any content type and code '
             'sequence are available in selection, ``counts[kind][codes]``.')
    time_spans = Dict.T(
        String.T(), Tuple.T(content_t=Timestamp.T()),
        help='Time spans by content type.')
    sources = List.T(
        String.T(),
        help='Descriptions of attached sources.')
    operators = List.T(
        String.T(),
        help='Descriptions of attached operators.')

    def __str__(self):
        kind_counts = dict(
            (kind, sum(self.counts[kind].values())) for kind in self.kinds)

        scodes = model.codes_to_str_abbreviated(self.codes)

        ssources = '<none>' if not self.sources else '\n' + '\n'.join(
            ' ' + s for s in self.sources)

        soperators = '<none>' if not self.operators else '\n' + '\n'.join(
            ' ' + s for s in self.operators)

        def stime(t):
            # Sentinel times (whole-world span) are shown as '<none>'.
            return util.tts(t) if t is not None and t not in (
                model.g_tmin, model.g_tmax) else '<none>'

        def stable(rows):
            # Left-justify each column to the width of its longest cell.
            ns = [max(len(w) for w in col) for col in zip(*rows)]
            return '\n'.join(
                ' '.join(w.ljust(n) for n, w in zip(ns, row))
                for row in rows)

        def indent(s):
            return '\n'.join(' '+line for line in s.splitlines())

        stspans = '<none>' if not self.kinds else '\n' + indent(stable([(
            kind + ':',
            str(kind_counts[kind]),
            stime(self.time_spans[kind][0]),
            '-',
            stime(self.time_spans[kind][1])) for kind in sorted(self.kinds)]))

        s = '''
Number of files: %i
Total size of known files: %s
Number of index nuts: %i
Available content kinds: %s
Available codes: %s
Sources: %s
Operators: %s''' % (
            self.nfiles,
            util.human_bytesize(self.total_size),
            self.nnuts,
            stspans, scodes, ssources, soperators)

        return s.lstrip()
# Names exported on ``from pyrocko.squirrel.base import *``.
__all__ = [
    'Squirrel',
    'SquirrelStats',
]