Coverage for /usr/local/lib/python3.11/dist-packages/pyrocko/squirrel/base.py: 85%
871 statements
« prev ^ index » next coverage.py v6.5.0, created at 2023-11-27 09:36 +0000
« prev ^ index » next coverage.py v6.5.0, created at 2023-11-27 09:36 +0000
1# http://pyrocko.org - GPLv3
2#
3# The Pyrocko Developers, 21st Century
4# ---|P------/S----------~Lg----------
6'''
7Squirrel main classes.
8'''
10import sys
11import os
12import time
13import math
14import logging
15import threading
16import queue
17from collections import defaultdict
19from pyrocko.guts import Object, Int, List, Tuple, String, Timestamp, Dict
20from pyrocko import util, trace
21from pyrocko import progress
22from pyrocko.plot import nice_time_tick_inc_approx_secs
24from . import model, io, cache, dataset
26from .model import to_kind_id, WaveformOrder, to_kind, to_codes, \
27 STATION, CHANNEL, RESPONSE, EVENT, WAVEFORM, codes_patterns_list, \
28 codes_patterns_for_kind
29from .client import fdsn, catalog
30from .selection import Selection, filldocs
31from .database import abspath
32from .operators.base import Operator, CodesPatternFiltering
33from . import client, environment, error
35logger = logging.getLogger('psq.base')
37guts_prefix = 'squirrel'
def nonef(f, xs):
    '''
    Apply ``f`` to the non-``None`` items of ``xs``.

    Returns ``None`` if no items remain after filtering.
    '''
    filtered = [x for x in xs if x is not None]
    if not filtered:
        return None

    return f(filtered)
def make_task(*args):
    '''
    Create a progress-reporting task bound to this module's logger.
    '''
    task = progress.task(*args, logger=logger)
    return task
def lpick(condition, seq):
    '''
    Partition ``seq`` into two lists by ``condition``.

    Returns ``(nomatch, match)`` where ``match`` holds the elements for
    which ``condition`` is truthy.
    '''
    nomatch, match = [], []
    for item in seq:
        (match if condition(item) else nomatch).append(item)

    return nomatch, match
def len_plural(obj):
    '''
    Return ``(len(obj), suffix)`` with an English plural suffix.

    The suffix is ``''`` when the length is exactly 1 and ``'s'``
    otherwise. Convenient for log messages like
    ``'%i file%s' % len_plural(files)``.
    '''
    # Compute the length once instead of twice as in the original.
    n = len(obj)
    return n, '' if n == 1 else 's'
def blocks(tmin, tmax, deltat, nsamples_block=100000):
    '''
    Yield consecutive time block boundaries covering ``[tmin, tmax]``.

    The block length is a "nice" increment close to
    ``deltat * nsamples_block``. Each yielded item is a
    ``(tblock_min, tblock_max)`` pair aligned to multiples of the block
    length.
    '''
    tblock = nice_time_tick_inc_approx_secs(
        util.to_time_float(deltat * nsamples_block))

    ifirst = int(math.floor(tmin / tblock))
    ilast = int(math.ceil(tmax / tblock))

    for i in range(ifirst, ilast):
        yield i * tblock, (i+1) * tblock
def gaps(avail, tmin, tmax):
    '''
    Compute gaps in ``[tmin, tmax]`` not covered by the ``avail`` spans.

    :param avail: list of ``(tmin_a, tmax_a)`` availability intervals.
    :returns: list of ``(tmin_gap, tmax_gap)`` tuples.
    '''
    assert tmin < tmax

    # Sweep line: +1 events open coverage, -1 events close it. The query
    # interval itself is added inverted so uncovered stretches inside it
    # show up as zero-coverage regions.
    events = [(tmax, 1), (tmin, -1)]
    for tmin_a, tmax_a in avail:
        assert tmin_a < tmax_a
        events.append((tmin_a, 1))
        events.append((tmax_a, -1))

    events.sort()

    result = []
    level = 1
    tmin_gap = None
    for t, step in events:
        if level == 1 and step == -1:
            tmin_gap = t
        elif level == 0 and step == 1 and tmin_gap is not None:
            if tmin_gap != t:
                result.append((tmin_gap, t))

        level += step

    return result
def order_key(order):
    '''
    Key function grouping orders by codes and time span.
    '''
    return order.codes, order.tmin, order.tmax
103def _is_exact(pat):
104 return not ('*' in pat or '?' in pat or ']' in pat or '[' in pat)
def prefix_tree(tups):
    '''
    Build a nested prefix tree from a sequence of equal-length tuples.

    Returns a sorted list of ``(head, subtree)`` pairs, where each
    subtree is again a prefix tree of the tuple tails. Leaves carry an
    empty subtree list.
    '''
    if not tups:
        return []

    if len(tups[0]) == 1:
        return sorted((tup[0], []) for tup in tups)

    grouped = defaultdict(list)
    for tup in tups:
        grouped[tup[0]].append(tup[1:])

    return [
        (head, prefix_tree(tails))
        for head, tails in sorted(grouped.items())]
def match_time_span(tmin, tmax, obj):
    '''
    Check if query span ``[tmin, tmax)`` intersects ``obj``'s time span.

    ``None`` bounds (on either side) are treated as unlimited.
    '''
    if obj.tmin is not None and tmax is not None and obj.tmin > tmax:
        return False

    if tmin is not None and obj.tmax is not None and tmin >= obj.tmax:
        return False

    return True
class Batch(object):
    '''
    Batch of waveforms from window-wise data extraction.

    Encapsulates state and results yielded for each window in window-wise
    waveform extraction with the :py:meth:`Squirrel.chopper_waveforms` method.

    *Attributes:*

    .. py:attribute:: tmin

        Start of this time window.

    .. py:attribute:: tmax

        End of this time window.

    .. py:attribute:: i

        Index of this time window in sequence.

    .. py:attribute:: n

        Total number of time windows in sequence.

    .. py:attribute:: igroup

        Index of this time window's sequence group.

    .. py:attribute:: ngroups

        Total number of sequence groups.

    .. py:attribute:: traces

        Extracted waveforms for this time window.
    '''

    def __init__(self, tmin, tmax, i, n, igroup, ngroups, traces):
        # Plain value container: store everything as given.
        self.tmin, self.tmax = tmin, tmax
        self.i, self.n = i, n
        self.igroup, self.ngroups = igroup, ngroups
        self.traces = traces
178class Squirrel(Selection):
179 '''
180 Prompt, lazy, indexing, caching, dynamic seismological dataset access.
182 :param env:
183 Squirrel environment instance or directory path to use as starting
184 point for its detection. By default, the current directory is used as
185 starting point. When searching for a usable environment the directory
186 ``'.squirrel'`` or ``'squirrel'`` in the current (or starting point)
187 directory is used if it exists, otherwise the parent directories are
188 search upwards for the existence of such a directory. If no such
189 directory is found, the user's global Squirrel environment
190 ``'$HOME/.pyrocko/squirrel'`` is used.
191 :type env:
192 :py:class:`~pyrocko.squirrel.environment.Environment` or
193 :py:class:`str`
195 :param database:
196 Database instance or path to database. By default the
197 database found in the detected Squirrel environment is used.
198 :type database:
199 :py:class:`~pyrocko.squirrel.database.Database` or :py:class:`str`
201 :param cache_path:
202 Directory path to use for data caching. By default, the ``'cache'``
203 directory in the detected Squirrel environment is used.
204 :type cache_path:
205 :py:class:`str`
207 :param persistent:
208 If given a name, create a persistent selection.
209 :type persistent:
210 :py:class:`str`
212 This is the central class of the Squirrel framework. It provides a unified
213 interface to query and access seismic waveforms, station meta-data and
214 event information from local file collections and remote data sources. For
215 prompt responses, a profound database setup is used under the hood. To
216 speed up assemblage of ad-hoc data selections, files are indexed on first
217 use and the extracted meta-data is remembered in the database for
218 subsequent accesses. Bulk data is lazily loaded from disk and remote
219 sources, just when requested. Once loaded, data is cached in memory to
220 expedite typical access patterns. Files and data sources can be dynamically
221 added to and removed from the Squirrel selection at runtime.
223 Queries are restricted to the contents of the files currently added to the
224 Squirrel selection (usually a subset of the file meta-information
225 collection in the database). This list of files is referred to here as the
226 "selection". By default, temporary tables are created in the attached
227 database to hold the names of the files in the selection as well as various
228 indices and counters. These tables are only visible inside the application
229 which created them and are deleted when the database connection is closed
230 or the application exits. To create a selection which is not deleted at
231 exit, supply a name to the ``persistent`` argument of the Squirrel
232 constructor. Persistent selections are shared among applications using the
233 same database.
235 **Method summary**
237 Some of the methods are implemented in :py:class:`Squirrel`'s base class
238 :py:class:`~pyrocko.squirrel.selection.Selection`.
240 .. autosummary::
242 ~Squirrel.add
243 ~Squirrel.add_source
244 ~Squirrel.add_fdsn
245 ~Squirrel.add_catalog
246 ~Squirrel.add_dataset
247 ~Squirrel.add_virtual
248 ~Squirrel.update
249 ~Squirrel.update_waveform_promises
250 ~Squirrel.advance_accessor
251 ~Squirrel.clear_accessor
252 ~Squirrel.reload
253 ~pyrocko.squirrel.selection.Selection.iter_paths
254 ~Squirrel.iter_nuts
255 ~Squirrel.iter_kinds
256 ~Squirrel.iter_deltats
257 ~Squirrel.iter_codes
258 ~pyrocko.squirrel.selection.Selection.get_paths
259 ~Squirrel.get_nuts
260 ~Squirrel.get_kinds
261 ~Squirrel.get_deltats
262 ~Squirrel.get_codes
263 ~Squirrel.get_counts
264 ~Squirrel.get_time_span
265 ~Squirrel.get_deltat_span
266 ~Squirrel.get_nfiles
267 ~Squirrel.get_nnuts
268 ~Squirrel.get_total_size
269 ~Squirrel.get_stats
270 ~Squirrel.get_content
271 ~Squirrel.get_stations
272 ~Squirrel.get_channels
273 ~Squirrel.get_responses
274 ~Squirrel.get_events
275 ~Squirrel.get_waveform_nuts
276 ~Squirrel.get_waveforms
277 ~Squirrel.chopper_waveforms
278 ~Squirrel.get_coverage
279 ~Squirrel.pile
280 ~Squirrel.snuffle
281 ~Squirrel.glob_codes
282 ~pyrocko.squirrel.selection.Selection.get_database
283 ~Squirrel.print_tables
284 '''
    def __init__(
            self, env=None, database=None, cache_path=None, persistent=None):

        # Accept an Environment instance or a directory path / None, which
        # is resolved via environment.get_environment().
        if not isinstance(env, environment.Environment):
            env = environment.get_environment(env)

        # Anything not given explicitly falls back to the environment's
        # defaults.
        if database is None:
            database = env.expand_path(env.database_path)

        if cache_path is None:
            cache_path = env.expand_path(env.cache_path)

        if persistent is None:
            persistent = env.persistent

        Selection.__init__(
            self, database=database, persistent=persistent)

        # Anchor the database's base path at the parent directory of the
        # environment's base path.
        self.get_database().set_basepath(os.path.dirname(env.get_basepath()))

        # Separate in-memory content caches for waveform content and for
        # everything else.
        self._content_caches = {
            'waveform': cache.ContentCache(),
            'default': cache.ContentCache()}

        self._cache_path = cache_path

        self._sources = []
        self._operators = []
        self._operator_registry = {}

        self._pending_orders = []

        self._pile = None
        self._n_choppers_active = 0

        self.downloads_enabled = True

        # Names of the per-selection tables/columns in the attached
        # database, derived from this selection's name.
        self._names.update({
            'nuts': self.name + '_nuts',
            'kind_codes_count': self.name + '_kind_codes_count',
            'coverage': self.name + '_coverage'})

        with self.transaction('create tables') as cursor:
            self._create_tables_squirrel(cursor)
    def _create_tables_squirrel(self, cursor):
        # Create the per-selection tables plus the triggers which keep the
        # derived tables (kind_codes_count, coverage) consistent with the
        # nuts table as rows are inserted and deleted.

        # One row per indexed content piece ("nut") in the selection.
        cursor.execute(self._register_table(self._sql(
            '''
                CREATE TABLE IF NOT EXISTS %(db)s.%(nuts)s (
                    nut_id integer PRIMARY KEY,
                    file_id integer,
                    file_segment integer,
                    file_element integer,
                    kind_id integer,
                    kind_codes_id integer,
                    tmin_seconds integer,
                    tmin_offset integer,
                    tmax_seconds integer,
                    tmax_offset integer,
                    kscale integer)
            ''')))

        # Number of nuts per kind_codes entry, maintained by the
        # inc/dec triggers below.
        cursor.execute(self._register_table(self._sql(
            '''
                CREATE TABLE IF NOT EXISTS %(db)s.%(kind_codes_count)s (
                    kind_codes_id integer PRIMARY KEY,
                    count integer)
            ''')))

        cursor.execute(self._sql(
            '''
                CREATE UNIQUE INDEX IF NOT EXISTS %(db)s.%(nuts)s_file_element
                    ON %(nuts)s (file_id, file_segment, file_element)
            '''))

        cursor.execute(self._sql(
            '''
                CREATE INDEX IF NOT EXISTS %(db)s.%(nuts)s_index_file_id
                    ON %(nuts)s (file_id)
            '''))

        cursor.execute(self._sql(
            '''
                CREATE INDEX IF NOT EXISTS %(db)s.%(nuts)s_index_tmin_seconds
                    ON %(nuts)s (kind_id, tmin_seconds)
            '''))

        cursor.execute(self._sql(
            '''
                CREATE INDEX IF NOT EXISTS %(db)s.%(nuts)s_index_tmax_seconds
                    ON %(nuts)s (kind_id, tmax_seconds)
            '''))

        # Supports the kscale-binned time range queries in _timerange_sql.
        cursor.execute(self._sql(
            '''
                CREATE INDEX IF NOT EXISTS %(db)s.%(nuts)s_index_kscale
                    ON %(nuts)s (kind_id, kscale, tmin_seconds)
            '''))

        # Remove nuts when their file is deleted from the global file
        # table.
        cursor.execute(self._sql(
            '''
                CREATE TRIGGER IF NOT EXISTS %(db)s.%(nuts)s_delete_nuts
                BEFORE DELETE ON main.files FOR EACH ROW
                BEGIN
                  DELETE FROM %(nuts)s WHERE file_id == old.file_id;
                END
            '''))

        # trigger only on size to make silent update of mtime possible
        cursor.execute(self._sql(
            '''
                CREATE TRIGGER IF NOT EXISTS %(db)s.%(nuts)s_delete_nuts2
                BEFORE UPDATE OF size ON main.files FOR EACH ROW
                BEGIN
                  DELETE FROM %(nuts)s WHERE file_id == old.file_id;
                END
            '''))

        # Remove nuts when their file leaves this selection.
        cursor.execute(self._sql(
            '''
                CREATE TRIGGER IF NOT EXISTS
                    %(db)s.%(file_states)s_delete_files
                BEFORE DELETE ON %(db)s.%(file_states)s FOR EACH ROW
                BEGIN
                  DELETE FROM %(nuts)s WHERE file_id == old.file_id;
                END
            '''))

        # Keep kind_codes_count in sync: +1 on nut insert, -1 on delete.
        cursor.execute(self._sql(
            '''
                CREATE TRIGGER IF NOT EXISTS %(db)s.%(nuts)s_inc_kind_codes
                BEFORE INSERT ON %(nuts)s FOR EACH ROW
                BEGIN
                  INSERT OR IGNORE INTO %(kind_codes_count)s VALUES
                  (new.kind_codes_id, 0);
                  UPDATE %(kind_codes_count)s
                  SET count = count + 1
                  WHERE new.kind_codes_id
                     == %(kind_codes_count)s.kind_codes_id;
                END
            '''))

        cursor.execute(self._sql(
            '''
                CREATE TRIGGER IF NOT EXISTS %(db)s.%(nuts)s_dec_kind_codes
                BEFORE DELETE ON %(nuts)s FOR EACH ROW
                BEGIN
                  UPDATE %(kind_codes_count)s
                  SET count = count - 1
                  WHERE old.kind_codes_id
                     == %(kind_codes_count)s.kind_codes_id;
                END
            '''))

        # Sparse coverage representation: one row per time where the
        # number of overlapping nuts of a kind_codes entry changes by
        # ``step`` (+1 at nut start, -1 at nut end).
        cursor.execute(self._register_table(self._sql(
            '''
                CREATE TABLE IF NOT EXISTS %(db)s.%(coverage)s (
                    kind_codes_id integer,
                    time_seconds integer,
                    time_offset integer,
                    step integer)
            ''')))

        cursor.execute(self._sql(
            '''
                CREATE UNIQUE INDEX IF NOT EXISTS %(db)s.%(coverage)s_time
                    ON %(coverage)s (kind_codes_id, time_seconds, time_offset)
            '''))

        # Maintain coverage on nut insert: bump step at tmin, drop it at
        # tmax, and purge entries whose step becomes zero.
        cursor.execute(self._sql(
            '''
                CREATE TRIGGER IF NOT EXISTS %(db)s.%(nuts)s_add_coverage
                AFTER INSERT ON %(nuts)s FOR EACH ROW
                BEGIN
                    INSERT OR IGNORE INTO %(coverage)s VALUES
                    (new.kind_codes_id, new.tmin_seconds, new.tmin_offset, 0)
                    ;
                    UPDATE %(coverage)s
                    SET step = step + 1
                    WHERE new.kind_codes_id == %(coverage)s.kind_codes_id
                        AND new.tmin_seconds == %(coverage)s.time_seconds
                        AND new.tmin_offset == %(coverage)s.time_offset
                    ;
                    INSERT OR IGNORE INTO %(coverage)s VALUES
                    (new.kind_codes_id, new.tmax_seconds, new.tmax_offset, 0)
                    ;
                    UPDATE %(coverage)s
                    SET step = step - 1
                    WHERE new.kind_codes_id == %(coverage)s.kind_codes_id
                        AND new.tmax_seconds == %(coverage)s.time_seconds
                        AND new.tmax_offset == %(coverage)s.time_offset
                    ;
                    DELETE FROM %(coverage)s
                    WHERE new.kind_codes_id == %(coverage)s.kind_codes_id
                        AND new.tmin_seconds == %(coverage)s.time_seconds
                        AND new.tmin_offset == %(coverage)s.time_offset
                        AND step == 0
                    ;
                    DELETE FROM %(coverage)s
                    WHERE new.kind_codes_id == %(coverage)s.kind_codes_id
                        AND new.tmax_seconds == %(coverage)s.time_seconds
                        AND new.tmax_offset == %(coverage)s.time_offset
                        AND step == 0
                    ;
                END
            '''))

        # Inverse bookkeeping on nut delete.
        cursor.execute(self._sql(
            '''
                CREATE TRIGGER IF NOT EXISTS %(db)s.%(nuts)s_remove_coverage
                BEFORE DELETE ON %(nuts)s FOR EACH ROW
                BEGIN
                    INSERT OR IGNORE INTO %(coverage)s VALUES
                    (old.kind_codes_id, old.tmin_seconds, old.tmin_offset, 0)
                    ;
                    UPDATE %(coverage)s
                    SET step = step - 1
                    WHERE old.kind_codes_id == %(coverage)s.kind_codes_id
                        AND old.tmin_seconds == %(coverage)s.time_seconds
                        AND old.tmin_offset == %(coverage)s.time_offset
                    ;
                    INSERT OR IGNORE INTO %(coverage)s VALUES
                    (old.kind_codes_id, old.tmax_seconds, old.tmax_offset, 0)
                    ;
                    UPDATE %(coverage)s
                    SET step = step + 1
                    WHERE old.kind_codes_id == %(coverage)s.kind_codes_id
                        AND old.tmax_seconds == %(coverage)s.time_seconds
                        AND old.tmax_offset == %(coverage)s.time_offset
                    ;
                    DELETE FROM %(coverage)s
                    WHERE old.kind_codes_id == %(coverage)s.kind_codes_id
                        AND old.tmin_seconds == %(coverage)s.time_seconds
                        AND old.tmin_offset == %(coverage)s.time_offset
                        AND step == 0
                    ;
                    DELETE FROM %(coverage)s
                    WHERE old.kind_codes_id == %(coverage)s.kind_codes_id
                        AND old.tmax_seconds == %(coverage)s.time_seconds
                        AND old.tmax_offset == %(coverage)s.time_offset
                        AND step == 0
                    ;
                END
            '''))
    def _delete(self):
        '''Delete database tables associated with this Squirrel.'''

        with self.transaction('delete tables') as cursor:
            # One statement per line; table/trigger names are substituted
            # by _sql(). The coverage objects use IF EXISTS because they
            # may be absent in selections created by older versions.
            for s in '''
                    DROP TRIGGER %(db)s.%(nuts)s_delete_nuts;
                    DROP TRIGGER %(db)s.%(nuts)s_delete_nuts2;
                    DROP TRIGGER %(db)s.%(file_states)s_delete_files;
                    DROP TRIGGER %(db)s.%(nuts)s_inc_kind_codes;
                    DROP TRIGGER %(db)s.%(nuts)s_dec_kind_codes;
                    DROP TABLE %(db)s.%(nuts)s;
                    DROP TABLE %(db)s.%(kind_codes_count)s;
                    DROP TRIGGER IF EXISTS %(db)s.%(nuts)s_add_coverage;
                    DROP TRIGGER IF EXISTS %(db)s.%(nuts)s_remove_coverage;
                    DROP TABLE IF EXISTS %(db)s.%(coverage)s;
            '''.strip().splitlines():

                cursor.execute(self._sql(s))

        Selection._delete(self)
    @filldocs
    def add(self,
            paths,
            kinds=None,
            format='detect',
            include=None,
            exclude=None,
            check=True):

        '''
        Add files to the selection.

        :param paths:
            Iterator yielding paths to files or directories to be added to the
            selection. Recurses into directories. If given a ``str``, it
            is treated as a single path to be added.
        :type paths:
            :py:class:`list` of :py:class:`str`

        :param kinds:
            Content types to be made available through the Squirrel selection.
            By default, all known content types are accepted.
        :type kinds:
            :py:class:`list` of :py:class:`str`

        :param format:
            File format identifier or ``'detect'`` to enable auto-detection
            (available: %(file_formats)s).
        :type format:
            str

        :param include:
            If not ``None``, files are only included if their paths match the
            given regular expression pattern.
        :type include:
            str

        :param exclude:
            If not ``None``, files are only included if their paths do not
            match the given regular expression pattern.
        :type exclude:
            str

        :param check:
            If ``True``, all file modification times are checked to see if
            cached information has to be updated (slow). If ``False``, only
            previously unknown files are indexed and cached information is used
            for known files, regardless of file state (fast, corresponds to
            Squirrel's ``--optimistic`` mode). File deletions will go
            undetected in the latter case.
        :type check:
            bool

        :Complexity:
            O(log N)
        '''

        # Allow single values for convenience.
        if isinstance(kinds, str):
            kinds = (kinds,)

        if isinstance(paths, str):
            paths = [paths]

        kind_mask = model.to_kind_mask(kinds)

        # Register the files with the selection. Paths starting with
        # 'virtual:' bypass the filesystem traversal.
        Selection.add(
            self, util.iter_select_files(
                paths,
                show_progress=False,
                include=include,
                exclude=exclude,
                pass_through=lambda path: path.startswith('virtual:')
            ), kind_mask, format)

        # Index new/changed files and aggregate their nuts into the
        # selection.
        self._load(check)
        self._update_nuts()
    def reload(self):
        '''
        Check for modifications and reindex modified files.

        Based on file modification times.
        '''

        # Force re-checking of every file in the selection, reindex
        # anything that changed, then refresh the selection's nut table.
        self._set_file_states_force_check()
        self._load(check=True)
        self._update_nuts()
641 def add_virtual(self, nuts, virtual_paths=None):
642 '''
643 Add content which is not backed by files.
645 :param nuts:
646 Content pieces to be added.
647 :type nuts:
648 iterator yielding :py:class:`~pyrocko.squirrel.model.Nut` objects
650 :param virtual_paths:
651 List of virtual paths to prevent creating a temporary list of the
652 nuts while aggregating the file paths for the selection.
653 :type virtual_paths:
654 :py:class:`list` of :py:class:`str`
656 Stores to the main database and the selection.
657 '''
659 if isinstance(virtual_paths, str):
660 virtual_paths = [virtual_paths]
662 if virtual_paths is None:
663 if not isinstance(nuts, list):
664 nuts = list(nuts)
665 virtual_paths = set(nut.file_path for nut in nuts)
667 Selection.add(self, virtual_paths)
668 self.get_database().dig(nuts)
669 self._update_nuts()
671 def add_volatile(self, nuts):
672 if not isinstance(nuts, list):
673 nuts = list(nuts)
675 paths = list(set(nut.file_path for nut in nuts))
676 io.backends.virtual.add_nuts(nuts)
677 self.add_virtual(nuts, paths)
678 self._volatile_paths.extend(paths)
680 def add_volatile_waveforms(self, traces):
681 '''
682 Add in-memory waveforms which will be removed when the app closes.
683 '''
685 name = model.random_name()
687 path = 'virtual:volatile:%s' % name
689 nuts = []
690 for itr, tr in enumerate(traces):
691 assert tr.tmin <= tr.tmax
692 tmin_seconds, tmin_offset = model.tsplit(tr.tmin)
693 tmax_seconds, tmax_offset = model.tsplit(
694 tr.tmin + tr.data_len()*tr.deltat)
696 nuts.append(model.Nut(
697 file_path=path,
698 file_format='virtual',
699 file_segment=itr,
700 file_element=0,
701 file_mtime=0,
702 codes=tr.codes,
703 tmin_seconds=tmin_seconds,
704 tmin_offset=tmin_offset,
705 tmax_seconds=tmax_seconds,
706 tmax_offset=tmax_offset,
707 deltat=tr.deltat,
708 kind_id=to_kind_id('waveform'),
709 content=tr))
711 self.add_volatile(nuts)
712 return path
714 def _load(self, check):
715 for _ in io.iload(
716 self,
717 content=[],
718 skip_unchanged=True,
719 check=check):
720 pass
    def _update_nuts(self, transaction=None):
        # Copy nuts of files newly added to the selection from the global
        # ``nuts`` table into this selection's nuts table, honouring each
        # file's kind mask.
        # NOTE(review): file_state == 2 appears to mark files already
        # aggregated; afterwards all states are set to "known" — confirm
        # against Selection._set_file_states_known.
        transaction = transaction or self.transaction('update nuts')
        with make_task('Aggregating selection') as task, \
                transaction as cursor:

            # Report progress via SQLite's progress handler while the bulk
            # INSERT runs.
            self._conn.set_progress_handler(task.update, 100000)
            nrows = cursor.execute(self._sql(
                '''
                    INSERT INTO %(db)s.%(nuts)s
                    SELECT NULL,
                        nuts.file_id, nuts.file_segment, nuts.file_element,
                        nuts.kind_id, nuts.kind_codes_id,
                        nuts.tmin_seconds, nuts.tmin_offset,
                        nuts.tmax_seconds, nuts.tmax_offset,
                        nuts.kscale
                    FROM %(db)s.%(file_states)s
                    INNER JOIN nuts
                        ON %(db)s.%(file_states)s.file_id == nuts.file_id
                    INNER JOIN kind_codes
                        ON nuts.kind_codes_id ==
                           kind_codes.kind_codes_id
                    WHERE %(db)s.%(file_states)s.file_state != 2
                        AND (((1 << kind_codes.kind_id)
                            & %(db)s.%(file_states)s.kind_mask) != 0)
                ''')).rowcount

            task.update(nrows)
            self._set_file_states_known(transaction)
            self._conn.set_progress_handler(None, 0)
    def add_source(self, source, check=True):
        '''
        Add remote resource.

        :param source:
            Remote data access client instance.
        :type source:
            subclass of :py:class:`~pyrocko.squirrel.client.base.Source`

        :param check:
            Passed through to the source's ``setup()`` call.
        :type check:
            bool
        '''

        self._sources.append(source)
        source.setup(self, check=check)
765 def add_fdsn(self, *args, **kwargs):
766 '''
767 Add FDSN site for transparent remote data access.
769 Arguments are passed to
770 :py:class:`~pyrocko.squirrel.client.fdsn.FDSNSource`.
771 '''
773 self.add_source(fdsn.FDSNSource(*args, **kwargs))
775 def add_catalog(self, *args, **kwargs):
776 '''
777 Add online catalog for transparent event data access.
779 Arguments are passed to
780 :py:class:`~pyrocko.squirrel.client.catalog.CatalogSource`.
781 '''
783 self.add_source(catalog.CatalogSource(*args, **kwargs))
785 def add_dataset(self, ds, check=True):
786 '''
787 Read dataset description from file and add its contents.
789 :param ds:
790 Path to dataset description file, dataset description object
791 or name of a built-in dataset. See
792 :py:mod:`~pyrocko.squirrel.dataset`.
793 :type ds:
794 :py:class:`str` or :py:class:`~pyrocko.squirrel.dataset.Dataset`
796 :param check:
797 If ``True``, all file modification times are checked to see if
798 cached information has to be updated (slow). If ``False``, only
799 previously unknown files are indexed and cached information is used
800 for known files, regardless of file state (fast, corrresponds to
801 Squirrel's ``--optimistic`` mode). File deletions will go
802 undetected in the latter case.
803 :type check:
804 bool
805 '''
806 if isinstance(ds, str):
807 ds = dataset.read_dataset(ds)
809 ds.setup(self, check=check)
811 def _get_selection_args(
812 self, kind_id,
813 obj=None, tmin=None, tmax=None, time=None, codes=None):
815 if codes is not None:
816 codes = codes_patterns_for_kind(kind_id, codes)
818 if time is not None:
819 tmin = time
820 tmax = time
822 if obj is not None:
823 tmin = tmin if tmin is not None else obj.tmin
824 tmax = tmax if tmax is not None else obj.tmax
825 codes = codes if codes is not None else codes_patterns_for_kind(
826 kind_id, obj.codes)
828 return tmin, tmax, codes
830 def _get_selection_args_str(self, *args, **kwargs):
832 tmin, tmax, codes = self._get_selection_args(*args, **kwargs)
833 return 'tmin: %s, tmax: %s, codes: %s' % (
834 util.time_to_str(tmin) if tmin is not None else 'none',
835 util.time_to_str(tmax) if tmax is not None else 'none',
836 ','.join(str(entry) for entry in codes))
838 def _selection_args_to_kwargs(
839 self, obj=None, tmin=None, tmax=None, time=None, codes=None):
841 return dict(obj=obj, tmin=tmin, tmax=tmax, time=time, codes=codes)
    def _timerange_sql(self, tmin, tmax, kind, cond, args, naiv):
        # Append SQL conditions (and their bind arguments) restricting
        # nuts to those intersecting [tmin, tmax].

        tmin_seconds, tmin_offset = model.tsplit(tmin)
        tmax_seconds, tmax_offset = model.tsplit(tmax)
        if naiv:
            # Simple variant bypassing the kscale index (slow, for
            # testing).
            cond.append('%(db)s.%(nuts)s.tmin_seconds <= ?')
            args.append(tmax_seconds)
        else:
            # Exploit the (kind_id, kscale, tmin_seconds) index: nuts are
            # binned by duration scale (kscale), so within each bin a
            # nut's tmin is bounded below by tmin - tscale, allowing a
            # BETWEEN range scan. The last iteration handles the
            # open-ended largest-scale bin.
            tscale_edges = model.tscale_edges
            tmin_cond = []
            for kscale in range(tscale_edges.size + 1):
                if kscale != tscale_edges.size:
                    tscale = int(tscale_edges[kscale])
                    tmin_cond.append('''
                        (%(db)s.%(nuts)s.kind_id = ?
                         AND %(db)s.%(nuts)s.kscale == ?
                         AND %(db)s.%(nuts)s.tmin_seconds BETWEEN ? AND ?)
                    ''')
                    args.extend(
                        (to_kind_id(kind), kscale,
                         tmin_seconds - tscale - 1, tmax_seconds + 1))

                else:
                    tmin_cond.append('''
                        (%(db)s.%(nuts)s.kind_id == ?
                         AND %(db)s.%(nuts)s.kscale == ?
                         AND %(db)s.%(nuts)s.tmin_seconds <= ?)
                    ''')

                    args.extend(
                        (to_kind_id(kind), kscale, tmax_seconds + 1))
            if tmin_cond:
                cond.append(' ( ' + ' OR '.join(tmin_cond) + ' ) ')

            cond.append('%(db)s.%(nuts)s.tmax_seconds >= ?')
            args.append(tmin_seconds)
    def _codes_match_sql(self, positive, kind_id, codes, cond, args):
        # Translate a list of codes patterns into SQL conditions. Exact
        # patterns are collapsed into one IN (...) test; wildcard patterns
        # use GLOB. With positive=False the combined condition is negated
        # (used for codes_exclude).
        pats = codes_patterns_for_kind(kind_id, codes)
        if pats is None:
            return

        pats_exact = []
        pats_nonexact = []
        for pat in pats:
            spat = pat.safe_str
            (pats_exact if _is_exact(spat) else pats_nonexact).append(spat)

        codes_cond = []
        if pats_exact:
            codes_cond.append(' ( kind_codes.codes IN ( %s ) ) ' % ', '.join(
                '?'*len(pats_exact)))

            args.extend(pats_exact)

        if pats_nonexact:
            codes_cond.append(' ( %s ) ' % ' OR '.join(
                ('kind_codes.codes GLOB ?',) * len(pats_nonexact)))

            args.extend(pats_nonexact)

        if codes_cond:
            cond.append('%s ( %s )' % (
                'NOT' if not positive else '',
                ' OR '.join(codes_cond)))
    def iter_nuts(
            self, kind=None, tmin=None, tmax=None, codes=None,
            codes_exclude=None, sample_rate_min=None, sample_rate_max=None,
            naiv=False, kind_codes_ids=None, path=None, limit=None):

        '''
        Iterate over content entities matching given constraints.

        :param kind:
            Content kind (or kinds) to extract.
        :type kind:
            :py:class:`str`, :py:class:`list` of :py:class:`str`

        :param tmin:
            Start time of query interval.
        :type tmin:
            :py:func:`~pyrocko.util.get_time_float`

        :param tmax:
            End time of query interval.
        :type tmax:
            :py:func:`~pyrocko.util.get_time_float`

        :param codes:
            List of code patterns to query.
        :type codes:
            :py:class:`list` of :py:class:`~pyrocko.squirrel.model.Codes`
            objects appropriate for the queried content type, or anything which
            can be converted to such objects.

        :param naiv:
            Bypass time span lookup through indices (slow, for testing).
        :type naiv:
            :py:class:`bool`

        :param kind_codes_ids:
            Kind-codes IDs of contents to be retrieved (internal use).
        :type kind_codes_ids:
            :py:class:`list` of :py:class:`int`

        :yields:
            :py:class:`~pyrocko.squirrel.model.Nut` objects representing the
            intersecting content.

        :complexity:
            O(log N) for the time selection part due to heavy use of database
            indices.

        Query time span is treated as a half-open interval ``[tmin, tmax)``.
        However, if ``tmin`` equals ``tmax``, the edge logics are modified to
        closed-interval so that content intersecting with the time instant ``t
        = tmin = tmax`` is returned (otherwise nothing would be returned as
        ``[t, t)`` never matches anything).

        Time spans of content entities to be matched are also treated as half
        open intervals, e.g. content span ``[0, 1)`` is matched by query span
        ``[0, 1)`` but not by ``[-1, 0)`` or ``[1, 2)``. Also here, logics are
        modified to closed-interval when the content time span is an empty
        interval, i.e. to indicate a time instant. E.g. time instant 0 is
        matched by ``[0, 1)`` but not by ``[-1, 0)`` or ``[1, 2)``.
        '''

        if not isinstance(kind, str):
            # Multiple kinds (or all kinds): recurse once per kind.
            # NOTE(review): codes_exclude, sample_rate_min/max, naiv,
            # kind_codes_ids, path and limit are not forwarded here —
            # confirm whether this is intentional.
            if kind is None:
                kind = model.g_content_kinds
            for kind_ in kind:
                for nut in self.iter_nuts(kind_, tmin, tmax, codes):
                    yield nut

            return

        kind_id = to_kind_id(kind)

        # Build up WHERE conditions and bind arguments incrementally.
        cond = []
        args = []
        if tmin is not None or tmax is not None:
            assert kind is not None
            # Open-ended queries are bounded by the overall time span of
            # the selection.
            if tmin is None:
                tmin = self.get_time_span()[0]
            if tmax is None:
                tmax = self.get_time_span()[1] + 1.0

            self._timerange_sql(tmin, tmax, kind, cond, args, naiv)

        cond.append('kind_codes.kind_id == ?')
        args.append(kind_id)

        if codes is not None:
            self._codes_match_sql(True, kind_id, codes, cond, args)

        if codes_exclude is not None:
            self._codes_match_sql(False, kind_id, codes_exclude, cond, args)

        if sample_rate_min is not None:
            cond.append('kind_codes.deltat <= ?')
            args.append(1.0/sample_rate_min)

        if sample_rate_max is not None:
            cond.append('? <= kind_codes.deltat')
            args.append(1.0/sample_rate_max)

        if kind_codes_ids is not None:
            cond.append(
                ' ( kind_codes.kind_codes_id IN ( %s ) ) ' % ', '.join(
                    '?'*len(kind_codes_ids)))

            args.extend(kind_codes_ids)

        db = self.get_database()
        if path is not None:
            cond.append('files.path == ?')
            args.append(db.relpath(abspath(path)))

        sql = ('''
            SELECT
                files.path,
                files.format,
                files.mtime,
                files.size,
                %(db)s.%(nuts)s.file_segment,
                %(db)s.%(nuts)s.file_element,
                kind_codes.kind_id,
                kind_codes.codes,
                %(db)s.%(nuts)s.tmin_seconds,
                %(db)s.%(nuts)s.tmin_offset,
                %(db)s.%(nuts)s.tmax_seconds,
                %(db)s.%(nuts)s.tmax_offset,
                kind_codes.deltat
            FROM files
            INNER JOIN %(db)s.%(nuts)s
                ON files.file_id == %(db)s.%(nuts)s.file_id
            INNER JOIN kind_codes
                ON %(db)s.%(nuts)s.kind_codes_id == kind_codes.kind_codes_id
            ''')

        if cond:
            sql += ''' WHERE ''' + ' AND '.join(cond)

        if limit is not None:
            sql += ''' LIMIT %i''' % limit

        sql = self._sql(sql)
        if tmin is None and tmax is None:
            for row in self._conn.execute(sql, args):
                row = (db.abspath(row[0]),) + row[1:]
                nut = model.Nut(values_nocheck=row)
                yield nut
        else:
            assert tmin is not None and tmax is not None
            # The SQL time conditions operate on integer seconds only, so
            # the exact interval logic (see docstring) is applied here in
            # Python on the full-resolution nut times.
            if tmin == tmax:
                # Time-instant query: closed-interval edge logic.
                for row in self._conn.execute(sql, args):
                    row = (db.abspath(row[0]),) + row[1:]
                    nut = model.Nut(values_nocheck=row)
                    if (nut.tmin <= tmin < nut.tmax) \
                            or (nut.tmin == nut.tmax and tmin == nut.tmin):

                        yield nut
            else:
                # Half-open query interval, with special handling for
                # zero-length (time-instant) content spans.
                for row in self._conn.execute(sql, args):
                    row = (db.abspath(row[0]),) + row[1:]
                    nut = model.Nut(values_nocheck=row)
                    if (tmin < nut.tmax and nut.tmin < tmax) \
                            or (nut.tmin == nut.tmax
                                and tmin <= nut.tmin < tmax):

                        yield nut
1076 def get_nuts(self, *args, **kwargs):
1077 '''
1078 Get content entities matching given constraints.
1080 Like :py:meth:`iter_nuts` but returns results as a list.
1081 '''
1083 return list(self.iter_nuts(*args, **kwargs))
    def _split_nuts(
            self, kind, tmin=None, tmax=None, codes=None, path=None):
        # Cut matching nuts at the boundaries of [tmin, tmax]: each nut
        # overlapping the interval is deleted and replaced by its parts
        # outside the interval (if any). Applied to both the selection's
        # nuts table and the global main.nuts table.

        kind_id = to_kind_id(kind)
        tmin_seconds, tmin_offset = model.tsplit(tmin)
        tmax_seconds, tmax_offset = model.tsplit(tmax)

        # Name mapping to run the same SQL against main.nuts.
        names_main_nuts = dict(self._names)
        names_main_nuts.update(db='main', nuts='nuts')

        db = self.get_database()

        def main_nuts(s):
            return s % names_main_nuts

        with self.transaction('split nuts') as cursor:
            # modify selection and main
            for sql_subst in [
                    self._sql, main_nuts]:

                cond = []
                args = []

                self._timerange_sql(tmin, tmax, kind, cond, args, False)

                if codes is not None:
                    self._codes_match_sql(True, kind_id, codes, cond, args)

                if path is not None:
                    cond.append('files.path == ?')
                    args.append(db.relpath(abspath(path)))

                sql = sql_subst('''
                    SELECT
                        %(db)s.%(nuts)s.nut_id,
                        %(db)s.%(nuts)s.tmin_seconds,
                        %(db)s.%(nuts)s.tmin_offset,
                        %(db)s.%(nuts)s.tmax_seconds,
                        %(db)s.%(nuts)s.tmax_offset,
                        kind_codes.deltat
                    FROM files
                    INNER JOIN %(db)s.%(nuts)s
                        ON files.file_id == %(db)s.%(nuts)s.file_id
                    INNER JOIN kind_codes
                        ON %(db)s.%(nuts)s.kind_codes_id == kind_codes.kind_codes_id
                    WHERE ''' + ' AND '.join(cond))  # noqa

                # Collect replacement parts and deletions first, then
                # apply them in bulk below.
                insert = []
                delete = []
                for row in cursor.execute(sql, args):
                    nut_id, nut_tmin_seconds, nut_tmin_offset, \
                        nut_tmax_seconds, nut_tmax_offset, nut_deltat = row

                    nut_tmin = model.tjoin(
                        nut_tmin_seconds, nut_tmin_offset)
                    nut_tmax = model.tjoin(
                        nut_tmax_seconds, nut_tmax_offset)

                    if nut_tmin < tmax and tmin < nut_tmax:
                        # Left remainder: [nut_tmin, tmin)
                        if nut_tmin < tmin:
                            insert.append((
                                nut_tmin_seconds, nut_tmin_offset,
                                tmin_seconds, tmin_offset,
                                model.tscale_to_kscale(
                                    tmin_seconds - nut_tmin_seconds),
                                nut_id))

                        # Right remainder: [tmax, nut_tmax)
                        if tmax < nut_tmax:
                            insert.append((
                                tmax_seconds, tmax_offset,
                                nut_tmax_seconds, nut_tmax_offset,
                                model.tscale_to_kscale(
                                    nut_tmax_seconds - tmax_seconds),
                                nut_id))

                        delete.append((nut_id,))

                # Clone the original nut's row with new time bounds and
                # kscale for each remainder part.
                sql_add = '''
                    INSERT INTO %(db)s.%(nuts)s (
                            file_id, file_segment, file_element, kind_id,
                            kind_codes_id, tmin_seconds, tmin_offset,
                            tmax_seconds, tmax_offset, kscale )
                    SELECT
                            file_id, file_segment, file_element,
                            kind_id, kind_codes_id, ?, ?, ?, ?, ?
                    FROM %(db)s.%(nuts)s
                    WHERE nut_id == ?
                '''
                cursor.executemany(sql_subst(sql_add), insert)

                sql_delete = '''
                    DELETE FROM %(db)s.%(nuts)s WHERE nut_id == ?
                '''
                cursor.executemany(sql_subst(sql_delete), delete)
    def get_time_span(self, kinds=None, tight=True, dummy_limits=True):
        '''
        Get time interval over all content in selection.

        :param kinds:
            If not ``None``, restrict query to given content kinds.
        :type kind:
            list of str

        :param tight:
            If ``True``, content marked with the global sentinel times
            (open limits) is ignored when combining the interval. If
            ``False``, any open limit makes the corresponding side of the
            result undetermined.
        :type tight:
            bool

        :param dummy_limits:
            If ``True``, undetermined limits are replaced by the global
            sentinel times instead of being returned as ``None``.
        :type dummy_limits:
            bool

        :complexity:
            O(1), independent of the number of nuts.

        :returns:
            ``(tmin, tmax)``, combined time interval of queried content kinds.
        '''

        # The MIN(tmin_offset) subquery trick resolves the sub-second part
        # belonging to the extremal integer-seconds value.
        sql_min = self._sql('''
            SELECT MIN(tmin_seconds), MIN(tmin_offset)
            FROM %(db)s.%(nuts)s
            WHERE kind_id == ?
                AND tmin_seconds == (
                    SELECT MIN(tmin_seconds)
                    FROM %(db)s.%(nuts)s
                    WHERE kind_id == ?)
        ''')

        sql_max = self._sql('''
            SELECT MAX(tmax_seconds), MAX(tmax_offset)
            FROM %(db)s.%(nuts)s
            WHERE kind_id == ?
                AND tmax_seconds == (
                    SELECT MAX(tmax_seconds)
                    FROM %(db)s.%(nuts)s
                    WHERE kind_id == ?)
        ''')

        gtmin = None
        gtmax = None

        if isinstance(kinds, str):
            kinds = [kinds]

        if kinds is None:
            kind_ids = model.g_content_kind_ids
        else:
            kind_ids = model.to_kind_ids(kinds)

        tmins = []
        tmaxs = []
        for kind_id in kind_ids:
            for tmin_seconds, tmin_offset in self._conn.execute(
                    sql_min, (kind_id, kind_id)):
                tmins.append(model.tjoin(tmin_seconds, tmin_offset))

            for (tmax_seconds, tmax_offset) in self._conn.execute(
                    sql_max, (kind_id, kind_id)):
                tmaxs.append(model.tjoin(tmax_seconds, tmax_offset))

        # Sentinel times mark "open" limits; map them to None here.
        tmins = [tmin if tmin != model.g_tmin else None for tmin in tmins]
        tmaxs = [tmax if tmax != model.g_tmax else None for tmax in tmaxs]

        if tight:
            gtmin = nonef(min, tmins)
            gtmax = nonef(max, tmaxs)
        else:
            gtmin = None if None in tmins else nonef(min, tmins)
            gtmax = None if None in tmaxs else nonef(max, tmaxs)

        if dummy_limits:
            if gtmin is None:
                gtmin = model.g_tmin
            if gtmax is None:
                gtmax = model.g_tmax

        return gtmin, gtmax
1256 def has(self, kinds):
1257 '''
1258 Check availability of given content kinds.
1260 :param kinds:
1261 Content kinds to query.
1262 :type kind:
1263 list of str
1265 :returns:
1266 ``True`` if any of the queried content kinds is available
1267 in the selection.
1268 '''
1269 self_tmin, self_tmax = self.get_time_span(
1270 kinds, dummy_limits=False)
1272 return None not in (self_tmin, self_tmax)
1274 def get_deltat_span(self, kind):
1275 '''
1276 Get min and max sampling interval of all content of given kind.
1278 :param kind:
1279 Content kind
1280 :type kind:
1281 str
1283 :returns: ``(deltat_min, deltat_max)``
1284 '''
1286 deltats = [
1287 deltat for deltat in self.get_deltats(kind)
1288 if deltat is not None]
1290 if deltats:
1291 return min(deltats), max(deltats)
1292 else:
1293 return None, None
    def iter_kinds(self, codes=None):
        '''
        Iterate over content types available in selection.

        :param codes:
            If given, get kinds only for selected codes identifier.
            Only a single identifier may be given here and no pattern matching
            is done, currently.
        :type codes:
            :py:class:`~pyrocko.squirrel.model.Codes`

        :yields:
            Available content kinds as :py:class:`str`.

        :complexity:
            O(1), independent of number of nuts.
        '''

        # Delegate to the database, restricting the query to this selection's
        # own kind_codes_count table.
        return self._database._iter_kinds(
            codes=codes,
            kind_codes_count='%(db)s.%(kind_codes_count)s' % self._names)
    def iter_deltats(self, kind=None):
        '''
        Iterate over sampling intervals available in selection.

        :param kind:
            If given, get sampling intervals only for a given content type.
        :type kind:
            str

        :yields:
            :py:class:`float` values.

        :complexity:
            O(1), independent of number of nuts.
        '''

        # Delegate to the database, restricting the query to this selection's
        # own kind_codes_count table.
        return self._database._iter_deltats(
            kind=kind,
            kind_codes_count='%(db)s.%(kind_codes_count)s' % self._names)
    def iter_codes(self, kind=None):
        '''
        Iterate over content identifier code sequences available in selection.

        :param kind:
            If given, get codes only for a given content type.
        :type kind:
            str

        :yields:
            :py:class:`tuple` of :py:class:`str`

        :complexity:
            O(1), independent of number of nuts.
        '''

        # Delegate to the database, restricting the query to this selection's
        # own kind_codes_count table.
        return self._database._iter_codes(
            kind=kind,
            kind_codes_count='%(db)s.%(kind_codes_count)s' % self._names)
    def _iter_codes_info(self, kind=None, codes=None):
        '''
        Iterate over number of occurrences of any (kind, codes) combination.

        :param kind:
            If given, get counts only for selected content type.
        :type kind:
            str

        :yields:
            Tuples of the form ``(kind, codes, deltat, kind_codes_id, count)``.

        :complexity:
            O(1), independent of number of nuts.
        '''

        # Delegate to the database, restricting the query to this selection's
        # own kind_codes_count table.
        return self._database._iter_codes_info(
            kind=kind,
            codes=codes,
            kind_codes_count='%(db)s.%(kind_codes_count)s' % self._names)
1375 def get_kinds(self, codes=None):
1376 '''
1377 Get content types available in selection.
1379 :param codes:
1380 If given, get kinds only for selected codes identifier.
1381 Only a single identifier may be given here and no pattern matching
1382 is done, currently.
1383 :type codes:
1384 :py:class:`~pyrocko.squirrel.model.Codes`
1386 :returns:
1387 Sorted list of available content types.
1388 :rtype:
1389 py:class:`list` of :py:class:`str`
1391 :complexity:
1392 O(1), independent of number of nuts.
1394 '''
1395 return sorted(list(self.iter_kinds(codes=codes)))
1397 def get_deltats(self, kind=None):
1398 '''
1399 Get sampling intervals available in selection.
1401 :param kind:
1402 If given, get sampling intervals only for selected content type.
1403 :type kind:
1404 str
1406 :complexity:
1407 O(1), independent of number of nuts.
1409 :returns: Sorted list of available sampling intervals.
1410 '''
1411 return sorted(list(self.iter_deltats(kind=kind)))
1413 def get_codes(self, kind=None):
1414 '''
1415 Get identifier code sequences available in selection.
1417 :param kind:
1418 If given, get codes only for selected content type.
1419 :type kind:
1420 str
1422 :complexity:
1423 O(1), independent of number of nuts.
1425 :returns: Sorted list of available codes as tuples of strings.
1426 '''
1427 return sorted(list(self.iter_codes(kind=kind)))
1429 def get_counts(self, kind=None):
1430 '''
1431 Get number of occurrences of any (kind, codes) combination.
1433 :param kind:
1434 If given, get codes only for selected content type.
1435 :type kind:
1436 str
1438 :complexity:
1439 O(1), independent of number of nuts.
1441 :returns: ``dict`` with ``counts[kind][codes]`` or ``counts[codes]``
1442 if kind is not ``None``
1443 '''
1444 d = {}
1445 for kind_id, codes, _, _, count in self._iter_codes_info(kind=kind):
1446 if kind_id not in d:
1447 v = d[kind_id] = {}
1448 else:
1449 v = d[kind_id]
1451 if codes not in v:
1452 v[codes] = 0
1454 v[codes] += count
1456 if kind is not None:
1457 return d[to_kind_id(kind)]
1458 else:
1459 return dict((to_kind(kind_id), v) for (kind_id, v) in d.items())
    def glob_codes(self, kind, codes):
        '''
        Find codes matching given patterns.

        :param kind:
            Content kind to be queried.
        :type kind:
            str

        :param codes:
            List of code patterns to query.
        :type codes:
            :py:class:`list` of :py:class:`~pyrocko.squirrel.model.Codes`
            objects appropriate for the queried content type, or anything which
            can be converted to such objects.

        :returns:
            List of matches of the form ``[kind_codes_id, codes, deltat]``.
        '''

        kind_id = to_kind_id(kind)
        args = [kind_id]
        pats = codes_patterns_for_kind(kind_id, codes)

        if pats:
            # One SQLite GLOB clause per pattern, OR-combined.
            codes_cond = 'AND ( %s ) ' % ' OR '.join(
                ('kind_codes.codes GLOB ?',) * len(pats))

            args.extend(pat.safe_str for pat in pats)
        else:
            # No patterns: match all codes of the given kind.
            codes_cond = ''

        sql = self._sql('''
            SELECT kind_codes_id, codes, deltat FROM kind_codes
            WHERE
                kind_id == ? ''' + codes_cond)

        return list(map(list, self._conn.execute(sql, args)))
1500 def update(self, constraint=None, **kwargs):
1501 '''
1502 Update or partially update channel and event inventories.
1504 :param constraint:
1505 Selection of times or areas to be brought up to date.
1506 :type constraint:
1507 :py:class:`~pyrocko.squirrel.client.base.Constraint`
1509 :param \\*\\*kwargs:
1510 Shortcut for setting ``constraint=Constraint(**kwargs)``.
1512 This function triggers all attached remote sources, to check for
1513 updates in the meta-data. The sources will only submit queries when
1514 their expiration date has passed, or if the selection spans into
1515 previously unseen times or areas.
1516 '''
1518 if constraint is None:
1519 constraint = client.Constraint(**kwargs)
1521 task = make_task('Updating sources')
1522 for source in task(self._sources):
1523 source.update_channel_inventory(self, constraint)
1524 source.update_event_inventory(self, constraint)
    def update_waveform_promises(self, constraint=None, **kwargs):
        '''
        Permit downloading of remote waveforms.

        :param constraint:
            Remote waveforms compatible with the given constraint are enabled
            for download.
        :type constraint:
            :py:class:`~pyrocko.squirrel.client.base.Constraint`

        :param \\*\\*kwargs:
            Shortcut for setting ``constraint=Constraint(**kwargs)``.

        Calling this method permits Squirrel to download waveforms from remote
        sources when processing subsequent waveform requests. This works by
        inserting so called waveform promises into the database. It will look
        into the available channels for each remote source and create a promise
        for each channel compatible with the given constraint. If the promise
        then matches in a waveform request, Squirrel tries to download the
        waveform. If the download is successful, the downloaded waveform is
        added to the Squirrel and the promise is deleted. If the download
        fails, the promise is kept if the reason of failure looks like being
        temporary, e.g. because of a network failure. If the cause of failure
        however seems to be permanent, the promise is deleted so that no
        further attempts are made to download a waveform which might not be
        available from that server at all. To force re-scheduling after a
        permanent failure, call :py:meth:`update_waveform_promises`
        yet another time.
        '''

        if constraint is None:
            constraint = client.Constraint(**kwargs)

        # Each attached source decides itself which promises to create.
        for source in self._sources:
            source.update_waveform_promises(self, constraint)
    def remove_waveform_promises(self, from_database='selection'):
        '''
        Remove waveform promises from live selection or global database.

        Calling this function removes all waveform promises provided by the
        attached sources.

        :param from_database:
            Remove from live selection ``'selection'`` or global database
            ``'global'``.
        :type from_database:
            str
        '''

        # Each attached source removes the promises it created.
        for source in self._sources:
            source.remove_waveform_promises(self, from_database=from_database)
    def update_responses(self, constraint=None, **kwargs):
        '''
        Update or partially update instrument response inventories.

        :param constraint:
            Selection of times or areas to be brought up to date.
        :type constraint:
            :py:class:`~pyrocko.squirrel.client.base.Constraint`

        :param \\*\\*kwargs:
            Shortcut for setting ``constraint=Constraint(**kwargs)``.
        '''

        if constraint is None:
            constraint = client.Constraint(**kwargs)

        for source in self._sources:
            source.update_response_inventory(self, constraint)
1583 def get_nfiles(self):
1584 '''
1585 Get number of files in selection.
1586 '''
1588 sql = self._sql('''SELECT COUNT(*) FROM %(db)s.%(file_states)s''')
1589 for row in self._conn.execute(sql):
1590 return row[0]
1592 def get_nnuts(self):
1593 '''
1594 Get number of nuts in selection.
1595 '''
1597 sql = self._sql('''SELECT COUNT(*) FROM %(db)s.%(nuts)s''')
1598 for row in self._conn.execute(sql):
1599 return row[0]
1601 def get_total_size(self):
1602 '''
1603 Get aggregated file size available in selection.
1604 '''
1606 sql = self._sql('''
1607 SELECT SUM(files.size) FROM %(db)s.%(file_states)s
1608 INNER JOIN files
1609 ON %(db)s.%(file_states)s.file_id = files.file_id
1610 ''')
1612 for row in self._conn.execute(sql):
1613 return row[0] or 0
1615 def get_stats(self):
1616 '''
1617 Get statistics on contents available through this selection.
1618 '''
1620 kinds = self.get_kinds()
1621 time_spans = {}
1622 for kind in kinds:
1623 time_spans[kind] = self.get_time_span([kind])
1625 return SquirrelStats(
1626 nfiles=self.get_nfiles(),
1627 nnuts=self.get_nnuts(),
1628 kinds=kinds,
1629 codes=self.get_codes(),
1630 total_size=self.get_total_size(),
1631 counts=self.get_counts(),
1632 time_spans=time_spans,
1633 sources=[s.describe() for s in self._sources],
1634 operators=[op.describe() for op in self._operators])
1636 @filldocs
1637 def check(
1638 self, obj=None, tmin=None, tmax=None, time=None, codes=None,
1639 ignore=[]):
1640 '''
1641 Check for common data/metadata problems.
1643 %(query_args)s
1645 :param ignore:
1646 Problem types to be ignored.
1647 :type ignore:
1648 :class:`list` of :class:`str`
1649 (:py:class:`~pyrocko.squirrel.check.SquirrelCheckProblemType`)
1651 :returns:
1652 :py:class:`~pyrocko.squirrel.check.SquirrelCheck` object
1653 containing the results of the check.
1655 See :py:func:`~pyrocko.squirrel.check.do_check`.
1656 '''
1658 from .check import do_check
1659 tmin, tmax, codes = self._get_selection_args(
1660 CHANNEL, obj, tmin, tmax, time, codes)
1662 return do_check(self, tmin=tmin, tmax=tmax, codes=codes, ignore=ignore)
    def get_content(
            self,
            nut,
            cache_id='default',
            accessor_id='default',
            show_progress=False,
            model='squirrel'):

        '''
        Get and possibly load full content for a given index entry from file.

        Loads the actual content objects (channel, station, waveform, ...) from
        file. For efficiency, sibling content (all stuff in the same file
        segment) will also be loaded as a side effect. The loaded contents are
        cached in the Squirrel object.

        :param nut:
            Index entry for which full content is wanted.

        :param cache_id:
            Name of the content cache to use.
        :type cache_id:
            str

        :param accessor_id:
            Name of the consumer accessing the content (used for cache
            bookkeeping).
        :type accessor_id:
            str

        :param show_progress:
            If ``True``, show progress while loading from file.
        :type show_progress:
            bool

        :param model:
            Object model variant, passed through to the content cache.
        :type model:
            str

        :raises:
            :py:exc:`~pyrocko.squirrel.error.NotAvailable` if the content
            cannot be retrieved even after loading the file segment.
        '''

        content_cache = self._content_caches[cache_id]
        if not content_cache.has(nut):

            # Load the whole file segment; siblings end up cached as a side
            # effect.
            for nut_loaded in io.iload(
                    nut.file_path,
                    segment=nut.file_segment,
                    format=nut.file_format,
                    database=self._database,
                    update_selection=self,
                    show_progress=show_progress):

                content_cache.put(nut_loaded)

        try:
            return content_cache.get(nut, accessor_id, model)

        except KeyError:
            raise error.NotAvailable(
                'Unable to retrieve content: %s, %s, %s, %s' % nut.key)
1701 def advance_accessor(self, accessor_id='default', cache_id=None):
1702 '''
1703 Notify memory caches about consumer moving to a new data batch.
1705 :param accessor_id:
1706 Name of accessing consumer to be advanced.
1707 :type accessor_id:
1708 str
1710 :param cache_id:
1711 Name of cache to for which the accessor should be advanced. By
1712 default the named accessor is advanced in all registered caches.
1713 By default, two caches named ``'default'`` and ``'waveform'`` are
1714 available.
1715 :type cache_id:
1716 str
1718 See :py:class:`~pyrocko.squirrel.cache.ContentCache` for details on how
1719 Squirrel's memory caching works and can be tuned. Default behaviour is
1720 to release data when it has not been used in the latest data
1721 window/batch. If the accessor is never advanced, data is cached
1722 indefinitely - which is often desired e.g. for station meta-data.
1723 Methods for consecutive data traversal, like
1724 :py:meth:`chopper_waveforms` automatically advance and clear
1725 their accessor.
1726 '''
1727 for cache_ in (
1728 self._content_caches.keys()
1729 if cache_id is None
1730 else [cache_id]):
1732 self._content_caches[cache_].advance_accessor(accessor_id)
1734 def clear_accessor(self, accessor_id, cache_id=None):
1735 '''
1736 Notify memory caches about a consumer having finished.
1738 :param accessor_id:
1739 Name of accessor to be cleared.
1740 :type accessor_id:
1741 str
1743 :param cache_id:
1744 Name of cache for which the accessor should be cleared. By default
1745 the named accessor is cleared from all registered caches. By
1746 default, two caches named ``'default'`` and ``'waveform'`` are
1747 available.
1748 :type cache_id:
1749 str
1751 Calling this method clears all references to cache entries held by the
1752 named accessor. Cache entries are then freed if not referenced by any
1753 other accessor.
1754 '''
1756 for cache_ in (
1757 self._content_caches.keys()
1758 if cache_id is None
1759 else [cache_id]):
1761 self._content_caches[cache_].clear_accessor(accessor_id)
    def get_cache_stats(self, cache_id):
        '''
        Get statistics for a named content cache.

        :param cache_id:
            Name of the cache to query, e.g. ``'default'`` or ``'waveform'``.
        :type cache_id:
            str
        '''
        return self._content_caches[cache_id].get_stats()
1766 @filldocs
1767 def get_stations(
1768 self, obj=None, tmin=None, tmax=None, time=None, codes=None,
1769 model='squirrel', on_error='raise'):
1771 '''
1772 Get stations matching given constraints.
1774 %(query_args)s
1776 :param model:
1777 Select object model for returned values: ``'squirrel'`` to get
1778 Squirrel station objects or ``'pyrocko'`` to get Pyrocko station
1779 objects with channel information attached.
1780 :type model:
1781 str
1783 :returns:
1784 List of :py:class:`pyrocko.squirrel.Station
1785 <pyrocko.squirrel.model.Station>` objects by default or list of
1786 :py:class:`pyrocko.model.Station <pyrocko.model.station.Station>`
1787 objects if ``model='pyrocko'`` is requested.
1789 See :py:meth:`iter_nuts` for details on time span matching.
1790 '''
1792 if model == 'pyrocko':
1793 return self._get_pyrocko_stations(
1794 obj, tmin, tmax, time, codes, on_error=on_error)
1795 elif model in ('squirrel', 'stationxml', 'stationxml+'):
1796 args = self._get_selection_args(
1797 STATION, obj, tmin, tmax, time, codes)
1799 nuts = sorted(
1800 self.iter_nuts('station', *args), key=lambda nut: nut.dkey)
1802 return [self.get_content(nut, model=model) for nut in nuts]
1803 else:
1804 raise ValueError('Invalid station model: %s' % model)
1806 @filldocs
1807 def get_channels(
1808 self, obj=None, tmin=None, tmax=None, time=None, codes=None,
1809 model='squirrel'):
1811 '''
1812 Get channels matching given constraints.
1814 %(query_args)s
1816 :returns:
1817 List of :py:class:`~pyrocko.squirrel.model.Channel` objects.
1819 See :py:meth:`iter_nuts` for details on time span matching.
1820 '''
1822 args = self._get_selection_args(
1823 CHANNEL, obj, tmin, tmax, time, codes)
1825 nuts = sorted(
1826 self.iter_nuts('channel', *args), key=lambda nut: nut.dkey)
1828 return [self.get_content(nut, model=model) for nut in nuts]
    @filldocs
    def get_sensors(
            self, obj=None, tmin=None, tmax=None, time=None, codes=None):

        '''
        Get sensors matching given constraints.

        %(query_args)s

        :returns:
            List of :py:class:`~pyrocko.squirrel.model.Sensor` objects.

        See :py:meth:`iter_nuts` for details on time span matching.
        '''

        tmin, tmax, codes = self._get_selection_args(
            CHANNEL, obj, tmin, tmax, time, codes)

        if codes is not None:
            # Widen the channel patterns so that all components of a sensor
            # match: the last character of the channel code (the component
            # letter) is replaced with the '?' wildcard, unless the whole
            # channel code is already the '*' wildcard.
            codes = codes_patterns_list(
                (entry.replace(channel=entry.channel[:-1] + '?')
                 if entry.channel != '*' else entry)
                for entry in codes)

        nuts = sorted(
            self.iter_nuts(
                'channel', tmin, tmax, codes), key=lambda nut: nut.dkey)

        # Group channels into sensors and keep only sensors overlapping the
        # requested time span.
        return [
            sensor for sensor in model.Sensor.from_channels(
                self.get_content(nut) for nut in nuts)
            if match_time_span(tmin, tmax, sensor)]
1863 @filldocs
1864 def get_responses(
1865 self, obj=None, tmin=None, tmax=None, time=None, codes=None,
1866 model='squirrel'):
1868 '''
1869 Get instrument responses matching given constraints.
1871 %(query_args)s
1873 :param model:
1874 Select data model for returned objects. Choices: ``'squirrel'``,
1875 ``'stationxml'``, ``'stationxml+'``. See return value description.
1876 :type model:
1877 str
1879 :returns:
1880 List of :py:class:`~pyrocko.squirrel.model.Response` if ``model ==
1881 'squirrel'`` or list of
1882 :py:class:`~pyrocko.io.stationxml.FDSNStationXML`
1883 if ``model == 'stationxml'`` or list of
1884 (:py:class:`~pyrocko.squirrel.model.Response`,
1885 :py:class:`~pyrocko.io.stationxml.FDSNStationXML`) if ``model ==
1886 'stationxml+'``.
1888 See :py:meth:`iter_nuts` for details on time span matching.
1889 '''
1891 args = self._get_selection_args(
1892 RESPONSE, obj, tmin, tmax, time, codes)
1894 nuts = sorted(
1895 self.iter_nuts('response', *args), key=lambda nut: nut.dkey)
1897 return [self.get_content(nut, model=model) for nut in nuts]
1899 @filldocs
1900 def get_response(
1901 self, obj=None, tmin=None, tmax=None, time=None, codes=None,
1902 model='squirrel', on_duplicate='raise'):
1904 '''
1905 Get instrument response matching given constraints.
1907 %(query_args)s
1909 :param model:
1910 Select data model for returned object. Choices: ``'squirrel'``,
1911 ``'stationxml'``, ``'stationxml+'``. See return value description.
1912 :type model:
1913 str
1915 :param on_duplicate:
1916 Determines how duplicates/multiple matching responses are handled.
1917 Choices: ``'raise'`` - raise
1918 :py:exc:`~pyrocko.squirrel.error.Duplicate`, ``'warn'`` - emit a
1919 warning and return first match, ``'ignore'`` - silently return
1920 first match.
1921 :type on_duplicate:
1922 str
1924 :returns:
1925 :py:class:`~pyrocko.squirrel.model.Response` if
1926 ``model == 'squirrel'`` or
1927 :py:class:`~pyrocko.io.stationxml.FDSNStationXML` if ``model ==
1928 'stationxml'`` or
1929 (:py:class:`~pyrocko.squirrel.model.Response`,
1930 :py:class:`~pyrocko.io.stationxml.FDSNStationXML`) if ``model ==
1931 'stationxml+'``.
1933 Same as :py:meth:`get_responses` but returning exactly one response.
1934 Raises :py:exc:`~pyrocko.squirrel.error.NotAvailable` if none is
1935 available. Duplicates are handled according to the ``on_duplicate``
1936 argument.
1938 See :py:meth:`iter_nuts` for details on time span matching.
1939 '''
1941 if model == 'stationxml':
1942 model_ = 'stationxml+'
1943 else:
1944 model_ = model
1946 responses = self.get_responses(
1947 obj, tmin, tmax, time, codes, model=model_)
1948 if len(responses) == 0:
1949 raise error.NotAvailable(
1950 'No instrument response available (%s).'
1951 % self._get_selection_args_str(
1952 RESPONSE, obj, tmin, tmax, time, codes))
1954 elif len(responses) > 1:
1956 if on_duplicate in ('raise', 'warn'):
1957 if model_ == 'squirrel':
1958 resps_sq = responses
1959 elif model_ == 'stationxml+':
1960 resps_sq = [resp[0] for resp in responses]
1961 else:
1962 raise ValueError('Invalid response model: %s' % model)
1964 rinfo = ':\n' + '\n'.join(
1965 ' ' + resp.summary for resp in resps_sq)
1967 message = \
1968 'Multiple instrument responses matching given ' \
1969 'constraints (%s)%s%s' % (
1970 self._get_selection_args_str(
1971 RESPONSE, obj, tmin, tmax, time, codes),
1972 ' -> using first' if on_duplicate == 'warn' else '',
1973 rinfo)
1975 if on_duplicate == 'raise':
1976 raise error.Duplicate(message)
1978 elif on_duplicate == 'warn':
1979 logger.warning(message)
1981 elif on_duplicate == 'ignore':
1982 pass
1984 else:
1985 ValueError(
1986 'Invalid argument for on_duplicate: %s' % on_duplicate)
1988 if model == 'stationxml':
1989 return responses[0][1]
1990 else:
1991 return responses[0]
1993 @filldocs
1994 def get_events(
1995 self, obj=None, tmin=None, tmax=None, time=None, codes=None):
1997 '''
1998 Get events matching given constraints.
2000 %(query_args)s
2002 :returns:
2003 List of :py:class:`~pyrocko.model.event.Event` objects.
2005 See :py:meth:`iter_nuts` for details on time span matching.
2006 '''
2008 args = self._get_selection_args(EVENT, obj, tmin, tmax, time, codes)
2009 nuts = sorted(
2010 self.iter_nuts('event', *args), key=lambda nut: nut.dkey)
2012 return [self.get_content(nut) for nut in nuts]
    def _redeem_promises(self, *args, order_only=False):

        '''
        Resolve waveform promises overlapping the queried time span by
        ordering and downloading waveforms from the attached FDSN sources.

        ``args`` are the positional arguments of a waveform query; the first
        two must be ``tmin`` and ``tmax``. With ``order_only=True``, orders
        are only enqueued in ``self._pending_orders`` and no downloads are
        started.
        '''

        # Remove (the satisfied part of) a promise from the database.
        def split_promise(order, tmax=None):
            self._split_nuts(
                'waveform_promise',
                order.tmin, tmax if tmax is not None else order.tmax,
                codes=order.codes,
                path=order.source_id)

        tmin, tmax = args[:2]

        waveforms = list(self.iter_nuts('waveform', *args))
        promises = list(self.iter_nuts('waveform_promise', *args))

        # Map codes -> time spans of locally available waveforms, used to
        # compute the remaining gaps per order.
        codes_to_avail = defaultdict(list)
        for nut in waveforms:
            codes_to_avail[nut.codes].append((nut.tmin, nut.tmax))

        # Recursively convert (nested) time values to strings.
        # NOTE(review): appears unused within this function - possibly kept
        # for debugging; verify before removing.
        def tts(x):
            if isinstance(x, tuple):
                return tuple(tts(e) for e in x)
            elif isinstance(x, list):
                return list(tts(e) for e in x)
            else:
                return util.time_to_str(x)

        now = time.time()
        orders = []
        for promise in promises:
            waveforms_avail = codes_to_avail[promise.codes]
            # Split the promised span into block-sized download orders.
            for block_tmin, block_tmax in blocks(
                    max(tmin, promise.tmin),
                    min(tmax, promise.tmax),
                    promise.deltat):

                # Skip blocks starting in the future.
                if block_tmin > now:
                    continue

                orders.append(
                    WaveformOrder(
                        source_id=promise.file_path,
                        codes=promise.codes,
                        tmin=block_tmin,
                        tmax=block_tmax,
                        deltat=promise.deltat,
                        gaps=gaps(waveforms_avail, block_tmin, block_tmax),
                        time_created=now))

        # Orders without gaps are already covered by local data.
        orders_noop, orders = lpick(lambda order: order.gaps, orders)

        order_keys_noop = set(order_key(order) for order in orders_noop)
        if len(order_keys_noop) != 0 or len(orders_noop) != 0:
            logger.info(
                'Waveform orders already satisified with cached/local data: '
                '%i (%i)' % (len(order_keys_noop), len(orders_noop)))

        for order in orders_noop:
            split_promise(order)

        if order_only:
            if orders:
                self._pending_orders.extend(orders)
                logger.info(
                    'Enqueuing %i waveform order%s.'
                    % len_plural(orders))
            return
        else:
            if self._pending_orders:
                orders.extend(self._pending_orders)
                logger.info(
                    'Adding %i previously enqueued order%s.'
                    % len_plural(self._pending_orders))

            self._pending_orders = []

        source_ids = []
        sources = {}
        for source in self._sources:
            if isinstance(source, fdsn.FDSNSource):
                source_ids.append(source._source_id)
                sources[source._source_id] = source

        # Earlier-attached sources are tried first.
        source_priority = dict(
            (source_id, i) for (i, source_id) in enumerate(source_ids))

        # Group equivalent orders; within a group, orders are alternatives
        # from different sources, sorted by source priority.
        order_groups = defaultdict(list)
        for order in orders:
            order_groups[order_key(order)].append(order)

        for k, order_group in order_groups.items():
            order_group.sort(
                key=lambda order: source_priority[order.source_id])

        n_order_groups = len(order_groups)

        if len(order_groups) != 0 or len(orders) != 0:
            logger.info(
                'Waveform orders standing for download: %i (%i)'
                % (len(order_groups), len(orders)))

            task = make_task('Waveform orders processed', n_order_groups)
        else:
            task = None

        # On success of one order, drop the alternative orders of its group
        # and remove their promises.
        def release_order_group(order):
            okey = order_key(order)
            for followup in order_groups[okey]:
                if followup is not order:
                    split_promise(followup)

            del order_groups[okey]

            if task:
                task.update(n_order_groups - len(order_groups))

        def noop(order):
            pass

        def success(order, trs):
            release_order_group(order)
            if order.is_near_real_time():
                if not trs:
                    return  # keep promise when no data received at real time
                else:
                    # Shrink the promise split to the data actually received,
                    # snapped to the order's sample raster.
                    tmax = max(tr.tmax+tr.deltat for tr in trs)
                    tmax = order.tmin \
                        + round((tmax - order.tmin) / order.deltat) \
                        * order.deltat
                    split_promise(order, tmax)
            else:
                split_promise(order)

        def batch_add(paths):
            self.add(paths)

        # Callbacks from download threads are marshalled through this queue
        # and executed in the main thread below.
        calls = queue.Queue()

        def enqueue(f):
            def wrapper(*args):
                calls.put((f, args))

            return wrapper

        while order_groups:

            # Take the highest-priority remaining order of each group.
            orders_now = []
            empty = []
            for k, order_group in order_groups.items():
                try:
                    orders_now.append(order_group.pop(0))
                except IndexError:
                    empty.append(k)

            for k in empty:
                del order_groups[k]

            by_source_id = defaultdict(list)
            for order in orders_now:
                by_source_id[order.source_id].append(order)

            threads = []
            for source_id in by_source_id:
                # NOTE(review): 'download' closes over the loop variable
                # 'source_id' (late binding). When more than one thread is
                # started, a thread scheduled after the loop has advanced
                # may read a later source_id - consider binding it via a
                # default argument (def download(source_id=source_id)).
                def download():
                    try:
                        sources[source_id].download_waveforms(
                            by_source_id[source_id],
                            success=enqueue(success),
                            error_permanent=enqueue(split_promise),
                            error_temporary=noop,
                            batch_add=enqueue(batch_add))

                    finally:
                        calls.put(None)

                if len(by_source_id) > 1:
                    thread = threading.Thread(target=download)
                    thread.start()
                    threads.append(thread)
                else:
                    download()
                    # NOTE(review): download() already puts a None sentinel
                    # in its 'finally' clause; this extra put seems to leave
                    # a stale sentinel in the queue for the next round -
                    # verify intended behaviour.
                    calls.put(None)

            # Drain callbacks until one None sentinel per source was seen.
            ndone = 0
            while ndone < len(by_source_id):
                ret = calls.get()
                if ret is None:
                    ndone += 1
                else:
                    ret[0](*ret[1])

            for thread in threads:
                thread.join()

            if task:
                task.update(n_order_groups - len(order_groups))

        if task:
            task.done()
    @filldocs
    def get_waveform_nuts(
            self, obj=None, tmin=None, tmax=None, time=None, codes=None,
            codes_exclude=None, sample_rate_min=None, sample_rate_max=None,
            order_only=False):

        '''
        Get waveform content entities matching given constraints.

        %(query_args)s

        Like :py:meth:`get_nuts` with ``kind='waveform'`` but additionally
        resolves matching waveform promises (downloads waveforms from remote
        sources).

        See :py:meth:`iter_nuts` for details on time span matching.
        '''

        args = self._get_selection_args(WAVEFORM, obj, tmin, tmax, time, codes)

        if self.downloads_enabled:
            # The extra filters are passed positionally after the standard
            # selection args; _redeem_promises forwards them to iter_nuts.
            self._redeem_promises(
                *args,
                codes_exclude,
                sample_rate_min,
                sample_rate_max,
                order_only=order_only)

        nuts = sorted(
            self.iter_nuts('waveform', *args), key=lambda nut: nut.dkey)

        return nuts
2246 @filldocs
2247 def have_waveforms(
2248 self, obj=None, tmin=None, tmax=None, time=None, codes=None):
2250 '''
2251 Check if any waveforms or waveform promises are available for given
2252 constraints.
2254 %(query_args)s
2255 '''
2257 args = self._get_selection_args(WAVEFORM, obj, tmin, tmax, time, codes)
2258 return bool(list(
2259 self.iter_nuts('waveform', *args, limit=1))) \
2260 or (self.downloads_enabled and bool(list(
2261 self.iter_nuts('waveform_promise', *args, limit=1))))
    @filldocs
    def get_waveforms(
            self, obj=None, tmin=None, tmax=None, time=None, codes=None,
            codes_exclude=None, sample_rate_min=None, sample_rate_max=None,
            uncut=False, want_incomplete=True, degap=True,
            maxgap=5, maxlap=None, snap=None, include_last=False,
            load_data=True, accessor_id='default', operator_params=None,
            order_only=False, channel_priorities=None):

        '''
        Get waveforms matching given constraints.

        %(query_args)s

        :param sample_rate_min:
            Consider only waveforms with a sampling rate equal to or greater
            than the given value [Hz].
        :type sample_rate_min:
            float

        :param sample_rate_max:
            Consider only waveforms with a sampling rate equal to or less than
            the given value [Hz].
        :type sample_rate_max:
            float

        :param uncut:
            Set to ``True``, to disable cutting traces to [``tmin``, ``tmax``]
            and to disable degapping/deoverlapping. Returns untouched traces as
            they are read from file segment. File segments are always read in
            their entirety.
        :type uncut:
            bool

        :param want_incomplete:
            If ``True``, gappy/incomplete traces are included in the result.
        :type want_incomplete:
            bool

        :param degap:
            If ``True``, connect traces and remove gaps and overlaps.
        :type degap:
            bool

        :param maxgap:
            Maximum gap size in samples which is filled with interpolated
            samples when ``degap`` is ``True``.
        :type maxgap:
            int

        :param maxlap:
            Maximum overlap size in samples which is removed when ``degap`` is
            ``True``.
        :type maxlap:
            int

        :param snap:
            Rounding functions used when computing sample index from time
            instance, for trace start and trace end, respectively. By default,
            ``(round, round)`` is used.
        :type snap:
            :py:class:`tuple` of 2 callables

        :param include_last:
            If ``True``, add one more sample to the returned traces (the sample
            which would be the first sample of a query with ``tmin`` set to the
            current value of ``tmax``).
        :type include_last:
            bool

        :param load_data:
            If ``True``, waveform data samples are read from files (or cache).
            If ``False``, meta-information-only traces are returned (dummy
            traces with no data samples).
        :type load_data:
            bool

        :param accessor_id:
            Name of consumer on whose behalf data is accessed. Used in cache
            management (see :py:mod:`~pyrocko.squirrel.cache`). Used as a key
            to distinguish different points of extraction for the decision of
            when to release cached waveform data. Should be used when data is
            alternately extracted from more than one region / selection.
        :type accessor_id:
            str

        :param channel_priorities:
            List of band/instrument code combinations to try. For example,
            giving ``['HH', 'BH']`` would first try to get ``HH?`` channels and
            then fallback to ``BH?`` if these are not available. The first
            matching waveforms are returned. Use in combination with
            ``sample_rate_min`` and ``sample_rate_max`` to constrain the sample
            rate.
        :type channel_priorities:
            :py:class:`list` of :py:class:`str`

        See :py:meth:`iter_nuts` for details on time span matching.

        Loaded data is kept in memory (at least) until
        :py:meth:`clear_accessor` has been called or
        :py:meth:`advance_accessor` has been called two consecutive times
        without data being accessed between the two calls (by this accessor).
        Data may still be further kept in the memory cache if held alive by
        consumers with a different ``accessor_id``.
        '''

        tmin, tmax, codes = self._get_selection_args(
            WAVEFORM, obj, tmin, tmax, time, codes)

        # Channel priorities are handled by a helper which calls back into
        # this method once per band/instrument code combination.
        if channel_priorities is not None:
            return self._get_waveforms_prioritized(
                tmin=tmin, tmax=tmax, codes=codes, codes_exclude=codes_exclude,
                sample_rate_min=sample_rate_min,
                sample_rate_max=sample_rate_max,
                uncut=uncut, want_incomplete=want_incomplete, degap=degap,
                maxgap=maxgap, maxlap=maxlap, snap=snap,
                include_last=include_last, load_data=load_data,
                accessor_id=accessor_id, operator_params=operator_params,
                order_only=order_only, channel_priorities=channel_priorities)

        # Promises (downloadable waveforms) only count when downloads are
        # enabled.
        kinds = ['waveform']
        if self.downloads_enabled:
            kinds.append('waveform_promise')

        self_tmin, self_tmax = self.get_time_span(kinds)

        if None in (self_tmin, self_tmax):
            logger.warning(
                'No waveforms available.')
            return []

        # Open-ended queries default to the full available time span.
        tmin = tmin if tmin is not None else self_tmin
        tmax = tmax if tmax is not None else self_tmax

        if codes is not None and len(codes) == 1:
            # TODO: fix for multiple / mixed codes
            operator = self.get_operator(codes[0])
            if operator is not None:
                # Derived (operator-generated) channels are computed on the
                # fly by the operator instead of being read from files.
                return operator.get_waveforms(
                    self, codes[0],
                    tmin=tmin, tmax=tmax,
                    uncut=uncut, want_incomplete=want_incomplete, degap=degap,
                    maxgap=maxgap, maxlap=maxlap, snap=snap,
                    include_last=include_last, load_data=load_data,
                    accessor_id=accessor_id, params=operator_params)

        # Resolves matching waveform promises (may trigger downloads).
        nuts = self.get_waveform_nuts(
            obj, tmin, tmax, time, codes, codes_exclude, sample_rate_min,
            sample_rate_max, order_only=order_only)

        if order_only:
            return []

        if load_data:
            traces = [
                self.get_content(nut, 'waveform', accessor_id) for nut in nuts]

        else:
            # Meta-information-only traces without data samples.
            traces = [
                trace.Trace(**nut.trace_kwargs) for nut in nuts]

        if uncut:
            return traces

        if snap is None:
            snap = (round, round)

        chopped = []
        for tr in traces:
            if not load_data and tr.ydata is not None:
                # Cached trace may carry data; strip it to honour
                # load_data=False.
                tr = tr.copy(data=False)
                tr.ydata = None

            try:
                chopped.append(tr.chop(
                    tmin, tmax,
                    inplace=False,
                    snap=snap,
                    include_last=include_last))

            except trace.NoData:
                # Trace does not intersect the requested window.
                pass

        processed = self._process_chopped(
            chopped, degap, maxgap, maxlap, want_incomplete, tmin, tmax)

        return processed
    def _get_waveforms_prioritized(
            self, tmin=None, tmax=None, codes=None, codes_exclude=None,
            channel_priorities=None, **kwargs):

        '''
        Get waveforms, trying a sequence of band/instrument codes in turn.

        Helper for :py:meth:`get_waveforms` which it calls back into, once per
        entry of ``channel_priorities``. Codes for which waveforms were
        already obtained in an earlier (higher priority) pass are excluded
        from later passes.
        '''

        trs_all = []
        codes_have = set()
        for channel in channel_priorities:
            # Each priority entry is a two-character band+instrument code.
            assert len(channel) == 2
            if codes is not None:
                codes_now = [
                    codes_.replace(channel=channel+'?') for codes_ in codes]
            else:
                codes_now = model.CodesNSLCE('*', '*', '*', channel+'?')

            # Exclude codes already delivered by a previous pass, mapped to
            # the current band/instrument codes with the same component
            # letter (last character of the channel code).
            codes_exclude_now = list(set(
                codes_.replace(channel=channel+codes_.channel[-1])
                for codes_ in codes_have))

            if codes_exclude:
                codes_exclude_now.extend(codes_exclude)

            trs = self.get_waveforms(
                tmin=tmin,
                tmax=tmax,
                codes=codes_now,
                codes_exclude=codes_exclude_now,
                **kwargs)

            codes_have.update(set(tr.codes for tr in trs))
            trs_all.extend(trs)

        return trs_all
    @filldocs
    def chopper_waveforms(
            self, obj=None, tmin=None, tmax=None, time=None, codes=None,
            codes_exclude=None, sample_rate_min=None, sample_rate_max=None,
            tinc=None, tpad=0.,
            want_incomplete=True, snap_window=False,
            degap=True, maxgap=5, maxlap=None,
            snap=None, include_last=False, load_data=True,
            accessor_id=None, clear_accessor=True, operator_params=None,
            grouping=None, channel_priorities=None):

        '''
        Iterate window-wise over waveform archive.

        %(query_args)s

        :param tinc:
            Time increment (window shift time) (default uses ``tmax-tmin``).
        :type tinc:
            :py:func:`~pyrocko.util.get_time_float`

        :param tpad:
            Padding time appended on either side of the data window (window
            overlap is ``2*tpad``).
        :type tpad:
            :py:func:`~pyrocko.util.get_time_float`

        :param want_incomplete:
            If ``True``, gappy/incomplete traces are included in the result.
        :type want_incomplete:
            bool

        :param snap_window:
            If ``True``, start time windows at multiples of tinc with respect
            to system time zero.
        :type snap_window:
            bool

        :param degap:
            If ``True``, connect traces and remove gaps and overlaps.
        :type degap:
            bool

        :param maxgap:
            Maximum gap size in samples which is filled with interpolated
            samples when ``degap`` is ``True``.
        :type maxgap:
            int

        :param maxlap:
            Maximum overlap size in samples which is removed when ``degap`` is
            ``True``.
        :type maxlap:
            int

        :param snap:
            Rounding functions used when computing sample index from time
            instance, for trace start and trace end, respectively. By default,
            ``(round, round)`` is used.
        :type snap:
            :py:class:`tuple` of 2 callables

        :param include_last:
            If ``True``, add one more sample to the returned traces (the sample
            which would be the first sample of a query with ``tmin`` set to the
            current value of ``tmax``).
        :type include_last:
            bool

        :param load_data:
            If ``True``, waveform data samples are read from files (or cache).
            If ``False``, meta-information-only traces are returned (dummy
            traces with no data samples).
        :type load_data:
            bool

        :param accessor_id:
            Name of consumer on whose behalf data is accessed. Used in cache
            management (see :py:mod:`~pyrocko.squirrel.cache`). Used as a key
            to distinguish different points of extraction for the decision of
            when to release cached waveform data. Should be used when data is
            alternately extracted from more than one region / selection.
        :type accessor_id:
            str

        :param clear_accessor:
            If ``True`` (default), :py:meth:`clear_accessor` is called when the
            chopper finishes. Set to ``False`` to keep loaded waveforms in
            memory when the generator returns.
        :type clear_accessor:
            bool

        :param grouping:
            By default, traversal over the data is over time and all matching
            traces of a time window are yielded. Using this option, it is
            possible to traverse the data first by group (e.g. station or
            network) and second by time. This can reduce the number of traces
            in each batch and thus reduce the memory footprint of the process.
        :type grouping:
            :py:class:`~pyrocko.squirrel.operators.base.Grouping`

        :yields:
            A list of :py:class:`~pyrocko.trace.Trace` objects for every
            extracted time window.

        See :py:meth:`iter_nuts` for details on time span matching.
        '''

        tmin, tmax, codes = self._get_selection_args(
            WAVEFORM, obj, tmin, tmax, time, codes)

        kinds = ['waveform']
        if self.downloads_enabled:
            kinds.append('waveform_promise')

        self_tmin, self_tmax = self.get_time_span(kinds)

        if None in (self_tmin, self_tmax):
            logger.warning(
                'Content has undefined time span. No waveforms and no '
                'waveform promises?')
            return

        if snap_window and tinc is not None:
            # Align window boundaries to multiples of tinc w.r.t. time zero.
            tmin = tmin if tmin is not None else self_tmin
            tmax = tmax if tmax is not None else self_tmax
            tmin = math.floor(tmin / tinc) * tinc
            tmax = math.ceil(tmax / tinc) * tinc
        else:
            # Shrink the default span so padded windows stay inside the
            # available data.
            tmin = tmin if tmin is not None else self_tmin + tpad
            tmax = tmax if tmax is not None else self_tmax - tpad

        tinc = tinc if tinc is not None else tmax - tmin

        try:
            if accessor_id is None:
                accessor_id = 'chopper%i' % self._n_choppers_active

            self._n_choppers_active += 1

            # eps guards against an extra window caused by float rounding
            # when (tmax - tmin) is an exact multiple of tinc.
            eps = tinc * 1e-6
            if tinc != 0.0:
                nwin = int(((tmax - eps) - tmin) / tinc) + 1
            else:
                nwin = 1

            if grouping is None:
                codes_list = [codes]
            else:
                # Build one codes group per grouping unit (e.g. station).
                operator = Operator(
                    filtering=CodesPatternFiltering(codes=codes),
                    grouping=grouping)

                available = set(self.get_codes(kind='waveform'))
                if self.downloads_enabled:
                    available.update(self.get_codes(kind='waveform_promise'))
                operator.update_mappings(sorted(available))

                codes_list = [
                    codes_patterns_list(scl)
                    for scl in operator.iter_in_codes()]

            # Outer loop: groups; inner loop: time windows within group.
            ngroups = len(codes_list)
            for igroup, scl in enumerate(codes_list):
                for iwin in range(nwin):
                    wmin, wmax = tmin+iwin*tinc, min(tmin+(iwin+1)*tinc, tmax)

                    chopped = self.get_waveforms(
                        tmin=wmin-tpad,
                        tmax=wmax+tpad,
                        codes=scl,
                        codes_exclude=codes_exclude,
                        sample_rate_min=sample_rate_min,
                        sample_rate_max=sample_rate_max,
                        snap=snap,
                        include_last=include_last,
                        load_data=load_data,
                        want_incomplete=want_incomplete,
                        degap=degap,
                        maxgap=maxgap,
                        maxlap=maxlap,
                        accessor_id=accessor_id,
                        operator_params=operator_params,
                        channel_priorities=channel_priorities)

                    # Tell the cache that this accessor moved on, so data
                    # from earlier windows may be released.
                    self.advance_accessor(accessor_id)

                    yield Batch(
                        tmin=wmin,
                        tmax=wmax,
                        i=iwin,
                        n=nwin,
                        igroup=igroup,
                        ngroups=ngroups,
                        traces=chopped)

        finally:
            # Always executed, also when the consumer abandons the generator.
            self._n_choppers_active -= 1
            if clear_accessor:
                self.clear_accessor(accessor_id, 'waveform')
    def _process_chopped(
            self, chopped, degap, maxgap, maxlap, want_incomplete, tmin, tmax):

        '''
        Post-process traces chopped to a common time window.

        Sorts the traces, optionally degaps them, and, unless incomplete
        traces are wanted, weeds out traces not fully covering
        [``tmin``, ``tmax``].
        '''

        chopped.sort(key=lambda a: a.full_id)
        if degap:
            chopped = trace.degapper(chopped, maxgap=maxgap, maxlap=maxlap)

        if not want_incomplete:
            chopped_weeded = []
            for tr in chopped:
                # Deviation of trace start/end from the requested window.
                emin = tr.tmin - tmin
                emax = tr.tmax + tr.deltat - tmax
                if (abs(emin) <= 0.5*tr.deltat and abs(emax) <= 0.5*tr.deltat):
                    # Complete to within half a sample: keep as is.
                    chopped_weeded.append(tr)

                elif degap:
                    if (0. < emin <= 5. * tr.deltat
                            and -5. * tr.deltat <= emax < 0.):

                        # Almost complete (at most 5 samples missing at
                        # either end): pad by repeating the edge samples.
                        tr.extend(tmin, tmax-tr.deltat, fillmethod='repeat')
                        chopped_weeded.append(tr)

            chopped = chopped_weeded

        return chopped
    def _get_pyrocko_stations(
            self, obj=None, tmin=None, tmax=None, time=None, codes=None,
            on_error='raise'):

        '''
        Collect matching station and channel info into
        :py:class:`pyrocko.model.Station` objects.

        Attribute tuples gathered for identical (network, station, location)
        codes are combined with :py:func:`pyrocko.util.consistency_merge`;
        ``on_error`` is passed on as its ``error`` argument.
        '''

        from pyrocko import model as pmodel

        if codes is not None:
            codes = codes_patterns_for_kind(STATION, codes)

        # nsl -> (list of station-arg tuples, list of channel objects)
        by_nsl = defaultdict(lambda: (list(), list()))
        for station in self.get_stations(obj, tmin, tmax, time, codes):
            sargs = station._get_pyrocko_station_args()
            by_nsl[station.codes.nsl][0].append(sargs)

        if codes is not None:
            # Channel queries need NSLCE-style codes patterns.
            codes = [model.CodesNSLCE(c) for c in codes]

        for channel in self.get_channels(obj, tmin, tmax, time, codes):
            sargs = channel._get_pyrocko_station_args()
            sargs_list, channels_list = by_nsl[channel.codes.nsl]
            sargs_list.append(sargs)
            channels_list.append(channel)

        pstations = []
        nsls = list(by_nsl.keys())
        nsls.sort()
        for nsl in nsls:
            sargs_list, channels_list = by_nsl[nsl]
            sargs = util.consistency_merge(
                [('',) + x for x in sargs_list],
                error=on_error)

            # Group channel argument tuples by channel code.
            by_c = defaultdict(list)
            for ch in channels_list:
                by_c[ch.codes.channel].append(ch._get_pyrocko_channel_args())

            chas = list(by_c.keys())
            chas.sort()
            pchannels = []
            for cha in chas:
                list_of_cargs = by_c[cha]
                cargs = util.consistency_merge(
                    [('',) + x for x in list_of_cargs],
                    error=on_error)
                pchannels.append(pmodel.Channel(*cargs))

            pstations.append(
                pmodel.Station(*sargs, channels=pchannels))

        return pstations
    @property
    def pile(self):

        '''
        Emulates the older :py:class:`pyrocko.pile.Pile` interface.

        This property exposes a :py:class:`pyrocko.squirrel.pile.Pile` object,
        which emulates most of the older :py:class:`pyrocko.pile.Pile` methods
        but uses the fluffy power of the Squirrel under the hood.

        This interface can be used as a drop-in replacement for piles which are
        used in existing scripts and programs for efficient waveform data
        access. The Squirrel-based pile scales better for large datasets. Newer
        scripts should use Squirrel's native methods to avoid the emulation
        overhead.
        '''
        # Deferred import; NOTE(review): presumably avoids a circular import
        # at module load time — confirm against squirrel/pile.py.
        from . import pile

        if self._pile is None:
            # Created once on first access, then reused.
            self._pile = pile.Pile(self)

        return self._pile
2785 def snuffle(self, **kwargs):
2786 '''
2787 Look at dataset in Snuffler.
2788 '''
2789 self.pile.snuffle(**kwargs)
2791 def _gather_codes_keys(self, kind, gather, selector):
2792 return set(
2793 gather(codes)
2794 for codes in self.iter_codes(kind)
2795 if selector is None or selector(codes))
2797 def __str__(self):
2798 return str(self.get_stats())
    def get_coverage(
            self, kind, tmin=None, tmax=None, codes=None, limit=None):

        '''
        Get coverage information.

        Get information about strips of gapless data coverage.

        :param kind:
            Content kind to be queried.
        :type kind:
            str

        :param tmin:
            Start time of query interval.
        :type tmin:
            :py:func:`~pyrocko.util.get_time_float`

        :param tmax:
            End time of query interval.
        :type tmax:
            :py:func:`~pyrocko.util.get_time_float`

        :param codes:
            If given, restrict query to given content codes patterns.
        :type codes:
            :py:class:`list` of :py:class:`~pyrocko.squirrel.model.Codes`
            objects appropriate for the queried content type, or anything which
            can be converted to such objects.

        :param limit:
            Limit query to return only up to a given maximum number of entries
            per matching time series (without setting this option, very gappy
            data could cause the query to execute for a very long time).
        :type limit:
            int

        :returns:
            Information about time spans covered by the requested time series
            data.
        :rtype:
            :py:class:`list` of :py:class:`~pyrocko.squirrel.model.Coverage`
        '''

        # Times are stored split into integer seconds + float offset.
        tmin_seconds, tmin_offset = model.tsplit(tmin)
        tmax_seconds, tmax_offset = model.tsplit(tmax)
        kind_id = to_kind_id(kind)

        codes_info = list(self._iter_codes_info(kind=kind))

        # kdata_all entries: (pattern, kind_codes_id, codes, deltat)
        kdata_all = []
        if codes is None:
            for _, codes_entry, deltat, kind_codes_id, _ in codes_info:
                kdata_all.append(
                    (codes_entry, kind_codes_id, codes_entry, deltat))

        else:
            for codes_entry in codes:
                pattern = to_codes(kind_id, codes_entry)
                for _, codes_entry, deltat, kind_codes_id, _ in codes_info:
                    if model.match_codes(pattern, codes_entry):
                        kdata_all.append(
                            (pattern, kind_codes_id, codes_entry, deltat))

        kind_codes_ids = [x[1] for x in kdata_all]

        # Number of data strips already open exactly at tmin, per
        # (codes, deltat); used to seed the running coverage count below.
        counts_at_tmin = {}
        if tmin is not None:
            for nut in self.iter_nuts(
                    kind, tmin, tmin, kind_codes_ids=kind_codes_ids):

                k = nut.codes, nut.deltat
                if k not in counts_at_tmin:
                    counts_at_tmin[k] = 0

                counts_at_tmin[k] += 1

        coverages = []
        for pattern, kind_codes_id, codes_entry, deltat in kdata_all:
            # entry layout: [pattern, codes, deltat, tmin_total, tmax_total,
            # list of (time, coverage count) change points]
            entry = [pattern, codes_entry, deltat, None, None, []]
            for i, order in [(0, 'ASC'), (1, 'DESC')]:
                # First (ASC) and last (DESC) coverage change point give the
                # total time span of this series.
                sql = self._sql('''
                    SELECT
                        time_seconds,
                        time_offset
                    FROM %(db)s.%(coverage)s
                    WHERE
                        kind_codes_id == ?
                    ORDER BY
                        kind_codes_id ''' + order + ''',
                        time_seconds ''' + order + ''',
                        time_offset ''' + order + '''
                    LIMIT 1
                ''')

                for row in self._conn.execute(sql, [kind_codes_id]):
                    entry[3+i] = model.tjoin(row[0], row[1])

            if None in entry[3:5]:
                # No coverage information at all for this series.
                continue

            args = [kind_codes_id]

            sql_time = ''
            if tmin is not None:
                # intentionally < because (== tmin) is queried from nuts
                sql_time += ' AND ( ? < time_seconds ' \
                    'OR ( ? == time_seconds AND ? < time_offset ) ) '
                args.extend([tmin_seconds, tmin_seconds, tmin_offset])

            if tmax is not None:
                sql_time += ' AND ( time_seconds < ? ' \
                    'OR ( ? == time_seconds AND time_offset <= ? ) ) '
                args.extend([tmax_seconds, tmax_seconds, tmax_offset])

            sql_limit = ''
            if limit is not None:
                sql_limit = ' LIMIT ?'
                args.append(limit)

            sql = self._sql('''
                SELECT
                    time_seconds,
                    time_offset,
                    step
                FROM %(db)s.%(coverage)s
                WHERE
                    kind_codes_id == ?
                    ''' + sql_time + '''
                ORDER BY
                    kind_codes_id,
                    time_seconds,
                    time_offset
            ''' + sql_limit)

            rows = list(self._conn.execute(sql, args))

            if limit is not None and len(rows) == limit:
                # Too many change points: mark as "not resolved" with None.
                entry[-1] = None
            else:
                counts = counts_at_tmin.get((codes_entry, deltat), 0)
                tlast = None
                if tmin is not None:
                    entry[-1].append((tmin, counts))
                    tlast = tmin

                # Accumulate step changes into a running coverage count.
                for row in rows:
                    t = model.tjoin(row[0], row[1])
                    counts += row[2]
                    entry[-1].append((t, counts))
                    tlast = t

                if tmax is not None and (tlast is None or tlast != tmax):
                    entry[-1].append((tmax, counts))

            coverages.append(model.Coverage.from_values(entry + [kind_id]))

        return coverages
    def get_stationxml(
            self, obj=None, tmin=None, tmax=None, time=None, codes=None,
            level='response', on_error='raise'):

        '''
        Get station/channel/response metadata in StationXML representation.

        %(query_args)s

        :param level:
            Amount of detail to include: ``'network'``, ``'station'``,
            ``'channel'`` or ``'response'`` (default).
        :type level:
            str

        :param on_error:
            How to handle duplicate/inconsistent metadata: ``'raise'``
            (default), ``'warn'`` or ``'ignore'``.
        :type on_error:
            str

        :returns:
            :py:class:`~pyrocko.io.stationxml.FDSNStationXML` object.
        '''

        if level not in ('network', 'station', 'channel', 'response'):
            raise ValueError('Invalid level: %s' % level)

        tmin, tmax, codes = self._get_selection_args(
            CHANNEL, obj, tmin, tmax, time, codes)

        def tts(t):
            # Format time for log messages; open time bounds show as <none>.
            if t is None:
                return '<none>'
            else:
                return util.tts(t, format='%Y-%m-%d %H:%M:%S')

        # handle_error implements the on_error policy for all checks below.
        if on_error == 'ignore':
            def handle_error(exc):
                pass

        elif on_error == 'warn':
            def handle_error(exc):
                logger.warning(str(exc))

        elif on_error == 'raise':
            def handle_error(exc):
                raise exc

        def use_first(node_type_name, codes, k, group):
            if on_error == 'warn':
                logger.warning(
                    'Duplicates for %s %s, %s - %s -> using first' % (
                        node_type_name,
                        '.'.join(codes),
                        tts(k[0]), tts(k[1])))

            return group[0]

        def deduplicate(node_type_name, codes, nodes):
            # Keep only one node per (start_date, end_date) epoch.
            groups = defaultdict(list)
            for node in nodes:
                k = (node.start_date, node.end_date)
                groups[k].append(node)

            return [
                use_first(node_type_name, codes, k, group)
                for (k, group) in groups.items()]

        filtering = CodesPatternFiltering(codes=codes)

        nslcs = list(set(
            codes.nslc for codes in
            filtering.filter(self.get_codes(kind='channel'))))

        from pyrocko.io import stationxml as sx

        # Walk the codes hierarchically: network -> station -> location ->
        # channel, descending only as deep as the requested level.
        networks = []
        for net, stas in prefix_tree(nslcs):
            network = sx.Network(code=net)
            networks.append(network)

            if level not in ('station', 'channel', 'response'):
                continue

            for sta, locs in stas:
                stations = self.get_stations(
                    tmin=tmin,
                    tmax=tmax,
                    codes=(net, sta, '*'),
                    model='stationxml')

                if on_error != 'raise':
                    stations = deduplicate(
                        'Station', (net, sta), stations)

                errors = sx.check_overlaps(
                    'Station', (net, sta), stations)

                if errors:
                    handle_error(error.Duplicate(
                        'Overlapping/duplicate station info:\n  %s'
                        % '\n  '.join(errors)))

                network.station_list.extend(stations)

                if level not in ('channel', 'response'):
                    continue

                for loc, chas in locs:
                    for cha, _ in chas:
                        channels = self.get_channels(
                            tmin=tmin,
                            tmax=tmax,
                            codes=(net, sta, loc, cha),
                            model='stationxml')

                        if on_error != 'raise':
                            channels = deduplicate(
                                'Channel', (net, sta, loc, cha), channels)

                        errors = sx.check_overlaps(
                            'Channel', (net, sta, loc, cha), channels)

                        if errors:
                            handle_error(error.Duplicate(
                                'Overlapping/duplicate channel info:\n  %s'
                                % '\n  '.join(errors)))

                        for channel in channels:
                            # Attach each channel epoch to the station epoch
                            # containing it.
                            station = sx.find_containing(stations, channel)
                            if station is not None:
                                station.channel_list.append(channel)
                            else:
                                handle_error(error.NotAvailable(
                                    'No station or station epoch found '
                                    'for channel: %s' % '.'.join(
                                        (net, sta, loc, cha))))

                                continue

                            if level != 'response':
                                continue

                            try:
                                response_sq, response_sx = self.get_response(
                                    codes=(net, sta, loc, cha),
                                    tmin=channel.start_date,
                                    tmax=channel.end_date,
                                    model='stationxml+',
                                    on_duplicate=on_error)

                            except error.NotAvailable as e:
                                handle_error(e)
                                continue

                            if not (
                                    sx.eq_open(
                                        channel.start_date, response_sq.tmin)
                                    and sx.eq_open(
                                        channel.end_date, response_sq.tmax)):

                                handle_error(error.Inconsistencies(
                                    'Response time span does not match '
                                    'channel time span: %s' % '.'.join(
                                        (net, sta, loc, cha))))

                            channel.response = response_sx

        return sx.FDSNStationXML(
            source='Generated by Pyrocko Squirrel.',
            network_list=networks)
3120 def add_operator(self, op):
3121 self._operators.append(op)
3123 def update_operator_mappings(self):
3124 available = self.get_codes(kind=('channel'))
3126 for operator in self._operators:
3127 operator.update_mappings(available, self._operator_registry)
3129 def iter_operator_mappings(self):
3130 for operator in self._operators:
3131 for in_codes, out_codes in operator.iter_mappings():
3132 yield operator, in_codes, out_codes
3134 def get_operator_mappings(self):
3135 return list(self.iter_operator_mappings())
3137 def get_operator(self, codes):
3138 try:
3139 return self._operator_registry[codes][0]
3140 except KeyError:
3141 return None
3143 def get_operator_group(self, codes):
3144 try:
3145 return self._operator_registry[codes]
3146 except KeyError:
3147 return None, (None, None, None)
3149 def iter_operator_codes(self):
3150 for _, _, out_codes in self.iter_operator_mappings():
3151 for codes in out_codes:
3152 yield codes
3154 def get_operator_codes(self):
3155 return list(self.iter_operator_codes())
3157 def print_tables(self, table_names=None, stream=None):
3158 '''
3159 Dump raw database tables in textual form (for debugging purposes).
3161 :param table_names:
3162 Names of tables to be dumped or ``None`` to dump all.
3163 :type table_names:
3164 :py:class:`list` of :py:class:`str`
3166 :param stream:
3167 Open file or ``None`` to dump to standard output.
3168 '''
3170 if stream is None:
3171 stream = sys.stdout
3173 if isinstance(table_names, str):
3174 table_names = [table_names]
3176 if table_names is None:
3177 table_names = [
3178 'selection_file_states',
3179 'selection_nuts',
3180 'selection_kind_codes_count',
3181 'files', 'nuts', 'kind_codes', 'kind_codes_count']
3183 m = {
3184 'selection_file_states': '%(db)s.%(file_states)s',
3185 'selection_nuts': '%(db)s.%(nuts)s',
3186 'selection_kind_codes_count': '%(db)s.%(kind_codes_count)s',
3187 'files': 'files',
3188 'nuts': 'nuts',
3189 'kind_codes': 'kind_codes',
3190 'kind_codes_count': 'kind_codes_count'}
3192 for table_name in table_names:
3193 self._database.print_table(
3194 m[table_name] % self._names, stream=stream)
class SquirrelStats(Object):
    '''
    Container to hold statistics about contents available from a Squirrel.

    See also :py:meth:`Squirrel.get_stats`.
    '''

    nfiles = Int.T(
        help='Number of files in selection.')
    nnuts = Int.T(
        help='Number of index nuts in selection.')
    codes = List.T(
        Tuple.T(content_t=String.T()),
        help='Available code sequences in selection, e.g. '
             '(agency, network, station, location) for stations nuts.')
    kinds = List.T(
        String.T(),
        help='Available content types in selection.')
    total_size = Int.T(
        # Fixed typo in help text: 'is selection' -> 'in selection'.
        help='Aggregated file size of files in selection.')
    counts = Dict.T(
        String.T(), Dict.T(Tuple.T(content_t=String.T()), Int.T()),
        help='Breakdown of how many nuts of any content type and code '
             'sequence are available in selection, ``counts[kind][codes]``.')
    time_spans = Dict.T(
        String.T(), Tuple.T(content_t=Timestamp.T()),
        help='Time spans by content type.')
    sources = List.T(
        String.T(),
        help='Descriptions of attached sources.')
    operators = List.T(
        String.T(),
        help='Descriptions of attached operators.')

    def __str__(self):
        '''
        Render the statistics as a multi-line human-readable summary.
        '''

        # Total number of nuts per content kind.
        kind_counts = dict(
            (kind, sum(self.counts[kind].values())) for kind in self.kinds)

        scodes = model.codes_to_str_abbreviated(self.codes)

        ssources = '<none>' if not self.sources else '\n' + '\n'.join(
            '  ' + s for s in self.sources)

        soperators = '<none>' if not self.operators else '\n' + '\n'.join(
            '  ' + s for s in self.operators)

        def stime(t):
            # Sentinel times (global min/max) render as <none>.
            return util.tts(t) if t is not None and t not in (
                model.g_tmin, model.g_tmax) else '<none>'

        def stable(rows):
            # Column-aligned table: pad each cell to its column's width.
            ns = [max(len(w) for w in col) for col in zip(*rows)]
            return '\n'.join(
                ' '.join(w.ljust(n) for n, w in zip(ns, row))
                for row in rows)

        def indent(s):
            return '\n'.join('  '+line for line in s.splitlines())

        stspans = '<none>' if not self.kinds else '\n' + indent(stable([(
            kind + ':',
            str(kind_counts[kind]),
            stime(self.time_spans[kind][0]),
            '-',
            stime(self.time_spans[kind][1])) for kind in sorted(self.kinds)]))

        s = '''
Number of files:               %i
Total size of known files:     %s
Number of index nuts:          %i
Available content kinds:       %s
Available codes:               %s
Sources:                       %s
Operators:                     %s''' % (
            self.nfiles,
            util.human_bytesize(self.total_size),
            self.nnuts,
            stspans, scodes, ssources, soperators)

        return s.lstrip()
# Names exported via ``from ... import *`` for this module.
__all__ = [
    'Squirrel',
    'SquirrelStats',
]