Coverage for /usr/local/lib/python3.11/dist-packages/pyrocko/squirrel/base.py: 85%
877 statements
« prev ^ index » next coverage.py v6.5.0, created at 2024-02-05 09:37 +0000
1# http://pyrocko.org - GPLv3
2#
3# The Pyrocko Developers, 21st Century
4# ---|P------/S----------~Lg----------
6'''
7Squirrel main classes.
8'''
10import sys
11import os
12import time
13import math
14import logging
15import threading
16import queue
17from collections import defaultdict
19from pyrocko.guts import Object, Int, List, Tuple, String, Timestamp, Dict
20from pyrocko import util, trace
21from pyrocko import progress
22from pyrocko.plot import nice_time_tick_inc_approx_secs
24from . import model, io, cache, dataset
26from .model import to_kind_id, WaveformOrder, to_kind, to_codes, \
27 STATION, CHANNEL, RESPONSE, EVENT, WAVEFORM, codes_patterns_list, \
28 codes_patterns_for_kind
29from .client import fdsn, catalog
30from .selection import Selection, filldocs
31from .database import abspath
32from .operators.base import Operator, CodesPatternFiltering
33from . import client, environment, error
# Module-level logger; 'psq' is the squirrel framework's logging namespace.
logger = logging.getLogger('psq.base')

# Prefix used by guts for serialized type names defined in this module.
guts_prefix = 'squirrel'
def nonef(f, xs):
    '''
    Apply ``f`` to the non-``None`` items of ``xs``.

    Returns ``None`` when no item remains after filtering (i.e. ``xs`` is
    empty or contains only ``None``), otherwise ``f(filtered_items)``.
    '''
    filtered = [item for item in xs if item is not None]
    if not filtered:
        return None

    return f(filtered)
def make_task(*args):
    # Create a progress task which reports through this module's logger.
    return progress.task(*args, logger=logger)
def lpick(condition, seq):
    '''
    Partition ``seq`` into two lists based on ``condition``.

    Returns a pair ``(false_items, true_items)``: first the items for
    which ``condition`` evaluates falsy, then those for which it
    evaluates truthy. Order within each list follows ``seq``.
    '''
    false_items, true_items = [], []
    for item in seq:
        if condition(item):
            true_items.append(item)
        else:
            false_items.append(item)

    return false_items, true_items
def len_plural(obj):
    '''
    Return ``(len(obj), suffix)`` with ``suffix`` being ``'s'`` unless the
    length is exactly 1 — handy for building pluralized messages.
    '''
    n = len(obj)
    suffix = '' if n == 1 else 's'
    return n, suffix
def blocks(tmin, tmax, deltat, nsamples_block=100000):
    '''
    Yield consecutive time-block boundaries covering ``[tmin, tmax]``.

    The block length is a "nice" time increment close to
    ``deltat * nsamples_block``. Yields ``(tblock_min, tblock_max)``
    pairs aligned to multiples of the block length.
    '''
    tblock = nice_time_tick_inc_approx_secs(
        util.to_time_float(deltat * nsamples_block))

    ifirst = int(math.floor(tmin / tblock))
    ilast = int(math.ceil(tmax / tblock))

    yield from (
        (i * tblock, (i + 1) * tblock)
        for i in range(ifirst, ilast))
def gaps(avail, tmin, tmax):
    '''
    Find time gaps in ``avail`` within the query span ``[tmin, tmax]``.

    :param avail:
        Covered time intervals as ``(tmin_a, tmax_a)`` pairs.
    :returns:
        Uncovered intervals within the query span as a list of
        ``(tmin_gap, tmax_gap)`` pairs.
    '''
    assert tmin < tmax

    # Sweep-line events: +1 opens coverage, -1 closes it. The query span
    # itself contributes an opening event at tmax and a closing one at
    # tmin, so that uncovered stretches inside [tmin, tmax] show up as
    # runs where the running coverage count is zero.
    events = [(tmax, 1), (tmin, -1)]
    for tmin_a, tmax_a in avail:
        assert tmin_a < tmax_a
        events.append((tmin_a, 1))
        events.append((tmax_a, -1))

    events.sort()

    uncovered = []
    coverage = 1
    tmin_g = None
    for t, step in events:
        if coverage == 1 and step == -1:
            tmin_g = t
        elif coverage == 0 and step == 1 and tmin_g is not None:
            # Skip empty (zero-length) gaps.
            if tmin_g != t:
                uncovered.append((tmin_g, t))

        coverage += step

    return uncovered
def order_key(order):
    '''
    Key function identifying an order by its codes and time span.
    '''
    return order.codes, order.tmin, order.tmax
103def _is_exact(pat):
104 return not ('*' in pat or '?' in pat or ']' in pat or '[' in pat)
def prefix_tree(tups):
    '''
    Build a sorted prefix tree from a list of equal-length tuples.

    Each node is a ``(value, children)`` pair; nodes at the deepest
    level carry empty child lists.
    '''
    if not tups:
        return []

    if len(tups[0]) == 1:
        # Deepest level: one leaf node per tuple.
        return sorted((tup[0], []) for tup in tups)

    grouped = defaultdict(list)
    for tup in tups:
        grouped[tup[0]].append(tup[1:])

    return [
        (head, prefix_tree(tails))
        for head, tails in sorted(grouped.items())]
def match_time_span(tmin, tmax, obj):
    '''
    Check whether ``obj``'s time span intersects the query span.

    ``None`` bounds (on either side) are treated as open ends which
    always match.
    '''
    starts_before_query_end = \
        obj.tmin is None or tmax is None or obj.tmin <= tmax
    ends_after_query_start = \
        tmin is None or obj.tmax is None or tmin < obj.tmax

    return starts_before_query_end and ends_after_query_start
class Batch(object):
    '''
    Batch of waveforms from window-wise data extraction.

    Instances of this class encapsulate the state and results yielded for
    each time window in window-wise waveform extraction with the
    :py:meth:`Squirrel.chopper_waveforms` method.

    *Attributes:*

    .. py:attribute:: tmin

        Start of this time window.

    .. py:attribute:: tmax

        End of this time window.

    .. py:attribute:: i

        Index of this time window in sequence.

    .. py:attribute:: n

        Total number of time windows in sequence.

    .. py:attribute:: igroup

        Index of this time window's sequence group.

    .. py:attribute:: ngroups

        Total number of sequence groups.

    .. py:attribute:: traces

        Extracted waveforms for this time window.
    '''

    def __init__(self, tmin, tmax, i, n, igroup, ngroups, traces):
        (self.tmin, self.tmax,
         self.i, self.n,
         self.igroup, self.ngroups,
         self.traces) = tmin, tmax, i, n, igroup, ngroups, traces

    def as_multitrace(self):
        '''
        Combine this batch's traces into a single
        :py:class:`~pyrocko.multitrace.MultiTrace`, spanning the batch's
        time window.
        '''
        # Imported lazily to avoid the dependency unless actually used.
        from pyrocko import multitrace

        data, codes, tmin, deltat = trace.merge_traces_data_as_array(
            self.traces, tmin=self.tmin, tmax=self.tmax)

        return multitrace.MultiTrace(
            data=data,
            codes=codes,
            tmin=tmin,
            deltat=deltat)
190class Squirrel(Selection):
191 '''
192 Prompt, lazy, indexing, caching, dynamic seismological dataset access.
194 :param env:
195 Squirrel environment instance or directory path to use as starting
196 point for its detection. By default, the current directory is used as
197 starting point. When searching for a usable environment the directory
198 ``'.squirrel'`` or ``'squirrel'`` in the current (or starting point)
199 directory is used if it exists, otherwise the parent directories are
200 search upwards for the existence of such a directory. If no such
201 directory is found, the user's global Squirrel environment
202 ``'$HOME/.pyrocko/squirrel'`` is used.
203 :type env:
204 :py:class:`~pyrocko.squirrel.environment.Environment` or
205 :py:class:`str`
207 :param database:
208 Database instance or path to database. By default the
209 database found in the detected Squirrel environment is used.
210 :type database:
211 :py:class:`~pyrocko.squirrel.database.Database` or :py:class:`str`
213 :param cache_path:
214 Directory path to use for data caching. By default, the ``'cache'``
215 directory in the detected Squirrel environment is used.
216 :type cache_path:
217 :py:class:`str`
219 :param persistent:
220 If given a name, create a persistent selection.
221 :type persistent:
222 :py:class:`str`
224 This is the central class of the Squirrel framework. It provides a unified
225 interface to query and access seismic waveforms, station meta-data and
226 event information from local file collections and remote data sources. For
227 prompt responses, a profound database setup is used under the hood. To
228 speed up assemblage of ad-hoc data selections, files are indexed on first
229 use and the extracted meta-data is remembered in the database for
230 subsequent accesses. Bulk data is lazily loaded from disk and remote
231 sources, just when requested. Once loaded, data is cached in memory to
232 expedite typical access patterns. Files and data sources can be dynamically
233 added to and removed from the Squirrel selection at runtime.
235 Queries are restricted to the contents of the files currently added to the
236 Squirrel selection (usually a subset of the file meta-information
237 collection in the database). This list of files is referred to here as the
238 "selection". By default, temporary tables are created in the attached
239 database to hold the names of the files in the selection as well as various
240 indices and counters. These tables are only visible inside the application
241 which created them and are deleted when the database connection is closed
242 or the application exits. To create a selection which is not deleted at
243 exit, supply a name to the ``persistent`` argument of the Squirrel
244 constructor. Persistent selections are shared among applications using the
245 same database.
247 **Method summary**
249 Some of the methods are implemented in :py:class:`Squirrel`'s base class
250 :py:class:`~pyrocko.squirrel.selection.Selection`.
252 .. autosummary::
254 ~Squirrel.add
255 ~Squirrel.add_source
256 ~Squirrel.add_fdsn
257 ~Squirrel.add_catalog
258 ~Squirrel.add_dataset
259 ~Squirrel.add_virtual
260 ~Squirrel.update
261 ~Squirrel.update_waveform_promises
262 ~Squirrel.advance_accessor
263 ~Squirrel.clear_accessor
264 ~Squirrel.reload
265 ~pyrocko.squirrel.selection.Selection.iter_paths
266 ~Squirrel.iter_nuts
267 ~Squirrel.iter_kinds
268 ~Squirrel.iter_deltats
269 ~Squirrel.iter_codes
270 ~pyrocko.squirrel.selection.Selection.get_paths
271 ~Squirrel.get_nuts
272 ~Squirrel.get_kinds
273 ~Squirrel.get_deltats
274 ~Squirrel.get_codes
275 ~Squirrel.get_counts
276 ~Squirrel.get_time_span
277 ~Squirrel.get_deltat_span
278 ~Squirrel.get_nfiles
279 ~Squirrel.get_nnuts
280 ~Squirrel.get_total_size
281 ~Squirrel.get_stats
282 ~Squirrel.get_content
283 ~Squirrel.get_stations
284 ~Squirrel.get_channels
285 ~Squirrel.get_responses
286 ~Squirrel.get_events
287 ~Squirrel.get_waveform_nuts
288 ~Squirrel.get_waveforms
289 ~Squirrel.chopper_waveforms
290 ~Squirrel.get_coverage
291 ~Squirrel.pile
292 ~Squirrel.snuffle
293 ~Squirrel.glob_codes
294 ~pyrocko.squirrel.selection.Selection.get_database
295 ~Squirrel.print_tables
296 '''
    def __init__(
            self, env=None, database=None, cache_path=None, persistent=None):

        # Resolve the environment first; database, cache directory and
        # persistent-selection name default to its settings.
        if not isinstance(env, environment.Environment):
            env = environment.get_environment(env)

        if database is None:
            database = env.expand_path(env.database_path)

        if cache_path is None:
            cache_path = env.expand_path(env.cache_path)

        if persistent is None:
            persistent = env.persistent

        Selection.__init__(
            self, database=database, persistent=persistent)

        # Relative paths in the database are resolved against the
        # environment's parent directory.
        self.get_database().set_basepath(os.path.dirname(env.get_basepath()))

        # Separate in-memory content caches: waveforms are managed
        # differently from the other (meta-data) content kinds.
        self._content_caches = {
            'waveform': cache.ContentCache(),
            'default': cache.ContentCache()}

        self._cache_path = cache_path

        self._sources = []            # remote data access clients
        self._operators = []
        self._operator_registry = {}

        self._pending_orders = []     # queued waveform download orders

        self._pile = None             # lazily created pile emulation
        self._n_choppers_active = 0

        self.downloads_enabled = True

        # Per-selection table names, derived from the selection name.
        self._names.update({
            'nuts': self.name + '_nuts',
            'kind_codes_count': self.name + '_kind_codes_count',
            'coverage': self.name + '_coverage'})

        with self.transaction('create tables') as cursor:
            self._create_tables_squirrel(cursor)
    def _create_tables_squirrel(self, cursor):
        '''
        Create this selection's tables, indices and triggers (idempotent).

        Three tables are set up (names prefixed with the selection name):
        ``nuts`` — the content index of the selection, ``kind_codes_count``
        — per kind/codes content counts, and ``coverage`` — a step-function
        representation of content availability over time per kind/codes.
        Counts and coverage are kept up to date automatically via SQL
        triggers on the ``nuts`` table.
        '''

        # One row ("nut") per content entity, referencing its source file
        # and the position (segment/element) within that file.
        cursor.execute(self._register_table(self._sql(
            '''
                CREATE TABLE IF NOT EXISTS %(db)s.%(nuts)s (
                    nut_id integer PRIMARY KEY,
                    file_id integer,
                    file_segment integer,
                    file_element integer,
                    kind_id integer,
                    kind_codes_id integer,
                    tmin_seconds integer,
                    tmin_offset integer,
                    tmax_seconds integer,
                    tmax_offset integer,
                    kscale integer)
            ''')))

        cursor.execute(self._register_table(self._sql(
            '''
                CREATE TABLE IF NOT EXISTS %(db)s.%(kind_codes_count)s (
                    kind_codes_id integer PRIMARY KEY,
                    count integer)
            ''')))

        # At most one nut per (file, segment, element).
        cursor.execute(self._sql(
            '''
                CREATE UNIQUE INDEX IF NOT EXISTS %(db)s.%(nuts)s_file_element
                    ON %(nuts)s (file_id, file_segment, file_element)
            '''))

        cursor.execute(self._sql(
            '''
                CREATE INDEX IF NOT EXISTS %(db)s.%(nuts)s_index_file_id
                    ON %(nuts)s (file_id)
            '''))

        # Time-range lookup indices; the kscale index supports the scheme
        # used in _timerange_sql.
        cursor.execute(self._sql(
            '''
                CREATE INDEX IF NOT EXISTS %(db)s.%(nuts)s_index_tmin_seconds
                    ON %(nuts)s (kind_id, tmin_seconds)
            '''))

        cursor.execute(self._sql(
            '''
                CREATE INDEX IF NOT EXISTS %(db)s.%(nuts)s_index_tmax_seconds
                    ON %(nuts)s (kind_id, tmax_seconds)
            '''))

        cursor.execute(self._sql(
            '''
                CREATE INDEX IF NOT EXISTS %(db)s.%(nuts)s_index_kscale
                    ON %(nuts)s (kind_id, kscale, tmin_seconds)
            '''))

        # Drop nuts when their file is deleted from the global database...
        cursor.execute(self._sql(
            '''
                CREATE TRIGGER IF NOT EXISTS %(db)s.%(nuts)s_delete_nuts
                BEFORE DELETE ON main.files FOR EACH ROW
                BEGIN
                  DELETE FROM %(nuts)s WHERE file_id == old.file_id;
                END
            '''))

        # trigger only on size to make silent update of mtime possible
        cursor.execute(self._sql(
            '''
                CREATE TRIGGER IF NOT EXISTS %(db)s.%(nuts)s_delete_nuts2
                BEFORE UPDATE OF size ON main.files FOR EACH ROW
                BEGIN
                  DELETE FROM %(nuts)s WHERE file_id == old.file_id;
                END
            '''))

        # ...or when the file leaves this selection.
        cursor.execute(self._sql(
            '''
                CREATE TRIGGER IF NOT EXISTS
                    %(db)s.%(file_states)s_delete_files
                BEFORE DELETE ON %(db)s.%(file_states)s FOR EACH ROW
                BEGIN
                  DELETE FROM %(nuts)s WHERE file_id == old.file_id;
                END
            '''))

        # Keep per kind/codes counts in sync with nut insertions/deletions.
        cursor.execute(self._sql(
            '''
                CREATE TRIGGER IF NOT EXISTS %(db)s.%(nuts)s_inc_kind_codes
                BEFORE INSERT ON %(nuts)s FOR EACH ROW
                BEGIN
                    INSERT OR IGNORE INTO %(kind_codes_count)s VALUES
                    (new.kind_codes_id, 0);
                    UPDATE %(kind_codes_count)s
                    SET count = count + 1
                    WHERE new.kind_codes_id
                        == %(kind_codes_count)s.kind_codes_id;
                END
            '''))

        cursor.execute(self._sql(
            '''
                CREATE TRIGGER IF NOT EXISTS %(db)s.%(nuts)s_dec_kind_codes
                BEFORE DELETE ON %(nuts)s FOR EACH ROW
                BEGIN
                    UPDATE %(kind_codes_count)s
                    SET count = count - 1
                    WHERE old.kind_codes_id
                        == %(kind_codes_count)s.kind_codes_id;
                END
            '''))

        # Coverage as a step function: each row marks a time where the
        # number of available contents for a kind/codes changes by ``step``.
        cursor.execute(self._register_table(self._sql(
            '''
                CREATE TABLE IF NOT EXISTS %(db)s.%(coverage)s (
                    kind_codes_id integer,
                    time_seconds integer,
                    time_offset integer,
                    step integer)
            ''')))

        cursor.execute(self._sql(
            '''
                CREATE UNIQUE INDEX IF NOT EXISTS %(db)s.%(coverage)s_time
                    ON %(coverage)s (kind_codes_id, time_seconds, time_offset)
            '''))

        # On nut insertion: step +1 at the nut's begin, -1 at its end;
        # entries whose step becomes zero are removed again.
        cursor.execute(self._sql(
            '''
                CREATE TRIGGER IF NOT EXISTS %(db)s.%(nuts)s_add_coverage
                AFTER INSERT ON %(nuts)s FOR EACH ROW
                BEGIN
                    INSERT OR IGNORE INTO %(coverage)s VALUES
                    (new.kind_codes_id, new.tmin_seconds, new.tmin_offset, 0)
                    ;
                    UPDATE %(coverage)s
                    SET step = step + 1
                    WHERE new.kind_codes_id == %(coverage)s.kind_codes_id
                        AND new.tmin_seconds == %(coverage)s.time_seconds
                        AND new.tmin_offset == %(coverage)s.time_offset
                    ;
                    INSERT OR IGNORE INTO %(coverage)s VALUES
                    (new.kind_codes_id, new.tmax_seconds, new.tmax_offset, 0)
                    ;
                    UPDATE %(coverage)s
                    SET step = step - 1
                    WHERE new.kind_codes_id == %(coverage)s.kind_codes_id
                        AND new.tmax_seconds == %(coverage)s.time_seconds
                        AND new.tmax_offset == %(coverage)s.time_offset
                    ;
                    DELETE FROM %(coverage)s
                    WHERE new.kind_codes_id == %(coverage)s.kind_codes_id
                        AND new.tmin_seconds == %(coverage)s.time_seconds
                        AND new.tmin_offset == %(coverage)s.time_offset
                        AND step == 0
                    ;
                    DELETE FROM %(coverage)s
                    WHERE new.kind_codes_id == %(coverage)s.kind_codes_id
                        AND new.tmax_seconds == %(coverage)s.time_seconds
                        AND new.tmax_offset == %(coverage)s.time_offset
                        AND step == 0
                    ;
                END
            '''))

        # On nut deletion: exact inverse of the insertion trigger.
        cursor.execute(self._sql(
            '''
                CREATE TRIGGER IF NOT EXISTS %(db)s.%(nuts)s_remove_coverage
                BEFORE DELETE ON %(nuts)s FOR EACH ROW
                BEGIN
                    INSERT OR IGNORE INTO %(coverage)s VALUES
                    (old.kind_codes_id, old.tmin_seconds, old.tmin_offset, 0)
                    ;
                    UPDATE %(coverage)s
                    SET step = step - 1
                    WHERE old.kind_codes_id == %(coverage)s.kind_codes_id
                        AND old.tmin_seconds == %(coverage)s.time_seconds
                        AND old.tmin_offset == %(coverage)s.time_offset
                    ;
                    INSERT OR IGNORE INTO %(coverage)s VALUES
                    (old.kind_codes_id, old.tmax_seconds, old.tmax_offset, 0)
                    ;
                    UPDATE %(coverage)s
                    SET step = step + 1
                    WHERE old.kind_codes_id == %(coverage)s.kind_codes_id
                        AND old.tmax_seconds == %(coverage)s.time_seconds
                        AND old.tmax_offset == %(coverage)s.time_offset
                    ;
                    DELETE FROM %(coverage)s
                    WHERE old.kind_codes_id == %(coverage)s.kind_codes_id
                        AND old.tmin_seconds == %(coverage)s.time_seconds
                        AND old.tmin_offset == %(coverage)s.time_offset
                        AND step == 0
                    ;
                    DELETE FROM %(coverage)s
                    WHERE old.kind_codes_id == %(coverage)s.kind_codes_id
                        AND old.tmax_seconds == %(coverage)s.time_seconds
                        AND old.tmax_offset == %(coverage)s.time_offset
                        AND step == 0
                    ;
                END
            '''))
    def _delete(self):
        '''Delete database tables associated with this Squirrel.'''

        with self.transaction('delete tables') as cursor:
            # One DROP statement per line, executed individually —
            # sqlite3's execute() accepts only a single statement per call.
            # The coverage objects use IF EXISTS because older selections
            # may predate the coverage feature.
            for s in '''
                    DROP TRIGGER %(db)s.%(nuts)s_delete_nuts;
                    DROP TRIGGER %(db)s.%(nuts)s_delete_nuts2;
                    DROP TRIGGER %(db)s.%(file_states)s_delete_files;
                    DROP TRIGGER %(db)s.%(nuts)s_inc_kind_codes;
                    DROP TRIGGER %(db)s.%(nuts)s_dec_kind_codes;
                    DROP TABLE %(db)s.%(nuts)s;
                    DROP TABLE %(db)s.%(kind_codes_count)s;
                    DROP TRIGGER IF EXISTS %(db)s.%(nuts)s_add_coverage;
                    DROP TRIGGER IF EXISTS %(db)s.%(nuts)s_remove_coverage;
                    DROP TABLE IF EXISTS %(db)s.%(coverage)s;
            '''.strip().splitlines():

                cursor.execute(self._sql(s))

        Selection._delete(self)
    @filldocs
    def add(self,
            paths,
            kinds=None,
            format='detect',
            include=None,
            exclude=None,
            check=True):

        '''
        Add files to the selection.

        :param paths:
            Iterator yielding paths to files or directories to be added to the
            selection. Recurses into directories. If given a ``str``, it
            is treated as a single path to be added.
        :type paths:
            :py:class:`list` of :py:class:`str`

        :param kinds:
            Content types to be made available through the Squirrel selection.
            By default, all known content types are accepted.
        :type kinds:
            :py:class:`list` of :py:class:`str`

        :param format:
            File format identifier or ``'detect'`` to enable auto-detection
            (available: %(file_formats)s).
        :type format:
            str

        :param include:
            If not ``None``, files are only included if their paths match the
            given regular expression pattern.
        :type include:
            str

        :param exclude:
            If not ``None``, files are only included if their paths do not
            match the given regular expression pattern.
        :type exclude:
            str

        :param check:
            If ``True``, all file modification times are checked to see if
            cached information has to be updated (slow). If ``False``, only
            previously unknown files are indexed and cached information is used
            for known files, regardless of file state (fast, corresponds to
            Squirrel's ``--optimistic`` mode). File deletions will go
            undetected in the latter case.
        :type check:
            bool

        :Complexity:
            O(log N)
        '''

        if isinstance(kinds, str):
            kinds = (kinds,)

        if isinstance(paths, str):
            paths = [paths]

        kind_mask = model.to_kind_mask(kinds)

        # Virtual paths bypass file-system traversal.
        Selection.add(
            self, util.iter_select_files(
                paths,
                show_progress=False,
                include=include,
                exclude=exclude,
                pass_through=lambda path: path.startswith('virtual:')
            ), kind_mask, format)

        self._load(check)
        self._update_nuts()
    def reload(self):
        '''
        Check for modifications and reindex modified files.

        Based on file modification times.
        '''

        # Force a state check on all files, reindex what changed, then
        # refresh this selection's nut table.
        self._set_file_states_force_check()
        self._load(check=True)
        self._update_nuts()
653 def add_virtual(self, nuts, virtual_paths=None):
654 '''
655 Add content which is not backed by files.
657 :param nuts:
658 Content pieces to be added.
659 :type nuts:
660 iterator yielding :py:class:`~pyrocko.squirrel.model.Nut` objects
662 :param virtual_paths:
663 List of virtual paths to prevent creating a temporary list of the
664 nuts while aggregating the file paths for the selection.
665 :type virtual_paths:
666 :py:class:`list` of :py:class:`str`
668 Stores to the main database and the selection.
669 '''
671 if isinstance(virtual_paths, str):
672 virtual_paths = [virtual_paths]
674 if virtual_paths is None:
675 if not isinstance(nuts, list):
676 nuts = list(nuts)
677 virtual_paths = set(nut.file_path for nut in nuts)
679 Selection.add(self, virtual_paths)
680 self.get_database().dig(nuts)
681 self._update_nuts()
683 def add_volatile(self, nuts):
684 if not isinstance(nuts, list):
685 nuts = list(nuts)
687 paths = list(set(nut.file_path for nut in nuts))
688 io.backends.virtual.add_nuts(nuts)
689 self.add_virtual(nuts, paths)
690 self._volatile_paths.extend(paths)
    def add_volatile_waveforms(self, traces):
        '''
        Add in-memory waveforms which will be removed when the app closes.

        :param traces:
            Waveforms to be added.

        :returns:
            The virtual path under which the waveforms were registered.
        '''

        # Unique virtual path so these traces cannot collide with other
        # volatile content.
        name = model.random_name()

        path = 'virtual:volatile:%s' % name

        nuts = []
        for itr, tr in enumerate(traces):
            assert tr.tmin <= tr.tmax
            tmin_seconds, tmin_offset = model.tsplit(tr.tmin)
            # End time is derived from number of samples, not tr.tmax.
            tmax_seconds, tmax_offset = model.tsplit(
                tr.tmin + tr.data_len()*tr.deltat)

            nuts.append(model.Nut(
                file_path=path,
                file_format='virtual',
                file_segment=itr,     # one segment per trace
                file_element=0,
                file_mtime=0,
                codes=tr.codes,
                tmin_seconds=tmin_seconds,
                tmin_offset=tmin_offset,
                tmax_seconds=tmax_seconds,
                tmax_offset=tmax_offset,
                deltat=tr.deltat,
                kind_id=to_kind_id('waveform'),
                content=tr))

        self.add_volatile(nuts)
        return path
    def _load(self, check):
        # Index all selected files. content=[] means: extract meta-data
        # only, do not keep any actual content. skip_unchanged avoids
        # re-reading files already indexed; ``check`` controls whether
        # file states are verified against modification times.
        for _ in io.iload(
                self,
                content=[],
                skip_unchanged=True,
                check=check):
            pass
    def _update_nuts(self, transaction=None):
        # Copy nut meta-data from the global ``nuts`` table into this
        # selection's nut table for all files not yet in state 2
        # (presumably "known" — files are marked known below; TODO confirm
        # against Selection's file-state constants), honouring each file's
        # content-kind mask.
        transaction = transaction or self.transaction('update nuts')
        with make_task('Aggregating selection') as task, \
                transaction as cursor:

            # Report sqlite progress to the task every 100000 VM steps.
            self._conn.set_progress_handler(task.update, 100000)
            nrows = cursor.execute(self._sql(
                '''
                    INSERT INTO %(db)s.%(nuts)s
                    SELECT NULL,
                        nuts.file_id, nuts.file_segment, nuts.file_element,
                        nuts.kind_id, nuts.kind_codes_id,
                        nuts.tmin_seconds, nuts.tmin_offset,
                        nuts.tmax_seconds, nuts.tmax_offset,
                        nuts.kscale
                    FROM %(db)s.%(file_states)s
                    INNER JOIN nuts
                        ON %(db)s.%(file_states)s.file_id == nuts.file_id
                    INNER JOIN kind_codes
                        ON nuts.kind_codes_id ==
                           kind_codes.kind_codes_id
                    WHERE %(db)s.%(file_states)s.file_state != 2
                        AND (((1 << kind_codes.kind_id)
                            & %(db)s.%(file_states)s.kind_mask) != 0)
                ''')).rowcount

            task.update(nrows)
            self._set_file_states_known(transaction)
            self._conn.set_progress_handler(None, 0)
    def add_source(self, source, check=True):
        '''
        Add remote resource.

        :param source:
            Remote data access client instance.
        :type source:
            subclass of :py:class:`~pyrocko.squirrel.client.base.Source`

        :param check:
            Passed on to the source's ``setup`` call — presumably controls
            whether cached files are checked for modifications; TODO confirm
            against ``Source.setup``.
        :type check:
            bool
        '''

        self._sources.append(source)
        source.setup(self, check=check)
    def add_fdsn(self, *args, **kwargs):
        '''
        Add FDSN site for transparent remote data access.

        Arguments are passed to
        :py:class:`~pyrocko.squirrel.client.fdsn.FDSNSource`.
        '''

        self.add_source(fdsn.FDSNSource(*args, **kwargs))
    def add_catalog(self, *args, **kwargs):
        '''
        Add online catalog for transparent event data access.

        Arguments are passed to
        :py:class:`~pyrocko.squirrel.client.catalog.CatalogSource`.
        '''

        self.add_source(catalog.CatalogSource(*args, **kwargs))
    def add_dataset(self, ds, check=True):
        '''
        Read dataset description from file and add its contents.

        :param ds:
            Path to dataset description file, dataset description object
            or name of a built-in dataset. See
            :py:mod:`~pyrocko.squirrel.dataset`.
        :type ds:
            :py:class:`str` or :py:class:`~pyrocko.squirrel.dataset.Dataset`

        :param check:
            If ``True``, all file modification times are checked to see if
            cached information has to be updated (slow). If ``False``, only
            previously unknown files are indexed and cached information is used
            for known files, regardless of file state (fast, corresponds to
            Squirrel's ``--optimistic`` mode). File deletions will go
            undetected in the latter case.
        :type check:
            bool
        '''
        # Strings are taken as a path or built-in dataset name.
        if isinstance(ds, str):
            ds = dataset.read_dataset(ds)

        ds.setup(self, check=check)
823 def _get_selection_args(
824 self, kind_id,
825 obj=None, tmin=None, tmax=None, time=None, codes=None):
827 if codes is not None:
828 codes = codes_patterns_for_kind(kind_id, codes)
830 if time is not None:
831 tmin = time
832 tmax = time
834 if obj is not None:
835 tmin = tmin if tmin is not None else obj.tmin
836 tmax = tmax if tmax is not None else obj.tmax
837 codes = codes if codes is not None else codes_patterns_for_kind(
838 kind_id, obj.codes)
840 return tmin, tmax, codes
842 def _get_selection_args_str(self, *args, **kwargs):
844 tmin, tmax, codes = self._get_selection_args(*args, **kwargs)
845 return 'tmin: %s, tmax: %s, codes: %s' % (
846 util.time_to_str(tmin) if tmin is not None else 'none',
847 util.time_to_str(tmax) if tmax is not None else 'none',
848 ','.join(str(entry) for entry in codes))
850 def _selection_args_to_kwargs(
851 self, obj=None, tmin=None, tmax=None, time=None, codes=None):
853 return dict(obj=obj, tmin=tmin, tmax=tmax, time=time, codes=codes)
    def _timerange_sql(self, tmin, tmax, kind, cond, args, naiv):
        '''
        Append SQL conditions and bind arguments for a time-range query.

        Extends ``cond`` and ``args`` in place. With ``naiv=True`` a plain
        ``tmin_seconds <= tmax`` test is emitted (slow, for testing).
        Otherwise the kscale scheme is used: nuts are binned by the scale
        (``kscale``) of their time span, which bounds how far before the
        query start a nut of that bin can begin — so ``tmin_seconds`` can
        be restricted to a finite BETWEEN range per bin, enabling index
        range scans. The last bin has no upper length bound and keeps the
        open-ended test.
        '''

        tmin_seconds, tmin_offset = model.tsplit(tmin)
        tmax_seconds, tmax_offset = model.tsplit(tmax)
        if naiv:
            cond.append('%(db)s.%(nuts)s.tmin_seconds <= ?')
            args.append(tmax_seconds)
        else:
            tscale_edges = model.tscale_edges
            tmin_cond = []
            for kscale in range(tscale_edges.size + 1):
                if kscale != tscale_edges.size:
                    tscale = int(tscale_edges[kscale])
                    tmin_cond.append('''
                        (%(db)s.%(nuts)s.kind_id = ?
                         AND %(db)s.%(nuts)s.kscale == ?
                         AND %(db)s.%(nuts)s.tmin_seconds BETWEEN ? AND ?)
                    ''')
                    # -1 / +1 margins absorb the sub-second offsets lost
                    # by the integer split above.
                    args.extend(
                        (to_kind_id(kind), kscale,
                         tmin_seconds - tscale - 1, tmax_seconds + 1))

                else:
                    tmin_cond.append('''
                        (%(db)s.%(nuts)s.kind_id == ?
                         AND %(db)s.%(nuts)s.kscale == ?
                         AND %(db)s.%(nuts)s.tmin_seconds <= ?)
                    ''')

                    args.extend(
                        (to_kind_id(kind), kscale, tmax_seconds + 1))
            if tmin_cond:
                cond.append(' ( ' + ' OR '.join(tmin_cond) + ' ) ')

            # Common to all bins: the nut must end at or after the query
            # start.
            cond.append('%(db)s.%(nuts)s.tmax_seconds >= ?')
            args.append(tmin_seconds)
    def _codes_match_sql(self, positive, kind_id, codes, cond, args):
        '''
        Append SQL conditions matching (or excluding) codes patterns.

        Extends ``cond`` and ``args`` in place. Patterns without glob
        metacharacters are grouped into a single ``IN (...)`` test; each
        remaining pattern gets its own ``GLOB`` test. With
        ``positive=False`` the combined condition is negated
        (exclusion).
        '''
        pats = codes_patterns_for_kind(kind_id, codes)
        if pats is None:
            # No constraint to add.
            return

        pats_exact = []
        pats_nonexact = []
        for pat in pats:
            spat = pat.safe_str
            (pats_exact if _is_exact(spat) else pats_nonexact).append(spat)

        codes_cond = []
        if pats_exact:
            codes_cond.append(' ( kind_codes.codes IN ( %s ) ) ' % ', '.join(
                '?'*len(pats_exact)))

            args.extend(pats_exact)

        if pats_nonexact:
            codes_cond.append(' ( %s ) ' % ' OR '.join(
                ('kind_codes.codes GLOB ?',) * len(pats_nonexact)))

            args.extend(pats_nonexact)

        if codes_cond:
            cond.append('%s ( %s )' % (
                'NOT' if not positive else '',
                ' OR '.join(codes_cond)))
921 def iter_nuts(
922 self, kind=None, tmin=None, tmax=None, codes=None,
923 codes_exclude=None, sample_rate_min=None, sample_rate_max=None,
924 naiv=False, kind_codes_ids=None, path=None, limit=None):
926 '''
927 Iterate over content entities matching given constraints.
929 :param kind:
930 Content kind (or kinds) to extract.
931 :type kind:
932 :py:class:`str`, :py:class:`list` of :py:class:`str`
934 :param tmin:
935 Start time of query interval.
936 :type tmin:
937 :py:func:`~pyrocko.util.get_time_float`
939 :param tmax:
940 End time of query interval.
941 :type tmax:
942 :py:func:`~pyrocko.util.get_time_float`
944 :param codes:
945 List of code patterns to query.
946 :type codes:
947 :py:class:`list` of :py:class:`~pyrocko.squirrel.model.Codes`
948 objects appropriate for the queried content type, or anything which
949 can be converted to such objects.
951 :param naiv:
952 Bypass time span lookup through indices (slow, for testing).
953 :type naiv:
954 :py:class:`bool`
956 :param kind_codes_ids:
957 Kind-codes IDs of contents to be retrieved (internal use).
958 :type kind_codes_ids:
959 :py:class:`list` of :py:class:`int`
961 :yields:
962 :py:class:`~pyrocko.squirrel.model.Nut` objects representing the
963 intersecting content.
965 :complexity:
966 O(log N) for the time selection part due to heavy use of database
967 indices.
969 Query time span is treated as a half-open interval ``[tmin, tmax)``.
970 However, if ``tmin`` equals ``tmax``, the edge logics are modified to
971 closed-interval so that content intersecting with the time instant ``t
972 = tmin = tmax`` is returned (otherwise nothing would be returned as
973 ``[t, t)`` never matches anything).
975 Time spans of content entities to be matched are also treated as half
976 open intervals, e.g. content span ``[0, 1)`` is matched by query span
977 ``[0, 1)`` but not by ``[-1, 0)`` or ``[1, 2)``. Also here, logics are
978 modified to closed-interval when the content time span is an empty
979 interval, i.e. to indicate a time instant. E.g. time instant 0 is
980 matched by ``[0, 1)`` but not by ``[-1, 0)`` or ``[1, 2)``.
981 '''
983 if not isinstance(kind, str):
984 if kind is None:
985 kind = model.g_content_kinds
986 for kind_ in kind:
987 for nut in self.iter_nuts(kind_, tmin, tmax, codes):
988 yield nut
990 return
992 kind_id = to_kind_id(kind)
994 cond = []
995 args = []
996 if tmin is not None or tmax is not None:
997 assert kind is not None
998 if tmin is None:
999 tmin = self.get_time_span()[0]
1000 if tmax is None:
1001 tmax = self.get_time_span()[1] + 1.0
1003 self._timerange_sql(tmin, tmax, kind, cond, args, naiv)
1005 cond.append('kind_codes.kind_id == ?')
1006 args.append(kind_id)
1008 if codes is not None:
1009 self._codes_match_sql(True, kind_id, codes, cond, args)
1011 if codes_exclude is not None:
1012 self._codes_match_sql(False, kind_id, codes_exclude, cond, args)
1014 if sample_rate_min is not None:
1015 cond.append('kind_codes.deltat <= ?')
1016 args.append(1.0/sample_rate_min)
1018 if sample_rate_max is not None:
1019 cond.append('? <= kind_codes.deltat')
1020 args.append(1.0/sample_rate_max)
1022 if kind_codes_ids is not None:
1023 cond.append(
1024 ' ( kind_codes.kind_codes_id IN ( %s ) ) ' % ', '.join(
1025 '?'*len(kind_codes_ids)))
1027 args.extend(kind_codes_ids)
1029 db = self.get_database()
1030 if path is not None:
1031 cond.append('files.path == ?')
1032 args.append(db.relpath(abspath(path)))
1034 sql = ('''
1035 SELECT
1036 files.path,
1037 files.format,
1038 files.mtime,
1039 files.size,
1040 %(db)s.%(nuts)s.file_segment,
1041 %(db)s.%(nuts)s.file_element,
1042 kind_codes.kind_id,
1043 kind_codes.codes,
1044 %(db)s.%(nuts)s.tmin_seconds,
1045 %(db)s.%(nuts)s.tmin_offset,
1046 %(db)s.%(nuts)s.tmax_seconds,
1047 %(db)s.%(nuts)s.tmax_offset,
1048 kind_codes.deltat
1049 FROM files
1050 INNER JOIN %(db)s.%(nuts)s
1051 ON files.file_id == %(db)s.%(nuts)s.file_id
1052 INNER JOIN kind_codes
1053 ON %(db)s.%(nuts)s.kind_codes_id == kind_codes.kind_codes_id
1054 ''')
1056 if cond:
1057 sql += ''' WHERE ''' + ' AND '.join(cond)
1059 if limit is not None:
1060 sql += ''' LIMIT %i''' % limit
1062 sql = self._sql(sql)
1063 if tmin is None and tmax is None:
1064 for row in self._conn.execute(sql, args):
1065 row = (db.abspath(row[0]),) + row[1:]
1066 nut = model.Nut(values_nocheck=row)
1067 yield nut
1068 else:
1069 assert tmin is not None and tmax is not None
1070 if tmin == tmax:
1071 for row in self._conn.execute(sql, args):
1072 row = (db.abspath(row[0]),) + row[1:]
1073 nut = model.Nut(values_nocheck=row)
1074 if (nut.tmin <= tmin < nut.tmax) \
1075 or (nut.tmin == nut.tmax and tmin == nut.tmin):
1077 yield nut
1078 else:
1079 for row in self._conn.execute(sql, args):
1080 row = (db.abspath(row[0]),) + row[1:]
1081 nut = model.Nut(values_nocheck=row)
1082 if (tmin < nut.tmax and nut.tmin < tmax) \
1083 or (nut.tmin == nut.tmax
1084 and tmin <= nut.tmin < tmax):
1086 yield nut
    def get_nuts(self, *args, **kwargs):
        '''
        Get content entities matching given constraints.

        Like :py:meth:`iter_nuts` but returns results as a list. Accepts
        the same arguments.
        '''

        return list(self.iter_nuts(*args, **kwargs))
    def _split_nuts(
            self, kind, tmin=None, tmax=None, codes=None, path=None):
        '''
        Cut the given time span out of all matching nuts.

        Each nut intersecting ``[tmin, tmax)`` is deleted; any parts of it
        lying outside the span are re-inserted as new, shrunken nuts. The
        operation is applied both to this selection's nut table and to the
        global ``nuts`` table.
        '''

        kind_id = to_kind_id(kind)
        tmin_seconds, tmin_offset = model.tsplit(tmin)
        tmax_seconds, tmax_offset = model.tsplit(tmax)

        # Same SQL templates are reused for the global table by
        # substituting db='main', nuts='nuts'.
        names_main_nuts = dict(self._names)
        names_main_nuts.update(db='main', nuts='nuts')

        db = self.get_database()

        def main_nuts(s):
            return s % names_main_nuts

        with self.transaction('split nuts') as cursor:
            # modify selection and main
            for sql_subst in [
                    self._sql, main_nuts]:

                cond = []
                args = []

                self._timerange_sql(tmin, tmax, kind, cond, args, False)

                if codes is not None:
                    self._codes_match_sql(True, kind_id, codes, cond, args)

                if path is not None:
                    cond.append('files.path == ?')
                    args.append(db.relpath(abspath(path)))

                sql = sql_subst('''
                    SELECT
                        %(db)s.%(nuts)s.nut_id,
                        %(db)s.%(nuts)s.tmin_seconds,
                        %(db)s.%(nuts)s.tmin_offset,
                        %(db)s.%(nuts)s.tmax_seconds,
                        %(db)s.%(nuts)s.tmax_offset,
                        kind_codes.deltat
                    FROM files
                    INNER JOIN %(db)s.%(nuts)s
                        ON files.file_id == %(db)s.%(nuts)s.file_id
                    INNER JOIN kind_codes
                        ON %(db)s.%(nuts)s.kind_codes_id == kind_codes.kind_codes_id
                    WHERE ''' + ' AND '.join(cond))  # noqa

                insert = []
                delete = []
                for row in cursor.execute(sql, args):
                    nut_id, nut_tmin_seconds, nut_tmin_offset, \
                        nut_tmax_seconds, nut_tmax_offset, nut_deltat = row

                    nut_tmin = model.tjoin(
                        nut_tmin_seconds, nut_tmin_offset)
                    nut_tmax = model.tjoin(
                        nut_tmax_seconds, nut_tmax_offset)

                    # Re-check intersection at full time precision (the
                    # SQL pre-selection works on integer seconds only).
                    if nut_tmin < tmax and tmin < nut_tmax:
                        # Keep the left leftover if the nut starts before
                        # the span...
                        if nut_tmin < tmin:
                            insert.append((
                                nut_tmin_seconds, nut_tmin_offset,
                                tmin_seconds, tmin_offset,
                                model.tscale_to_kscale(
                                    tmin_seconds - nut_tmin_seconds),
                                nut_id))

                        # ...and the right leftover if it ends after it.
                        if tmax < nut_tmax:
                            insert.append((
                                tmax_seconds, tmax_offset,
                                nut_tmax_seconds, nut_tmax_offset,
                                model.tscale_to_kscale(
                                    nut_tmax_seconds - tmax_seconds),
                                nut_id))

                        delete.append((nut_id,))

                # Leftovers copy all columns of the original nut except
                # the time span and kscale, which come from the tuples
                # collected above.
                sql_add = '''
                    INSERT INTO %(db)s.%(nuts)s (
                            file_id, file_segment, file_element, kind_id,
                            kind_codes_id, tmin_seconds, tmin_offset,
                            tmax_seconds, tmax_offset, kscale )
                    SELECT
                            file_id, file_segment, file_element,
                            kind_id, kind_codes_id, ?, ?, ?, ?, ?
                    FROM %(db)s.%(nuts)s
                    WHERE nut_id == ?
                '''
                cursor.executemany(sql_subst(sql_add), insert)

                sql_delete = '''
                    DELETE FROM %(db)s.%(nuts)s WHERE nut_id == ?
                '''
                cursor.executemany(sql_subst(sql_delete), delete)
    def get_time_span(self, kinds=None, tight=True, dummy_limits=True):
        '''
        Get time interval over all content in selection.

        :param kinds:
            If not ``None``, restrict query to given content kinds.
        :type kinds:
            list of str

        :param tight:
            If ``True``, ignore contents with open/unknown time limits when
            combining. If ``False``, any content with an open limit makes the
            corresponding global limit open (``None``).
        :type tight:
            bool

        :param dummy_limits:
            If ``True``, replace open limits with the global dummy epochs
            ``model.g_tmin`` / ``model.g_tmax`` instead of returning ``None``.
        :type dummy_limits:
            bool

        :complexity:
            O(1), independent of the number of nuts.

        :returns:
            ``(tmin, tmax)``, combined time interval of queried content kinds.
        '''

        # Two-step query: restrict to rows at the minimal tmin_seconds, then
        # take the minimal sub-second offset among those rows.
        sql_min = self._sql('''
            SELECT MIN(tmin_seconds), MIN(tmin_offset)
            FROM %(db)s.%(nuts)s
            WHERE kind_id == ?
                AND tmin_seconds == (
                    SELECT MIN(tmin_seconds)
                    FROM %(db)s.%(nuts)s
                    WHERE kind_id == ?)
        ''')

        # Same idea for the upper bound, with MAX.
        sql_max = self._sql('''
            SELECT MAX(tmax_seconds), MAX(tmax_offset)
            FROM %(db)s.%(nuts)s
            WHERE kind_id == ?
                AND tmax_seconds == (
                    SELECT MAX(tmax_seconds)
                    FROM %(db)s.%(nuts)s
                    WHERE kind_id == ?)
        ''')

        gtmin = None
        gtmax = None

        # Allow passing a single kind as plain string.
        if isinstance(kinds, str):
            kinds = [kinds]

        if kinds is None:
            kind_ids = model.g_content_kind_ids
        else:
            kind_ids = model.to_kind_ids(kinds)

        tmins = []
        tmaxs = []
        for kind_id in kind_ids:
            for tmin_seconds, tmin_offset in self._conn.execute(
                    sql_min, (kind_id, kind_id)):
                tmins.append(model.tjoin(tmin_seconds, tmin_offset))

            for (tmax_seconds, tmax_offset) in self._conn.execute(
                    sql_max, (kind_id, kind_id)):
                tmaxs.append(model.tjoin(tmax_seconds, tmax_offset))

        # Sentinel epochs mark open/unknown limits; map them to None.
        tmins = [tmin if tmin != model.g_tmin else None for tmin in tmins]
        tmaxs = [tmax if tmax != model.g_tmax else None for tmax in tmaxs]

        if tight:
            gtmin = nonef(min, tmins)
            gtmax = nonef(max, tmaxs)
        else:
            # Any open limit makes the combined limit open.
            gtmin = None if None in tmins else nonef(min, tmins)
            gtmax = None if None in tmaxs else nonef(max, tmaxs)

        if dummy_limits:
            if gtmin is None:
                gtmin = model.g_tmin
            if gtmax is None:
                gtmax = model.g_tmax

        return gtmin, gtmax
1268 def has(self, kinds):
1269 '''
1270 Check availability of given content kinds.
1272 :param kinds:
1273 Content kinds to query.
1274 :type kind:
1275 list of str
1277 :returns:
1278 ``True`` if any of the queried content kinds is available
1279 in the selection.
1280 '''
1281 self_tmin, self_tmax = self.get_time_span(
1282 kinds, dummy_limits=False)
1284 return None not in (self_tmin, self_tmax)
1286 def get_deltat_span(self, kind):
1287 '''
1288 Get min and max sampling interval of all content of given kind.
1290 :param kind:
1291 Content kind
1292 :type kind:
1293 str
1295 :returns: ``(deltat_min, deltat_max)``
1296 '''
1298 deltats = [
1299 deltat for deltat in self.get_deltats(kind)
1300 if deltat is not None]
1302 if deltats:
1303 return min(deltats), max(deltats)
1304 else:
1305 return None, None
1307 def iter_kinds(self, codes=None):
1308 '''
1309 Iterate over content types available in selection.
1311 :param codes:
1312 If given, get kinds only for selected codes identifier.
1313 Only a single identifier may be given here and no pattern matching
1314 is done, currently.
1315 :type codes:
1316 :py:class:`~pyrocko.squirrel.model.Codes`
1318 :yields:
1319 Available content kinds as :py:class:`str`.
1321 :complexity:
1322 O(1), independent of number of nuts.
1323 '''
1325 return self._database._iter_kinds(
1326 codes=codes,
1327 kind_codes_count='%(db)s.%(kind_codes_count)s' % self._names)
1329 def iter_deltats(self, kind=None):
1330 '''
1331 Iterate over sampling intervals available in selection.
1333 :param kind:
1334 If given, get sampling intervals only for a given content type.
1335 :type kind:
1336 str
1338 :yields:
1339 :py:class:`float` values.
1341 :complexity:
1342 O(1), independent of number of nuts.
1343 '''
1344 return self._database._iter_deltats(
1345 kind=kind,
1346 kind_codes_count='%(db)s.%(kind_codes_count)s' % self._names)
1348 def iter_codes(self, kind=None):
1349 '''
1350 Iterate over content identifier code sequences available in selection.
1352 :param kind:
1353 If given, get codes only for a given content type.
1354 :type kind:
1355 str
1357 :yields:
1358 :py:class:`tuple` of :py:class:`str`
1360 :complexity:
1361 O(1), independent of number of nuts.
1362 '''
1363 return self._database._iter_codes(
1364 kind=kind,
1365 kind_codes_count='%(db)s.%(kind_codes_count)s' % self._names)
1367 def _iter_codes_info(self, kind=None, codes=None):
1368 '''
1369 Iterate over number of occurrences of any (kind, codes) combination.
1371 :param kind:
1372 If given, get counts only for selected content type.
1373 :type kind:
1374 str
1376 :yields:
1377 Tuples of the form ``(kind, codes, deltat, kind_codes_id, count)``.
1379 :complexity:
1380 O(1), independent of number of nuts.
1381 '''
1382 return self._database._iter_codes_info(
1383 kind=kind,
1384 codes=codes,
1385 kind_codes_count='%(db)s.%(kind_codes_count)s' % self._names)
1387 def get_kinds(self, codes=None):
1388 '''
1389 Get content types available in selection.
1391 :param codes:
1392 If given, get kinds only for selected codes identifier.
1393 Only a single identifier may be given here and no pattern matching
1394 is done, currently.
1395 :type codes:
1396 :py:class:`~pyrocko.squirrel.model.Codes`
1398 :returns:
1399 Sorted list of available content types.
1400 :rtype:
1401 py:class:`list` of :py:class:`str`
1403 :complexity:
1404 O(1), independent of number of nuts.
1406 '''
1407 return sorted(list(self.iter_kinds(codes=codes)))
1409 def get_deltats(self, kind=None):
1410 '''
1411 Get sampling intervals available in selection.
1413 :param kind:
1414 If given, get sampling intervals only for selected content type.
1415 :type kind:
1416 str
1418 :complexity:
1419 O(1), independent of number of nuts.
1421 :returns: Sorted list of available sampling intervals.
1422 '''
1423 return sorted(list(self.iter_deltats(kind=kind)))
1425 def get_codes(self, kind=None):
1426 '''
1427 Get identifier code sequences available in selection.
1429 :param kind:
1430 If given, get codes only for selected content type.
1431 :type kind:
1432 str
1434 :complexity:
1435 O(1), independent of number of nuts.
1437 :returns: Sorted list of available codes as tuples of strings.
1438 '''
1439 return sorted(list(self.iter_codes(kind=kind)))
1441 def get_counts(self, kind=None):
1442 '''
1443 Get number of occurrences of any (kind, codes) combination.
1445 :param kind:
1446 If given, get codes only for selected content type.
1447 :type kind:
1448 str
1450 :complexity:
1451 O(1), independent of number of nuts.
1453 :returns: ``dict`` with ``counts[kind][codes]`` or ``counts[codes]``
1454 if kind is not ``None``
1455 '''
1456 d = {}
1457 for kind_id, codes, _, _, count in self._iter_codes_info(kind=kind):
1458 if kind_id not in d:
1459 v = d[kind_id] = {}
1460 else:
1461 v = d[kind_id]
1463 if codes not in v:
1464 v[codes] = 0
1466 v[codes] += count
1468 if kind is not None:
1469 return d[to_kind_id(kind)]
1470 else:
1471 return dict((to_kind(kind_id), v) for (kind_id, v) in d.items())
1473 def glob_codes(self, kind, codes):
1474 '''
1475 Find codes matching given patterns.
1477 :param kind:
1478 Content kind to be queried.
1479 :type kind:
1480 str
1482 :param codes:
1483 List of code patterns to query.
1484 :type codes:
1485 :py:class:`list` of :py:class:`~pyrocko.squirrel.model.Codes`
1486 objects appropriate for the queried content type, or anything which
1487 can be converted to such objects.
1489 :returns:
1490 List of matches of the form ``[kind_codes_id, codes, deltat]``.
1491 '''
1493 kind_id = to_kind_id(kind)
1494 args = [kind_id]
1495 pats = codes_patterns_for_kind(kind_id, codes)
1497 if pats:
1498 codes_cond = 'AND ( %s ) ' % ' OR '.join(
1499 ('kind_codes.codes GLOB ?',) * len(pats))
1501 args.extend(pat.safe_str for pat in pats)
1502 else:
1503 codes_cond = ''
1505 sql = self._sql('''
1506 SELECT kind_codes_id, codes, deltat FROM kind_codes
1507 WHERE
1508 kind_id == ? ''' + codes_cond)
1510 return list(map(list, self._conn.execute(sql, args)))
1512 def update(self, constraint=None, **kwargs):
1513 '''
1514 Update or partially update channel and event inventories.
1516 :param constraint:
1517 Selection of times or areas to be brought up to date.
1518 :type constraint:
1519 :py:class:`~pyrocko.squirrel.client.base.Constraint`
1521 :param \\*\\*kwargs:
1522 Shortcut for setting ``constraint=Constraint(**kwargs)``.
1524 This function triggers all attached remote sources, to check for
1525 updates in the meta-data. The sources will only submit queries when
1526 their expiration date has passed, or if the selection spans into
1527 previously unseen times or areas.
1528 '''
1530 if constraint is None:
1531 constraint = client.Constraint(**kwargs)
1533 task = make_task('Updating sources')
1534 for source in task(self._sources):
1535 source.update_channel_inventory(self, constraint)
1536 source.update_event_inventory(self, constraint)
1538 def update_waveform_promises(self, constraint=None, **kwargs):
1539 '''
1540 Permit downloading of remote waveforms.
1542 :param constraint:
1543 Remote waveforms compatible with the given constraint are enabled
1544 for download.
1545 :type constraint:
1546 :py:class:`~pyrocko.squirrel.client.base.Constraint`
1548 :param \\*\\*kwargs:
1549 Shortcut for setting ``constraint=Constraint(**kwargs)``.
1551 Calling this method permits Squirrel to download waveforms from remote
1552 sources when processing subsequent waveform requests. This works by
1553 inserting so called waveform promises into the database. It will look
1554 into the available channels for each remote source and create a promise
1555 for each channel compatible with the given constraint. If the promise
1556 then matches in a waveform request, Squirrel tries to download the
1557 waveform. If the download is successful, the downloaded waveform is
1558 added to the Squirrel and the promise is deleted. If the download
1559 fails, the promise is kept if the reason of failure looks like being
1560 temporary, e.g. because of a network failure. If the cause of failure
1561 however seems to be permanent, the promise is deleted so that no
1562 further attempts are made to download a waveform which might not be
1563 available from that server at all. To force re-scheduling after a
1564 permanent failure, call :py:meth:`update_waveform_promises`
1565 yet another time.
1566 '''
1568 if constraint is None:
1569 constraint = client.Constraint(**kwargs)
1571 for source in self._sources:
1572 source.update_waveform_promises(self, constraint)
1574 def remove_waveform_promises(self, from_database='selection'):
1575 '''
1576 Remove waveform promises from live selection or global database.
1578 Calling this function removes all waveform promises provided by the
1579 attached sources.
1581 :param from_database:
1582 Remove from live selection ``'selection'`` or global database
1583 ``'global'``.
1584 '''
1585 for source in self._sources:
1586 source.remove_waveform_promises(self, from_database=from_database)
1588 def update_responses(self, constraint=None, **kwargs):
1589 if constraint is None:
1590 constraint = client.Constraint(**kwargs)
1592 for source in self._sources:
1593 source.update_response_inventory(self, constraint)
1595 def get_nfiles(self):
1596 '''
1597 Get number of files in selection.
1598 '''
1600 sql = self._sql('''SELECT COUNT(*) FROM %(db)s.%(file_states)s''')
1601 for row in self._conn.execute(sql):
1602 return row[0]
1604 def get_nnuts(self):
1605 '''
1606 Get number of nuts in selection.
1607 '''
1609 sql = self._sql('''SELECT COUNT(*) FROM %(db)s.%(nuts)s''')
1610 for row in self._conn.execute(sql):
1611 return row[0]
1613 def get_total_size(self):
1614 '''
1615 Get aggregated file size available in selection.
1616 '''
1618 sql = self._sql('''
1619 SELECT SUM(files.size) FROM %(db)s.%(file_states)s
1620 INNER JOIN files
1621 ON %(db)s.%(file_states)s.file_id = files.file_id
1622 ''')
1624 for row in self._conn.execute(sql):
1625 return row[0] or 0
1627 def get_stats(self):
1628 '''
1629 Get statistics on contents available through this selection.
1630 '''
1632 kinds = self.get_kinds()
1633 time_spans = {}
1634 for kind in kinds:
1635 time_spans[kind] = self.get_time_span([kind])
1637 return SquirrelStats(
1638 nfiles=self.get_nfiles(),
1639 nnuts=self.get_nnuts(),
1640 kinds=kinds,
1641 codes=self.get_codes(),
1642 total_size=self.get_total_size(),
1643 counts=self.get_counts(),
1644 time_spans=time_spans,
1645 sources=[s.describe() for s in self._sources],
1646 operators=[op.describe() for op in self._operators])
1648 @filldocs
1649 def check(
1650 self, obj=None, tmin=None, tmax=None, time=None, codes=None,
1651 ignore=[]):
1652 '''
1653 Check for common data/metadata problems.
1655 %(query_args)s
1657 :param ignore:
1658 Problem types to be ignored.
1659 :type ignore:
1660 :class:`list` of :class:`str`
1661 (:py:class:`~pyrocko.squirrel.check.SquirrelCheckProblemType`)
1663 :returns:
1664 :py:class:`~pyrocko.squirrel.check.SquirrelCheck` object
1665 containing the results of the check.
1667 See :py:func:`~pyrocko.squirrel.check.do_check`.
1668 '''
1670 from .check import do_check
1671 tmin, tmax, codes = self._get_selection_args(
1672 CHANNEL, obj, tmin, tmax, time, codes)
1674 return do_check(self, tmin=tmin, tmax=tmax, codes=codes, ignore=ignore)
1676 def get_content(
1677 self,
1678 nut,
1679 cache_id='default',
1680 accessor_id='default',
1681 show_progress=False,
1682 model='squirrel'):
1684 '''
1685 Get and possibly load full content for a given index entry from file.
1687 Loads the actual content objects (channel, station, waveform, ...) from
1688 file. For efficiency, sibling content (all stuff in the same file
1689 segment) will also be loaded as a side effect. The loaded contents are
1690 cached in the Squirrel object.
1691 '''
1693 content_cache = self._content_caches[cache_id]
1694 if not content_cache.has(nut):
1696 for nut_loaded in io.iload(
1697 nut.file_path,
1698 segment=nut.file_segment,
1699 format=nut.file_format,
1700 database=self._database,
1701 update_selection=self,
1702 show_progress=show_progress):
1704 content_cache.put(nut_loaded)
1706 try:
1707 return content_cache.get(nut, accessor_id, model)
1709 except KeyError:
1710 raise error.NotAvailable(
1711 'Unable to retrieve content: %s, %s, %s, %s' % nut.key)
1713 def advance_accessor(self, accessor_id='default', cache_id=None):
1714 '''
1715 Notify memory caches about consumer moving to a new data batch.
1717 :param accessor_id:
1718 Name of accessing consumer to be advanced.
1719 :type accessor_id:
1720 str
1722 :param cache_id:
1723 Name of cache to for which the accessor should be advanced. By
1724 default the named accessor is advanced in all registered caches.
1725 By default, two caches named ``'default'`` and ``'waveform'`` are
1726 available.
1727 :type cache_id:
1728 str
1730 See :py:class:`~pyrocko.squirrel.cache.ContentCache` for details on how
1731 Squirrel's memory caching works and can be tuned. Default behaviour is
1732 to release data when it has not been used in the latest data
1733 window/batch. If the accessor is never advanced, data is cached
1734 indefinitely - which is often desired e.g. for station meta-data.
1735 Methods for consecutive data traversal, like
1736 :py:meth:`chopper_waveforms` automatically advance and clear
1737 their accessor.
1738 '''
1739 for cache_ in (
1740 self._content_caches.keys()
1741 if cache_id is None
1742 else [cache_id]):
1744 self._content_caches[cache_].advance_accessor(accessor_id)
1746 def clear_accessor(self, accessor_id, cache_id=None):
1747 '''
1748 Notify memory caches about a consumer having finished.
1750 :param accessor_id:
1751 Name of accessor to be cleared.
1752 :type accessor_id:
1753 str
1755 :param cache_id:
1756 Name of cache for which the accessor should be cleared. By default
1757 the named accessor is cleared from all registered caches. By
1758 default, two caches named ``'default'`` and ``'waveform'`` are
1759 available.
1760 :type cache_id:
1761 str
1763 Calling this method clears all references to cache entries held by the
1764 named accessor. Cache entries are then freed if not referenced by any
1765 other accessor.
1766 '''
1768 for cache_ in (
1769 self._content_caches.keys()
1770 if cache_id is None
1771 else [cache_id]):
1773 self._content_caches[cache_].clear_accessor(accessor_id)
1775 def get_cache_stats(self, cache_id):
1776 return self._content_caches[cache_id].get_stats()
1778 @filldocs
1779 def get_stations(
1780 self, obj=None, tmin=None, tmax=None, time=None, codes=None,
1781 model='squirrel', on_error='raise'):
1783 '''
1784 Get stations matching given constraints.
1786 %(query_args)s
1788 :param model:
1789 Select object model for returned values: ``'squirrel'`` to get
1790 Squirrel station objects or ``'pyrocko'`` to get Pyrocko station
1791 objects with channel information attached.
1792 :type model:
1793 str
1795 :returns:
1796 List of :py:class:`pyrocko.squirrel.Station
1797 <pyrocko.squirrel.model.Station>` objects by default or list of
1798 :py:class:`pyrocko.model.Station <pyrocko.model.station.Station>`
1799 objects if ``model='pyrocko'`` is requested.
1801 See :py:meth:`iter_nuts` for details on time span matching.
1802 '''
1804 if model == 'pyrocko':
1805 return self._get_pyrocko_stations(
1806 obj, tmin, tmax, time, codes, on_error=on_error)
1807 elif model in ('squirrel', 'stationxml', 'stationxml+'):
1808 args = self._get_selection_args(
1809 STATION, obj, tmin, tmax, time, codes)
1811 nuts = sorted(
1812 self.iter_nuts('station', *args), key=lambda nut: nut.dkey)
1814 return [self.get_content(nut, model=model) for nut in nuts]
1815 else:
1816 raise ValueError('Invalid station model: %s' % model)
1818 @filldocs
1819 def get_channels(
1820 self, obj=None, tmin=None, tmax=None, time=None, codes=None,
1821 model='squirrel'):
1823 '''
1824 Get channels matching given constraints.
1826 %(query_args)s
1828 :returns:
1829 List of :py:class:`~pyrocko.squirrel.model.Channel` objects.
1831 See :py:meth:`iter_nuts` for details on time span matching.
1832 '''
1834 args = self._get_selection_args(
1835 CHANNEL, obj, tmin, tmax, time, codes)
1837 nuts = sorted(
1838 self.iter_nuts('channel', *args), key=lambda nut: nut.dkey)
1840 return [self.get_content(nut, model=model) for nut in nuts]
1842 @filldocs
1843 def get_sensors(
1844 self, obj=None, tmin=None, tmax=None, time=None, codes=None):
1846 '''
1847 Get sensors matching given constraints.
1849 %(query_args)s
1851 :returns:
1852 List of :py:class:`~pyrocko.squirrel.model.Sensor` objects.
1854 See :py:meth:`iter_nuts` for details on time span matching.
1855 '''
1857 tmin, tmax, codes = self._get_selection_args(
1858 CHANNEL, obj, tmin, tmax, time, codes)
1860 if codes is not None:
1861 codes = codes_patterns_list(
1862 (entry.replace(channel=entry.channel[:-1] + '?')
1863 if entry.channel != '*' else entry)
1864 for entry in codes)
1866 nuts = sorted(
1867 self.iter_nuts(
1868 'channel', tmin, tmax, codes), key=lambda nut: nut.dkey)
1870 return [
1871 sensor for sensor in model.Sensor.from_channels(
1872 self.get_content(nut) for nut in nuts)
1873 if match_time_span(tmin, tmax, sensor)]
1875 @filldocs
1876 def get_responses(
1877 self, obj=None, tmin=None, tmax=None, time=None, codes=None,
1878 model='squirrel'):
1880 '''
1881 Get instrument responses matching given constraints.
1883 %(query_args)s
1885 :param model:
1886 Select data model for returned objects. Choices: ``'squirrel'``,
1887 ``'stationxml'``, ``'stationxml+'``. See return value description.
1888 :type model:
1889 str
1891 :returns:
1892 List of :py:class:`~pyrocko.squirrel.model.Response` if ``model ==
1893 'squirrel'`` or list of
1894 :py:class:`~pyrocko.io.stationxml.FDSNStationXML`
1895 if ``model == 'stationxml'`` or list of
1896 (:py:class:`~pyrocko.squirrel.model.Response`,
1897 :py:class:`~pyrocko.io.stationxml.FDSNStationXML`) if ``model ==
1898 'stationxml+'``.
1900 See :py:meth:`iter_nuts` for details on time span matching.
1901 '''
1903 args = self._get_selection_args(
1904 RESPONSE, obj, tmin, tmax, time, codes)
1906 nuts = sorted(
1907 self.iter_nuts('response', *args), key=lambda nut: nut.dkey)
1909 return [self.get_content(nut, model=model) for nut in nuts]
1911 @filldocs
1912 def get_response(
1913 self, obj=None, tmin=None, tmax=None, time=None, codes=None,
1914 model='squirrel', on_duplicate='raise'):
1916 '''
1917 Get instrument response matching given constraints.
1919 %(query_args)s
1921 :param model:
1922 Select data model for returned object. Choices: ``'squirrel'``,
1923 ``'stationxml'``, ``'stationxml+'``. See return value description.
1924 :type model:
1925 str
1927 :param on_duplicate:
1928 Determines how duplicates/multiple matching responses are handled.
1929 Choices: ``'raise'`` - raise
1930 :py:exc:`~pyrocko.squirrel.error.Duplicate`, ``'warn'`` - emit a
1931 warning and return first match, ``'ignore'`` - silently return
1932 first match.
1933 :type on_duplicate:
1934 str
1936 :returns:
1937 :py:class:`~pyrocko.squirrel.model.Response` if
1938 ``model == 'squirrel'`` or
1939 :py:class:`~pyrocko.io.stationxml.FDSNStationXML` if ``model ==
1940 'stationxml'`` or
1941 (:py:class:`~pyrocko.squirrel.model.Response`,
1942 :py:class:`~pyrocko.io.stationxml.FDSNStationXML`) if ``model ==
1943 'stationxml+'``.
1945 Same as :py:meth:`get_responses` but returning exactly one response.
1946 Raises :py:exc:`~pyrocko.squirrel.error.NotAvailable` if none is
1947 available. Duplicates are handled according to the ``on_duplicate``
1948 argument.
1950 See :py:meth:`iter_nuts` for details on time span matching.
1951 '''
1953 if model == 'stationxml':
1954 model_ = 'stationxml+'
1955 else:
1956 model_ = model
1958 responses = self.get_responses(
1959 obj, tmin, tmax, time, codes, model=model_)
1960 if len(responses) == 0:
1961 raise error.NotAvailable(
1962 'No instrument response available (%s).'
1963 % self._get_selection_args_str(
1964 RESPONSE, obj, tmin, tmax, time, codes))
1966 elif len(responses) > 1:
1968 if on_duplicate in ('raise', 'warn'):
1969 if model_ == 'squirrel':
1970 resps_sq = responses
1971 elif model_ == 'stationxml+':
1972 resps_sq = [resp[0] for resp in responses]
1973 else:
1974 raise ValueError('Invalid response model: %s' % model)
1976 rinfo = ':\n' + '\n'.join(
1977 ' ' + resp.summary for resp in resps_sq)
1979 message = \
1980 'Multiple instrument responses matching given ' \
1981 'constraints (%s)%s%s' % (
1982 self._get_selection_args_str(
1983 RESPONSE, obj, tmin, tmax, time, codes),
1984 ' -> using first' if on_duplicate == 'warn' else '',
1985 rinfo)
1987 if on_duplicate == 'raise':
1988 raise error.Duplicate(message)
1990 elif on_duplicate == 'warn':
1991 logger.warning(message)
1993 elif on_duplicate == 'ignore':
1994 pass
1996 else:
1997 ValueError(
1998 'Invalid argument for on_duplicate: %s' % on_duplicate)
2000 if model == 'stationxml':
2001 return responses[0][1]
2002 else:
2003 return responses[0]
2005 @filldocs
2006 def get_events(
2007 self, obj=None, tmin=None, tmax=None, time=None, codes=None):
2009 '''
2010 Get events matching given constraints.
2012 %(query_args)s
2014 :returns:
2015 List of :py:class:`~pyrocko.model.event.Event` objects.
2017 See :py:meth:`iter_nuts` for details on time span matching.
2018 '''
2020 args = self._get_selection_args(EVENT, obj, tmin, tmax, time, codes)
2021 nuts = sorted(
2022 self.iter_nuts('event', *args), key=lambda nut: nut.dkey)
2024 return [self.get_content(nut) for nut in nuts]
2026 def _redeem_promises(self, *args, order_only=False):
2028 def split_promise(order, tmax=None):
2029 self._split_nuts(
2030 'waveform_promise',
2031 order.tmin, tmax if tmax is not None else order.tmax,
2032 codes=order.codes,
2033 path=order.source_id)
2035 tmin, tmax = args[:2]
2037 waveforms = list(self.iter_nuts('waveform', *args))
2038 promises = list(self.iter_nuts('waveform_promise', *args))
2040 codes_to_avail = defaultdict(list)
2041 for nut in waveforms:
2042 codes_to_avail[nut.codes].append((nut.tmin, nut.tmax))
2044 def tts(x):
2045 if isinstance(x, tuple):
2046 return tuple(tts(e) for e in x)
2047 elif isinstance(x, list):
2048 return list(tts(e) for e in x)
2049 else:
2050 return util.time_to_str(x)
2052 now = time.time()
2053 orders = []
2054 for promise in promises:
2055 waveforms_avail = codes_to_avail[promise.codes]
2056 for block_tmin, block_tmax in blocks(
2057 max(tmin, promise.tmin),
2058 min(tmax, promise.tmax),
2059 promise.deltat):
2061 if block_tmin > now:
2062 continue
2064 orders.append(
2065 WaveformOrder(
2066 source_id=promise.file_path,
2067 codes=promise.codes,
2068 tmin=block_tmin,
2069 tmax=block_tmax,
2070 deltat=promise.deltat,
2071 gaps=gaps(waveforms_avail, block_tmin, block_tmax),
2072 time_created=now))
2074 orders_noop, orders = lpick(lambda order: order.gaps, orders)
2076 order_keys_noop = set(order_key(order) for order in orders_noop)
2077 if len(order_keys_noop) != 0 or len(orders_noop) != 0:
2078 logger.info(
2079 'Waveform orders already satisified with cached/local data: '
2080 '%i (%i)' % (len(order_keys_noop), len(orders_noop)))
2082 for order in orders_noop:
2083 split_promise(order)
2085 if order_only:
2086 if orders:
2087 self._pending_orders.extend(orders)
2088 logger.info(
2089 'Enqueuing %i waveform order%s.'
2090 % len_plural(orders))
2091 return
2092 else:
2093 if self._pending_orders:
2094 orders.extend(self._pending_orders)
2095 logger.info(
2096 'Adding %i previously enqueued order%s.'
2097 % len_plural(self._pending_orders))
2099 self._pending_orders = []
2101 source_ids = []
2102 sources = {}
2103 for source in self._sources:
2104 if isinstance(source, fdsn.FDSNSource):
2105 source_ids.append(source._source_id)
2106 sources[source._source_id] = source
2108 source_priority = dict(
2109 (source_id, i) for (i, source_id) in enumerate(source_ids))
2111 order_groups = defaultdict(list)
2112 for order in orders:
2113 order_groups[order_key(order)].append(order)
2115 for k, order_group in order_groups.items():
2116 order_group.sort(
2117 key=lambda order: source_priority[order.source_id])
2119 n_order_groups = len(order_groups)
2121 if len(order_groups) != 0 or len(orders) != 0:
2122 logger.info(
2123 'Waveform orders standing for download: %i (%i)'
2124 % (len(order_groups), len(orders)))
2126 task = make_task('Waveform orders processed', n_order_groups)
2127 else:
2128 task = None
2130 def release_order_group(order):
2131 okey = order_key(order)
2132 for followup in order_groups[okey]:
2133 if followup is not order:
2134 split_promise(followup)
2136 del order_groups[okey]
2138 if task:
2139 task.update(n_order_groups - len(order_groups))
2141 def noop(order):
2142 pass
2144 def success(order, trs):
2145 release_order_group(order)
2146 if order.is_near_real_time():
2147 if not trs:
2148 return # keep promise when no data received at real time
2149 else:
2150 tmax = max(tr.tmax+tr.deltat for tr in trs)
2151 tmax = order.tmin \
2152 + round((tmax - order.tmin) / order.deltat) \
2153 * order.deltat
2154 split_promise(order, tmax)
2155 else:
2156 split_promise(order)
2158 def batch_add(paths):
2159 self.add(paths)
2161 calls = queue.Queue()
2163 def enqueue(f):
2164 def wrapper(*args):
2165 calls.put((f, args))
2167 return wrapper
2169 while order_groups:
2171 orders_now = []
2172 empty = []
2173 for k, order_group in order_groups.items():
2174 try:
2175 orders_now.append(order_group.pop(0))
2176 except IndexError:
2177 empty.append(k)
2179 for k in empty:
2180 del order_groups[k]
2182 by_source_id = defaultdict(list)
2183 for order in orders_now:
2184 by_source_id[order.source_id].append(order)
2186 threads = []
2187 for source_id in by_source_id:
2188 def download():
2189 try:
2190 sources[source_id].download_waveforms(
2191 by_source_id[source_id],
2192 success=enqueue(success),
2193 error_permanent=enqueue(split_promise),
2194 error_temporary=noop,
2195 batch_add=enqueue(batch_add))
2197 finally:
2198 calls.put(None)
2200 if len(by_source_id) > 1:
2201 thread = threading.Thread(target=download)
2202 thread.start()
2203 threads.append(thread)
2204 else:
2205 download()
2206 calls.put(None)
2208 ndone = 0
2209 while ndone < len(by_source_id):
2210 ret = calls.get()
2211 if ret is None:
2212 ndone += 1
2213 else:
2214 ret[0](*ret[1])
2216 for thread in threads:
2217 thread.join()
2219 if task:
2220 task.update(n_order_groups - len(order_groups))
2222 if task:
2223 task.done()
2225 @filldocs
2226 def get_waveform_nuts(
2227 self, obj=None, tmin=None, tmax=None, time=None, codes=None,
2228 codes_exclude=None, sample_rate_min=None, sample_rate_max=None,
2229 order_only=False):
2231 '''
2232 Get waveform content entities matching given constraints.
2234 %(query_args)s
2236 Like :py:meth:`get_nuts` with ``kind='waveform'`` but additionally
2237 resolves matching waveform promises (downloads waveforms from remote
2238 sources).
2240 See :py:meth:`iter_nuts` for details on time span matching.
2241 '''
2243 args = self._get_selection_args(WAVEFORM, obj, tmin, tmax, time, codes)
2245 if self.downloads_enabled:
2246 self._redeem_promises(
2247 *args,
2248 codes_exclude,
2249 sample_rate_min,
2250 sample_rate_max,
2251 order_only=order_only)
2253 nuts = sorted(
2254 self.iter_nuts('waveform', *args), key=lambda nut: nut.dkey)
2256 return nuts
2258 @filldocs
2259 def have_waveforms(
2260 self, obj=None, tmin=None, tmax=None, time=None, codes=None):
2262 '''
2263 Check if any waveforms or waveform promises are available for given
2264 constraints.
2266 %(query_args)s
2267 '''
2269 args = self._get_selection_args(WAVEFORM, obj, tmin, tmax, time, codes)
2270 return bool(list(
2271 self.iter_nuts('waveform', *args, limit=1))) \
2272 or (self.downloads_enabled and bool(list(
2273 self.iter_nuts('waveform_promise', *args, limit=1))))
    @filldocs
    def get_waveforms(
            self, obj=None, tmin=None, tmax=None, time=None, codes=None,
            codes_exclude=None, sample_rate_min=None, sample_rate_max=None,
            uncut=False, want_incomplete=True, degap=True,
            maxgap=5, maxlap=None, snap=None, include_last=False,
            load_data=True, accessor_id='default', operator_params=None,
            order_only=False, channel_priorities=None):

        '''
        Get waveforms matching given constraints.

        %(query_args)s

        :param sample_rate_min:
            Consider only waveforms with a sampling rate equal to or greater
            than the given value [Hz].
        :type sample_rate_min:
            float

        :param sample_rate_max:
            Consider only waveforms with a sampling rate equal to or less than
            the given value [Hz].
        :type sample_rate_max:
            float

        :param uncut:
            Set to ``True``, to disable cutting traces to [``tmin``, ``tmax``]
            and to disable degapping/deoverlapping. Returns untouched traces as
            they are read from file segment. File segments are always read in
            their entirety.
        :type uncut:
            bool

        :param want_incomplete:
            If ``True``, gappy/incomplete traces are included in the result.
        :type want_incomplete:
            bool

        :param degap:
            If ``True``, connect traces and remove gaps and overlaps.
        :type degap:
            bool

        :param maxgap:
            Maximum gap size in samples which is filled with interpolated
            samples when ``degap`` is ``True``.
        :type maxgap:
            int

        :param maxlap:
            Maximum overlap size in samples which is removed when ``degap`` is
            ``True``.
        :type maxlap:
            int

        :param snap:
            Rounding functions used when computing sample index from time
            instance, for trace start and trace end, respectively. By default,
            ``(round, round)`` is used.
        :type snap:
            :py:class:`tuple` of 2 callables

        :param include_last:
            If ``True``, add one more sample to the returned traces (the sample
            which would be the first sample of a query with ``tmin`` set to the
            current value of ``tmax``).
        :type include_last:
            bool

        :param load_data:
            If ``True``, waveform data samples are read from files (or cache).
            If ``False``, meta-information-only traces are returned (dummy
            traces with no data samples).
        :type load_data:
            bool

        :param accessor_id:
            Name of consumer on who's behalf data is accessed. Used in cache
            management (see :py:mod:`~pyrocko.squirrel.cache`). Used as a key
            to distinguish different points of extraction for the decision of
            when to release cached waveform data. Should be used when data is
            alternately extracted from more than one region / selection.
        :type accessor_id:
            str

        :param channel_priorities:
            List of band/instrument code combinations to try. For example,
            giving ``['HH', 'BH']`` would first try to get ``HH?`` channels and
            then fallback to ``BH?`` if these are not available. The first
            matching waveforms are returned. Use in combination with
            ``sample_rate_min`` and ``sample_rate_max`` to constrain the sample
            rate.
        :type channel_priorities:
            :py:class:`list` of :py:class:`str`

        See :py:meth:`iter_nuts` for details on time span matching.

        Loaded data is kept in memory (at least) until
        :py:meth:`clear_accessor` has been called or
        :py:meth:`advance_accessor` has been called two consecutive times
        without data being accessed between the two calls (by this accessor).
        Data may still be further kept in the memory cache if held alive by
        consumers with a different ``accessor_id``.
        '''

        tmin, tmax, codes = self._get_selection_args(
            WAVEFORM, obj, tmin, tmax, time, codes)

        # Priority lists are handled by a dedicated helper which calls back
        # into this method once per band/instrument combination.
        if channel_priorities is not None:
            return self._get_waveforms_prioritized(
                tmin=tmin, tmax=tmax, codes=codes, codes_exclude=codes_exclude,
                sample_rate_min=sample_rate_min,
                sample_rate_max=sample_rate_max,
                uncut=uncut, want_incomplete=want_incomplete, degap=degap,
                maxgap=maxgap, maxlap=maxlap, snap=snap,
                include_last=include_last, load_data=load_data,
                accessor_id=accessor_id, operator_params=operator_params,
                order_only=order_only, channel_priorities=channel_priorities)

        kinds = ['waveform']
        if self.downloads_enabled:
            kinds.append('waveform_promise')

        self_tmin, self_tmax = self.get_time_span(kinds)

        # Undefined overall time span means there is nothing (not even
        # promises) to extract from.
        if None in (self_tmin, self_tmax):
            logger.warning(
                'No waveforms available.')
            return []

        # Open-ended queries default to the full available time span.
        tmin = tmin if tmin is not None else self_tmin
        tmax = tmax if tmax is not None else self_tmax

        if codes is not None and len(codes) == 1:
            # TODO: fix for multiple / mixed codes
            # If an operator produces these codes, delegate extraction to it.
            operator = self.get_operator(codes[0])
            if operator is not None:
                return operator.get_waveforms(
                    self, codes[0],
                    tmin=tmin, tmax=tmax,
                    uncut=uncut, want_incomplete=want_incomplete, degap=degap,
                    maxgap=maxgap, maxlap=maxlap, snap=snap,
                    include_last=include_last, load_data=load_data,
                    accessor_id=accessor_id, params=operator_params)

        # Resolves promises (may download data) before listing matching nuts.
        nuts = self.get_waveform_nuts(
            obj, tmin, tmax, time, codes, codes_exclude, sample_rate_min,
            sample_rate_max, order_only=order_only)

        if order_only:
            return []

        if load_data:
            traces = [
                self.get_content(nut, 'waveform', accessor_id) for nut in nuts]

        else:
            # Meta-only mode: build dummy traces from the index information.
            traces = [
                trace.Trace(**nut.trace_kwargs) for nut in nuts]

        if uncut:
            return traces

        if snap is None:
            snap = (round, round)

        chopped = []
        for tr in traces:
            # A cached trace may still carry data even in meta-only mode;
            # strip it via a data-less copy so the original stays intact.
            if not load_data and tr.ydata is not None:
                tr = tr.copy(data=False)
                tr.ydata = None

            try:
                chopped.append(tr.chop(
                    tmin, tmax,
                    inplace=False,
                    snap=snap,
                    include_last=include_last))

            except trace.NoData:
                # Trace does not intersect the requested span after snapping.
                pass

        processed = self._process_chopped(
            chopped, degap, maxgap, maxlap, want_incomplete, tmin, tmax)

        return processed
    def _get_waveforms_prioritized(
            self, tmin=None, tmax=None, codes=None, codes_exclude=None,
            channel_priorities=None, **kwargs):

        '''
        Get waveforms for each band/instrument combination in
        ``channel_priorities``, in order, excluding channels for which a
        higher-priority combination already yielded data.
        '''

        trs_all = []
        codes_have = set()
        for channel in channel_priorities:
            # Each priority entry is a two-character band+instrument code,
            # e.g. 'HH' or 'BH'.
            assert len(channel) == 2
            if codes is not None:
                codes_now = [
                    codes_.replace(channel=channel+'?') for codes_ in codes]
            else:
                # NOTE(review): this branch yields a single Codes object
                # rather than a list as in the branch above — presumably
                # handled transparently downstream; confirm.
                codes_now = model.CodesNSLCE('*', '*', '*', channel+'?')

            # Exclude components already delivered by a higher-priority
            # band/instrument combination (same component letter, current
            # band/instrument prefix).
            codes_exclude_now = list(set(
                codes_.replace(channel=channel+codes_.channel[-1])
                for codes_ in codes_have))

            if codes_exclude:
                codes_exclude_now.extend(codes_exclude)

            trs = self.get_waveforms(
                tmin=tmin,
                tmax=tmax,
                codes=codes_now,
                codes_exclude=codes_exclude_now,
                **kwargs)

            codes_have.update(set(tr.codes for tr in trs))
            trs_all.extend(trs)

        return trs_all
    @filldocs
    def chopper_waveforms(
            self, obj=None, tmin=None, tmax=None, time=None, codes=None,
            codes_exclude=None, sample_rate_min=None, sample_rate_max=None,
            tinc=None, tpad=0.,
            want_incomplete=True, snap_window=False,
            degap=True, maxgap=5, maxlap=None,
            snap=None, include_last=False, load_data=True,
            accessor_id=None, clear_accessor=True, operator_params=None,
            grouping=None, channel_priorities=None):

        '''
        Iterate window-wise over waveform archive.

        %(query_args)s

        :param tinc:
            Time increment (window shift time) (default uses ``tmax-tmin``).
        :type tinc:
            :py:func:`~pyrocko.util.get_time_float`

        :param tpad:
            Padding time appended on either side of the data window (window
            overlap is ``2*tpad``).
        :type tpad:
            :py:func:`~pyrocko.util.get_time_float`

        :param want_incomplete:
            If ``True``, gappy/incomplete traces are included in the result.
        :type want_incomplete:
            bool

        :param snap_window:
            If ``True``, start time windows at multiples of tinc with respect
            to system time zero.
        :type snap_window:
            bool

        :param degap:
            If ``True``, connect traces and remove gaps and overlaps.
        :type degap:
            bool

        :param maxgap:
            Maximum gap size in samples which is filled with interpolated
            samples when ``degap`` is ``True``.
        :type maxgap:
            int

        :param maxlap:
            Maximum overlap size in samples which is removed when ``degap`` is
            ``True``.
        :type maxlap:
            int

        :param snap:
            Rounding functions used when computing sample index from time
            instance, for trace start and trace end, respectively. By default,
            ``(round, round)`` is used.
        :type snap:
            :py:class:`tuple` of 2 callables

        :param include_last:
            If ``True``, add one more sample to the returned traces (the sample
            which would be the first sample of a query with ``tmin`` set to the
            current value of ``tmax``).
        :type include_last:
            bool

        :param load_data:
            If ``True``, waveform data samples are read from files (or cache).
            If ``False``, meta-information-only traces are returned (dummy
            traces with no data samples).
        :type load_data:
            bool

        :param accessor_id:
            Name of consumer on who's behalf data is accessed. Used in cache
            management (see :py:mod:`~pyrocko.squirrel.cache`). Used as a key
            to distinguish different points of extraction for the decision of
            when to release cached waveform data. Should be used when data is
            alternately extracted from more than one region / selection.
        :type accessor_id:
            str

        :param clear_accessor:
            If ``True`` (default), :py:meth:`clear_accessor` is called when the
            chopper finishes. Set to ``False`` to keep loaded waveforms in
            memory when the generator returns.
        :type clear_accessor:
            bool

        :param grouping:
            By default, traversal over the data is over time and all matching
            traces of a time window are yielded. Using this option, it is
            possible to traverse the data first by group (e.g. station or
            network) and second by time. This can reduce the number of traces
            in each batch and thus reduce the memory footprint of the process.
        :type grouping:
            :py:class:`~pyrocko.squirrel.operators.base.Grouping`

        :param channel_priorities:
            Forwarded to :py:meth:`get_waveforms`, see there for details.
        :type channel_priorities:
            :py:class:`list` of :py:class:`str`

        :yields:
            For each extracted time window or waveform group a
            :py:class:`Batch` object is yielded.

        See :py:meth:`iter_nuts` for details on time span matching.
        '''

        tmin, tmax, codes = self._get_selection_args(
            WAVEFORM, obj, tmin, tmax, time, codes)

        kinds = ['waveform']
        if self.downloads_enabled:
            kinds.append('waveform_promise')

        self_tmin, self_tmax = self.get_time_span(kinds)

        if None in (self_tmin, self_tmax):
            logger.warning(
                'Content has undefined time span. No waveforms and no '
                'waveform promises?')
            return

        if snap_window and tinc is not None:
            # Align window boundaries to multiples of tinc (system time zero).
            tmin = tmin if tmin is not None else self_tmin
            tmax = tmax if tmax is not None else self_tmax
            tmin = math.floor(tmin / tinc) * tinc
            tmax = math.ceil(tmax / tinc) * tinc
        else:
            # Shrink open-ended spans by tpad so padded windows stay within
            # the available data.
            tmin = tmin if tmin is not None else self_tmin + tpad
            tmax = tmax if tmax is not None else self_tmax - tpad

        if tinc is None:
            tinc = tmax - tmin
            nwin = 1
        elif tinc == 0.0:
            nwin = 1
        else:
            # eps guards against an extra window caused by float rounding.
            eps = 1e-6
            nwin = max(1, int((tmax - tmin) / tinc - eps) + 1)

        try:
            if accessor_id is None:
                # Unique per concurrently active chopper.
                accessor_id = 'chopper%i' % self._n_choppers_active

            self._n_choppers_active += 1

            if grouping is None:
                codes_list = [codes]
            else:
                # Use an operator only to compute the input-code groups.
                operator = Operator(
                    filtering=CodesPatternFiltering(codes=codes),
                    grouping=grouping)

                available = set(self.get_codes(kind='waveform'))
                if self.downloads_enabled:
                    available.update(self.get_codes(kind='waveform_promise'))
                operator.update_mappings(sorted(available))

                codes_list = [
                    codes_patterns_list(scl)
                    for scl in operator.iter_in_codes()]

            ngroups = len(codes_list)
            for igroup, scl in enumerate(codes_list):
                for iwin in range(nwin):
                    # Last window is clipped to tmax.
                    wmin, wmax = tmin+iwin*tinc, min(tmin+(iwin+1)*tinc, tmax)

                    chopped = self.get_waveforms(
                        tmin=wmin-tpad,
                        tmax=wmax+tpad,
                        codes=scl,
                        codes_exclude=codes_exclude,
                        sample_rate_min=sample_rate_min,
                        sample_rate_max=sample_rate_max,
                        snap=snap,
                        include_last=include_last,
                        load_data=load_data,
                        want_incomplete=want_incomplete,
                        degap=degap,
                        maxgap=maxgap,
                        maxlap=maxlap,
                        accessor_id=accessor_id,
                        operator_params=operator_params,
                        channel_priorities=channel_priorities)

                    # Tell the cache this accessor is done with the previous
                    # window's data.
                    self.advance_accessor(accessor_id)

                    yield Batch(
                        tmin=wmin,
                        tmax=wmax,
                        i=iwin,
                        n=nwin,
                        igroup=igroup,
                        ngroups=ngroups,
                        traces=chopped)

        finally:
            # Runs also when the consumer abandons the generator early.
            self._n_choppers_active -= 1
            if clear_accessor:
                self.clear_accessor(accessor_id, 'waveform')
2698 def _process_chopped(
2699 self, chopped, degap, maxgap, maxlap, want_incomplete, tmin, tmax):
2701 chopped.sort(key=lambda a: a.full_id)
2702 if degap:
2703 chopped = trace.degapper(chopped, maxgap=maxgap, maxlap=maxlap)
2705 if not want_incomplete:
2706 chopped_weeded = []
2707 for tr in chopped:
2708 emin = tr.tmin - tmin
2709 emax = tr.tmax + tr.deltat - tmax
2710 if (abs(emin) <= 0.5*tr.deltat and abs(emax) <= 0.5*tr.deltat):
2711 chopped_weeded.append(tr)
2713 elif degap:
2714 if (0. < emin <= 5. * tr.deltat
2715 and -5. * tr.deltat <= emax < 0.):
2717 tr.extend(tmin, tmax-tr.deltat, fillmethod='repeat')
2718 chopped_weeded.append(tr)
2720 chopped = chopped_weeded
2722 return chopped
    def _get_pyrocko_stations(
            self, obj=None, tmin=None, tmax=None, time=None, codes=None,
            on_error='raise'):

        '''
        Collect matching stations/channels and convert them into
        :py:class:`pyrocko.model.Station` objects, merging duplicate
        definitions with :py:func:`pyrocko.util.consistency_merge`.
        '''

        from pyrocko import model as pmodel

        if codes is not None:
            codes = codes_patterns_for_kind(STATION, codes)

        # Per NSL key: ([station-arg tuples], [channel objects]).
        by_nsl = defaultdict(lambda: (list(), list()))
        for station in self.get_stations(obj, tmin, tmax, time, codes):
            sargs = station._get_pyrocko_station_args()
            by_nsl[station.codes.nsl][0].append(sargs)

        # Channel query needs NSLCE-style code patterns.
        if codes is not None:
            codes = [model.CodesNSLCE(c) for c in codes]

        for channel in self.get_channels(obj, tmin, tmax, time, codes):
            sargs = channel._get_pyrocko_station_args()
            sargs_list, channels_list = by_nsl[channel.codes.nsl]
            sargs_list.append(sargs)
            channels_list.append(channel)

        pstations = []
        nsls = list(by_nsl.keys())
        nsls.sort()
        for nsl in nsls:
            sargs_list, channels_list = by_nsl[nsl]
            # The ('',) prefix gives all entries a common merge key; on
            # conflicting values behavior follows ``on_error``.
            sargs = util.consistency_merge(
                [('',) + x for x in sargs_list],
                error=on_error)

            by_c = defaultdict(list)
            for ch in channels_list:
                by_c[ch.codes.channel].append(ch._get_pyrocko_channel_args())

            chas = list(by_c.keys())
            chas.sort()
            pchannels = []
            for cha in chas:
                list_of_cargs = by_c[cha]
                cargs = util.consistency_merge(
                    [('',) + x for x in list_of_cargs],
                    error=on_error)
                pchannels.append(pmodel.Channel(*cargs))

            pstations.append(
                pmodel.Station(*sargs, channels=pchannels))

        return pstations
2775 @property
2776 def pile(self):
2778 '''
2779 Emulates the older :py:class:`pyrocko.pile.Pile` interface.
2781 This property exposes a :py:class:`pyrocko.squirrel.pile.Pile` object,
2782 which emulates most of the older :py:class:`pyrocko.pile.Pile` methods
2783 but uses the fluffy power of the Squirrel under the hood.
2785 This interface can be used as a drop-in replacement for piles which are
2786 used in existing scripts and programs for efficient waveform data
2787 access. The Squirrel-based pile scales better for large datasets. Newer
2788 scripts should use Squirrel's native methods to avoid the emulation
2789 overhead.
2790 '''
2791 from . import pile
2793 if self._pile is None:
2794 self._pile = pile.Pile(self)
2796 return self._pile
2798 def snuffle(self, **kwargs):
2799 '''
2800 Look at dataset in Snuffler.
2801 '''
2802 self.pile.snuffle(**kwargs)
2804 def _gather_codes_keys(self, kind, gather, selector):
2805 return set(
2806 gather(codes)
2807 for codes in self.iter_codes(kind)
2808 if selector is None or selector(codes))
2810 def __str__(self):
2811 return str(self.get_stats())
    def get_coverage(
            self, kind, tmin=None, tmax=None, codes=None, limit=None):

        '''
        Get coverage information.

        Get information about strips of gapless data coverage.

        :param kind:
            Content kind to be queried.
        :type kind:
            str

        :param tmin:
            Start time of query interval.
        :type tmin:
            :py:func:`~pyrocko.util.get_time_float`

        :param tmax:
            End time of query interval.
        :type tmax:
            :py:func:`~pyrocko.util.get_time_float`

        :param codes:
            If given, restrict query to given content codes patterns.
        :type codes:
            :py:class:`list` of :py:class:`~pyrocko.squirrel.model.Codes`
            objects appropriate for the queried content type, or anything which
            can be converted to such objects.

        :param limit:
            Limit query to return only up to a given maximum number of entries
            per matching time series (without setting this option, very gappy
            data could cause the query to execute for a very long time).
        :type limit:
            int

        :returns:
            Information about time spans covered by the requested time series
            data.
        :rtype:
            :py:class:`list` of :py:class:`~pyrocko.squirrel.model.Coverage`
        '''

        # Times are stored split into integer seconds plus sub-second offset.
        tmin_seconds, tmin_offset = model.tsplit(tmin)
        tmax_seconds, tmax_offset = model.tsplit(tmax)
        kind_id = to_kind_id(kind)

        codes_info = list(self._iter_codes_info(kind=kind))

        # kdata_all entries: (pattern, kind_codes_id, codes, deltat).
        kdata_all = []
        if codes is None:
            for _, codes_entry, deltat, kind_codes_id, _ in codes_info:
                kdata_all.append(
                    (codes_entry, kind_codes_id, codes_entry, deltat))

        else:
            for codes_entry in codes:
                pattern = to_codes(kind_id, codes_entry)
                for _, codes_entry, deltat, kind_codes_id, _ in codes_info:
                    if model.match_codes(pattern, codes_entry):
                        kdata_all.append(
                            (pattern, kind_codes_id, codes_entry, deltat))

        kind_codes_ids = [x[1] for x in kdata_all]

        # Number of traces alive exactly at tmin, per (codes, deltat); this
        # seeds the running counts because the coverage table only stores
        # changes strictly after tmin (see the '<' comment below).
        counts_at_tmin = {}
        if tmin is not None:
            for nut in self.iter_nuts(
                    kind, tmin, tmin, kind_codes_ids=kind_codes_ids):

                k = nut.codes, nut.deltat
                if k not in counts_at_tmin:
                    counts_at_tmin[k] = 0

                counts_at_tmin[k] += 1

        coverages = []
        for pattern, kind_codes_id, codes_entry, deltat in kdata_all:
            # entry: [pattern, codes, deltat, tmin_overall, tmax_overall,
            # changes]; changes is set to None when truncated by ``limit``.
            entry = [pattern, codes_entry, deltat, None, None, []]
            # Overall first (ASC) and last (DESC) coverage change times.
            for i, order in [(0, 'ASC'), (1, 'DESC')]:
                sql = self._sql('''
                    SELECT
                        time_seconds,
                        time_offset
                    FROM %(db)s.%(coverage)s
                    WHERE
                        kind_codes_id == ?
                    ORDER BY
                        kind_codes_id ''' + order + ''',
                        time_seconds ''' + order + ''',
                        time_offset ''' + order + '''
                    LIMIT 1
                ''')

                for row in self._conn.execute(sql, [kind_codes_id]):
                    entry[3+i] = model.tjoin(row[0], row[1])

            if None in entry[3:5]:
                # No coverage information at all for this time series.
                continue

            args = [kind_codes_id]

            sql_time = ''
            if tmin is not None:
                # intentionally < because (== tmin) is queried from nuts
                sql_time += ' AND ( ? < time_seconds ' \
                    'OR ( ? == time_seconds AND ? < time_offset ) ) '
                args.extend([tmin_seconds, tmin_seconds, tmin_offset])

            if tmax is not None:
                sql_time += ' AND ( time_seconds < ? ' \
                    'OR ( ? == time_seconds AND time_offset <= ? ) ) '
                args.extend([tmax_seconds, tmax_seconds, tmax_offset])

            sql_limit = ''
            if limit is not None:
                sql_limit = ' LIMIT ?'
                args.append(limit)

            sql = self._sql('''
                SELECT
                    time_seconds,
                    time_offset,
                    step
                FROM %(db)s.%(coverage)s
                WHERE
                    kind_codes_id == ?
                    ''' + sql_time + '''
                ORDER BY
                    kind_codes_id,
                    time_seconds,
                    time_offset
            ''' + sql_limit)

            rows = list(self._conn.execute(sql, args))

            if limit is not None and len(rows) == limit:
                # Truncated result would be misleading — flag with None.
                entry[-1] = None
            else:
                counts = counts_at_tmin.get((codes_entry, deltat), 0)
                tlast = None
                if tmin is not None:
                    entry[-1].append((tmin, counts))
                    tlast = tmin

                # Accumulate step changes into running coverage counts.
                for row in rows:
                    t = model.tjoin(row[0], row[1])
                    counts += row[2]
                    entry[-1].append((t, counts))
                    tlast = t

                if tmax is not None and (tlast is None or tlast != tmax):
                    entry[-1].append((tmax, counts))

            coverages.append(model.Coverage.from_values(entry + [kind_id]))

        return coverages
    def get_stationxml(
            self, obj=None, tmin=None, tmax=None, time=None, codes=None,
            level='response', on_error='raise'):

        '''
        Get station/channel/response metadata in StationXML representation.

        %(query_args)s

        :returns:
            :py:class:`~pyrocko.io.stationxml.FDSNStationXML` object.
        '''

        # NOTE: ``on_error`` values other than 'ignore'/'warn'/'raise' would
        # leave ``handle_error`` undefined below.
        if level not in ('network', 'station', 'channel', 'response'):
            raise ValueError('Invalid level: %s' % level)

        tmin, tmax, codes = self._get_selection_args(
            CHANNEL, obj, tmin, tmax, time, codes)

        def tts(t):
            # Format a possibly-undefined time for log messages.
            if t is None:
                return '<none>'
            else:
                return util.tts(t, format='%Y-%m-%d %H:%M:%S')

        # Error policy is fixed once, as a closure used throughout below.
        if on_error == 'ignore':
            def handle_error(exc):
                pass

        elif on_error == 'warn':
            def handle_error(exc):
                logger.warning(str(exc))

        elif on_error == 'raise':
            def handle_error(exc):
                raise exc

        def use_first(node_type_name, codes, k, group):
            # Resolve duplicate epochs by keeping the first occurrence.
            if on_error == 'warn':
                logger.warning(
                    'Duplicates for %s %s, %s - %s -> using first' % (
                        node_type_name,
                        '.'.join(codes),
                        tts(k[0]), tts(k[1])))

            return group[0]

        def deduplicate(node_type_name, codes, nodes):
            # Group nodes by epoch (start, end) and keep one per group.
            groups = defaultdict(list)
            for node in nodes:
                k = (node.start_date, node.end_date)
                groups[k].append(node)

            return [
                use_first(node_type_name, codes, k, group)
                for (k, group) in groups.items()]

        filtering = CodesPatternFiltering(codes=codes)

        nslcs = list(set(
            codes.nslc for codes in
            filtering.filter(self.get_codes(kind='channel'))))

        from pyrocko.io import stationxml as sx

        networks = []
        # Walk the code hierarchy: network -> station -> location -> channel.
        for net, stas in prefix_tree(nslcs):
            network = sx.Network(code=net)
            networks.append(network)

            if level not in ('station', 'channel', 'response'):
                continue

            for sta, locs in stas:
                stations = self.get_stations(
                    tmin=tmin,
                    tmax=tmax,
                    codes=(net, sta, '*'),
                    model='stationxml')

                if on_error != 'raise':
                    stations = deduplicate(
                        'Station', (net, sta), stations)

                errors = sx.check_overlaps(
                    'Station', (net, sta), stations)

                if errors:
                    handle_error(error.Duplicate(
                        'Overlapping/duplicate station info:\n %s'
                        % '\n '.join(errors)))

                network.station_list.extend(stations)

                if level not in ('channel', 'response'):
                    continue

                for loc, chas in locs:
                    for cha, _ in chas:
                        channels = self.get_channels(
                            tmin=tmin,
                            tmax=tmax,
                            codes=(net, sta, loc, cha),
                            model='stationxml')

                        if on_error != 'raise':
                            channels = deduplicate(
                                'Channel', (net, sta, loc, cha), channels)

                        errors = sx.check_overlaps(
                            'Channel', (net, sta, loc, cha), channels)

                        if errors:
                            handle_error(error.Duplicate(
                                'Overlapping/duplicate channel info:\n %s'
                                % '\n '.join(errors)))

                        for channel in channels:
                            # Attach each channel epoch to the station epoch
                            # containing it.
                            station = sx.find_containing(stations, channel)
                            if station is not None:
                                station.channel_list.append(channel)
                            else:
                                handle_error(error.NotAvailable(
                                    'No station or station epoch found '
                                    'for channel: %s' % '.'.join(
                                        (net, sta, loc, cha))))

                                continue

                            if level != 'response':
                                continue

                            try:
                                response_sq, response_sx = self.get_response(
                                    codes=(net, sta, loc, cha),
                                    tmin=channel.start_date,
                                    tmax=channel.end_date,
                                    model='stationxml+',
                                    on_duplicate=on_error)

                            except error.NotAvailable as e:
                                handle_error(e)
                                continue

                            if not (
                                    sx.eq_open(
                                        channel.start_date, response_sq.tmin)
                                    and sx.eq_open(
                                        channel.end_date, response_sq.tmax)):

                                handle_error(error.Inconsistencies(
                                    'Response time span does not match '
                                    'channel time span: %s' % '.'.join(
                                        (net, sta, loc, cha))))

                            channel.response = response_sx

        return sx.FDSNStationXML(
            source='Generated by Pyrocko Squirrel.',
            network_list=networks)
3133 def add_operator(self, op):
3134 self._operators.append(op)
3136 def update_operator_mappings(self):
3137 available = self.get_codes(kind=('channel'))
3139 for operator in self._operators:
3140 operator.update_mappings(available, self._operator_registry)
3142 def iter_operator_mappings(self):
3143 for operator in self._operators:
3144 for in_codes, out_codes in operator.iter_mappings():
3145 yield operator, in_codes, out_codes
3147 def get_operator_mappings(self):
3148 return list(self.iter_operator_mappings())
3150 def get_operator(self, codes):
3151 try:
3152 return self._operator_registry[codes][0]
3153 except KeyError:
3154 return None
3156 def get_operator_group(self, codes):
3157 try:
3158 return self._operator_registry[codes]
3159 except KeyError:
3160 return None, (None, None, None)
3162 def iter_operator_codes(self):
3163 for _, _, out_codes in self.iter_operator_mappings():
3164 for codes in out_codes:
3165 yield codes
3167 def get_operator_codes(self):
3168 return list(self.iter_operator_codes())
3170 def print_tables(self, table_names=None, stream=None):
3171 '''
3172 Dump raw database tables in textual form (for debugging purposes).
3174 :param table_names:
3175 Names of tables to be dumped or ``None`` to dump all.
3176 :type table_names:
3177 :py:class:`list` of :py:class:`str`
3179 :param stream:
3180 Open file or ``None`` to dump to standard output.
3181 '''
3183 if stream is None:
3184 stream = sys.stdout
3186 if isinstance(table_names, str):
3187 table_names = [table_names]
3189 if table_names is None:
3190 table_names = [
3191 'selection_file_states',
3192 'selection_nuts',
3193 'selection_kind_codes_count',
3194 'files', 'nuts', 'kind_codes', 'kind_codes_count']
3196 m = {
3197 'selection_file_states': '%(db)s.%(file_states)s',
3198 'selection_nuts': '%(db)s.%(nuts)s',
3199 'selection_kind_codes_count': '%(db)s.%(kind_codes_count)s',
3200 'files': 'files',
3201 'nuts': 'nuts',
3202 'kind_codes': 'kind_codes',
3203 'kind_codes_count': 'kind_codes_count'}
3205 for table_name in table_names:
3206 self._database.print_table(
3207 m[table_name] % self._names, stream=stream)
class SquirrelStats(Object):
    '''
    Container to hold statistics about contents available from a Squirrel.

    See also :py:meth:`Squirrel.get_stats`.
    '''

    nfiles = Int.T(
        help='Number of files in selection.')
    nnuts = Int.T(
        help='Number of index nuts in selection.')
    codes = List.T(
        Tuple.T(content_t=String.T()),
        help='Available code sequences in selection, e.g. '
             '(agency, network, station, location) for stations nuts.')
    kinds = List.T(
        String.T(),
        help='Available content types in selection.')
    total_size = Int.T(
        help='Aggregated file size of files is selection.')
    counts = Dict.T(
        String.T(), Dict.T(Tuple.T(content_t=String.T()), Int.T()),
        help='Breakdown of how many nuts of any content type and code '
             'sequence are available in selection, ``counts[kind][codes]``.')
    time_spans = Dict.T(
        String.T(), Tuple.T(content_t=Timestamp.T()),
        help='Time spans by content type.')
    sources = List.T(
        String.T(),
        help='Descriptions of attached sources.')
    operators = List.T(
        String.T(),
        help='Descriptions of attached operators.')

    def __str__(self):
        # Total number of nuts per content kind.
        kind_counts = dict(
            (kind, sum(self.counts[kind].values())) for kind in self.kinds)

        scodes = model.codes_to_str_abbreviated(self.codes)

        ssources = '<none>' if not self.sources else '\n' + '\n'.join(
            ' ' + s for s in self.sources)

        soperators = '<none>' if not self.operators else '\n' + '\n'.join(
            ' ' + s for s in self.operators)

        def stime(t):
            # Render a time; the global min/max sentinels mean "unbounded".
            return util.tts(t) if t is not None and t not in (
                model.g_tmin, model.g_tmax) else '<none>'

        def stable(rows):
            # Left-justify columns to the widest cell in each column.
            ns = [max(len(w) for w in col) for col in zip(*rows)]
            return '\n'.join(
                ' '.join(w.ljust(n) for n, w in zip(ns, row))
                for row in rows)

        def indent(s):
            return '\n'.join(' '+line for line in s.splitlines())

        stspans = '<none>' if not self.kinds else '\n' + indent(stable([(
            kind + ':',
            str(kind_counts[kind]),
            stime(self.time_spans[kind][0]),
            '-',
            stime(self.time_spans[kind][1])) for kind in sorted(self.kinds)]))

        s = '''
Number of files: %i
Total size of known files: %s
Number of index nuts: %i
Available content kinds: %s
Available codes: %s
Sources: %s
Operators: %s''' % (
            self.nfiles,
            util.human_bytesize(self.total_size),
            self.nnuts,
            stspans, scodes, ssources, soperators)

        return s.lstrip()
# Explicit public API of this module.
__all__ = [
    'Squirrel',
    'SquirrelStats',
]