Coverage for /usr/local/lib/python3.11/dist-packages/pyrocko/squirrel/base.py: 85%
878 statements
« prev ^ index » next coverage.py v6.5.0, created at 2024-02-27 10:58 +0000
« prev ^ index » next coverage.py v6.5.0, created at 2024-02-27 10:58 +0000
1# http://pyrocko.org - GPLv3
2#
3# The Pyrocko Developers, 21st Century
4# ---|P------/S----------~Lg----------
6'''
7Squirrel main classes.
8'''
10import sys
11import os
12import time
13import math
14import logging
15import threading
16import queue
17from collections import defaultdict
19from pyrocko.guts import Object, Int, List, Tuple, String, Timestamp, Dict
20from pyrocko import util, trace
21from pyrocko import progress
22from pyrocko.plot import nice_time_tick_inc_approx_secs
24from . import model, io, cache, dataset
26from .model import to_kind_id, WaveformOrder, to_kind, to_codes, \
27 STATION, CHANNEL, RESPONSE, EVENT, WAVEFORM, codes_patterns_list, \
28 codes_patterns_for_kind
29from .client import fdsn, catalog
30from .selection import Selection, filldocs
31from .database import abspath
32from .operators.base import Operator, CodesPatternFiltering
33from . import client, environment, error
35logger = logging.getLogger('psq.base')
37guts_prefix = 'squirrel'
def nonef(f, xs):
    '''
    Apply ``f`` to the non-``None`` elements of ``xs``.

    Returns ``None`` when nothing remains after filtering (all elements are
    ``None`` or ``xs`` is empty), otherwise ``f`` of the filtered list.
    '''
    filtered = [value for value in xs if value is not None]
    return f(filtered) if filtered else None
def make_task(*args):
    '''
    Create a :py:mod:`pyrocko.progress` task bound to this module's logger.
    '''
    return progress.task(*args, logger=logger)
def lpick(condition, seq):
    '''
    Partition ``seq`` into two lists by ``condition``.

    :returns:
        Tuple ``(rejected, accepted)`` — elements for which ``condition``
        is falsy, then those for which it is truthy.
    '''
    rejected = []
    accepted = []
    for item in seq:
        bucket = accepted if condition(item) else rejected
        bucket.append(item)

    return rejected, accepted
def len_plural(obj):
    '''
    Return ``(len(obj), suffix)`` where suffix is ``'s'`` unless length is 1.

    Convenience for building messages like ``'%i file%s' % len_plural(xs)``.
    '''
    n = len(obj)
    suffix = '' if n == 1 else 's'
    return n, suffix
def blocks(tmin, tmax, deltat, nsamples_block=100000):
    '''
    Yield consecutive time-block intervals covering ``[tmin, tmax]``.

    The block length is a "nice" rounding of ``deltat * nsamples_block``;
    blocks are aligned to integer multiples of that length.
    '''
    tblock = nice_time_tick_inc_approx_secs(
        util.to_time_float(deltat * nsamples_block))

    ifirst = int(math.floor(tmin / tblock))
    ilast = int(math.ceil(tmax / tblock))

    for i in range(ifirst, ilast):
        yield tblock * i, tblock * (i + 1)
def gaps(avail, tmin, tmax):
    '''
    Compute the uncovered sub-intervals of ``[tmin, tmax]``.

    :param avail:
        Covered intervals as ``(tmin_a, tmax_a)`` pairs.
    :returns:
        List of ``(tmin_g, tmax_g)`` gap intervals, sorted by time.
    '''
    assert tmin < tmax

    # Sweep line: +1 when coverage starts, -1 when it ends. The query
    # interval itself contributes an initial coverage of one.
    events = [(tmax, 1), (tmin, -1)]
    for tmin_a, tmax_a in avail:
        assert tmin_a < tmax_a
        events.append((tmin_a, 1))
        events.append((tmax_a, -1))

    events.sort()

    level = 1
    uncovered = []
    gap_start = None
    for t, step in events:
        if level == 1 and step == -1:
            gap_start = t
        elif level == 0 and step == 1 and gap_start is not None:
            if gap_start != t:
                uncovered.append((gap_start, t))

        level += step

    return uncovered
def order_key(order):
    '''
    Sort/group key for a waveform order: ``(codes, tmin, tmax)``.
    '''
    return tuple(
        getattr(order, name) for name in ('codes', 'tmin', 'tmax'))
103def _is_exact(pat):
104 return not ('*' in pat or '?' in pat or ']' in pat or '[' in pat)
def prefix_tree(tups):
    '''
    Build a nested prefix tree from a list of equal-length tuples.

    Each node is a ``(key, children)`` pair; leaf nodes carry empty
    children lists. Siblings are sorted by key.
    '''
    if not tups:
        return []

    if len(tups[0]) == 1:
        return sorted((tup[0], []) for tup in tups)

    grouped = defaultdict(list)
    for tup in tups:
        grouped[tup[0]].append(tup[1:])

    return [(key, prefix_tree(grouped[key])) for key in sorted(grouped)]
def match_time_span(tmin, tmax, obj):
    '''
    Check whether the query span ``[tmin, tmax)`` intersects ``obj``'s span.

    ``None`` on either side of either span means unbounded.
    '''
    if obj.tmin is not None and tmax is not None and obj.tmin > tmax:
        return False

    if tmin is not None and obj.tmax is not None and tmin >= obj.tmax:
        return False

    return True
class Batch(object):
    '''
    Batch of waveforms from window-wise data extraction.

    Carries the state and results yielded for one time window in
    window-wise waveform extraction with the
    :py:meth:`Squirrel.chopper_waveforms` method.

    *Attributes:*

    .. py:attribute:: tmin

        Start of this time window.

    .. py:attribute:: tmax

        End of this time window.

    .. py:attribute:: tpad

        Padding time length.

    .. py:attribute:: i

        Index of this time window in sequence.

    .. py:attribute:: n

        Total number of time windows in sequence.

    .. py:attribute:: igroup

        Index of this time window's sequence group.

    .. py:attribute:: ngroups

        Total number of sequence groups.

    .. py:attribute:: traces

        Extracted waveforms for this time window.
    '''

    def __init__(self, tmin, tmax, tpad, i, n, igroup, ngroups, traces):
        names = ('tmin', 'tmax', 'tpad', 'i', 'n', 'igroup', 'ngroups',
                 'traces')
        values = (tmin, tmax, tpad, i, n, igroup, ngroups, traces)
        for name, value in zip(names, values):
            setattr(self, name, value)

    def as_multitrace(self):
        '''
        Merge this batch's traces into a single
        :py:class:`~pyrocko.multitrace.MultiTrace` spanning the padded
        window.
        '''
        from pyrocko import multitrace

        data, codes, tmin, deltat = trace.merge_traces_data_as_array(
            self.traces,
            tmin=self.tmin - self.tpad,
            tmax=self.tmax + self.tpad)

        return multitrace.MultiTrace(
            data=data,
            codes=codes,
            tmin=tmin,
            deltat=deltat)
195class Squirrel(Selection):
196 '''
197 Prompt, lazy, indexing, caching, dynamic seismological dataset access.
199 :param env:
200 Squirrel environment instance or directory path to use as starting
201 point for its detection. By default, the current directory is used as
202 starting point. When searching for a usable environment the directory
203 ``'.squirrel'`` or ``'squirrel'`` in the current (or starting point)
204 directory is used if it exists, otherwise the parent directories are
    searched upwards for the existence of such a directory. If no such
206 directory is found, the user's global Squirrel environment
207 ``'$HOME/.pyrocko/squirrel'`` is used.
208 :type env:
209 :py:class:`~pyrocko.squirrel.environment.Environment` or
210 :py:class:`str`
212 :param database:
213 Database instance or path to database. By default the
214 database found in the detected Squirrel environment is used.
215 :type database:
216 :py:class:`~pyrocko.squirrel.database.Database` or :py:class:`str`
218 :param cache_path:
219 Directory path to use for data caching. By default, the ``'cache'``
220 directory in the detected Squirrel environment is used.
221 :type cache_path:
222 :py:class:`str`
224 :param persistent:
225 If given a name, create a persistent selection.
226 :type persistent:
227 :py:class:`str`
229 This is the central class of the Squirrel framework. It provides a unified
230 interface to query and access seismic waveforms, station meta-data and
231 event information from local file collections and remote data sources. For
232 prompt responses, a profound database setup is used under the hood. To
233 speed up assemblage of ad-hoc data selections, files are indexed on first
234 use and the extracted meta-data is remembered in the database for
235 subsequent accesses. Bulk data is lazily loaded from disk and remote
236 sources, just when requested. Once loaded, data is cached in memory to
237 expedite typical access patterns. Files and data sources can be dynamically
238 added to and removed from the Squirrel selection at runtime.
240 Queries are restricted to the contents of the files currently added to the
241 Squirrel selection (usually a subset of the file meta-information
242 collection in the database). This list of files is referred to here as the
243 "selection". By default, temporary tables are created in the attached
244 database to hold the names of the files in the selection as well as various
245 indices and counters. These tables are only visible inside the application
246 which created them and are deleted when the database connection is closed
247 or the application exits. To create a selection which is not deleted at
248 exit, supply a name to the ``persistent`` argument of the Squirrel
249 constructor. Persistent selections are shared among applications using the
250 same database.
252 **Method summary**
254 Some of the methods are implemented in :py:class:`Squirrel`'s base class
255 :py:class:`~pyrocko.squirrel.selection.Selection`.
257 .. autosummary::
259 ~Squirrel.add
260 ~Squirrel.add_source
261 ~Squirrel.add_fdsn
262 ~Squirrel.add_catalog
263 ~Squirrel.add_dataset
264 ~Squirrel.add_virtual
265 ~Squirrel.update
266 ~Squirrel.update_waveform_promises
267 ~Squirrel.advance_accessor
268 ~Squirrel.clear_accessor
269 ~Squirrel.reload
270 ~pyrocko.squirrel.selection.Selection.iter_paths
271 ~Squirrel.iter_nuts
272 ~Squirrel.iter_kinds
273 ~Squirrel.iter_deltats
274 ~Squirrel.iter_codes
275 ~pyrocko.squirrel.selection.Selection.get_paths
276 ~Squirrel.get_nuts
277 ~Squirrel.get_kinds
278 ~Squirrel.get_deltats
279 ~Squirrel.get_codes
280 ~Squirrel.get_counts
281 ~Squirrel.get_time_span
282 ~Squirrel.get_deltat_span
283 ~Squirrel.get_nfiles
284 ~Squirrel.get_nnuts
285 ~Squirrel.get_total_size
286 ~Squirrel.get_stats
287 ~Squirrel.get_content
288 ~Squirrel.get_stations
289 ~Squirrel.get_channels
290 ~Squirrel.get_responses
291 ~Squirrel.get_events
292 ~Squirrel.get_waveform_nuts
293 ~Squirrel.get_waveforms
294 ~Squirrel.chopper_waveforms
295 ~Squirrel.get_coverage
296 ~Squirrel.pile
297 ~Squirrel.snuffle
298 ~Squirrel.glob_codes
299 ~pyrocko.squirrel.selection.Selection.get_database
300 ~Squirrel.print_tables
301 '''
    def __init__(
            self, env=None, database=None, cache_path=None, persistent=None):

        # Resolve the environment first: it supplies defaults for all of
        # the other, still-unset arguments.
        if not isinstance(env, environment.Environment):
            env = environment.get_environment(env)

        if database is None:
            database = env.expand_path(env.database_path)

        if cache_path is None:
            cache_path = env.expand_path(env.cache_path)

        if persistent is None:
            persistent = env.persistent

        Selection.__init__(
            self, database=database, persistent=persistent)

        # Paths in the database are stored relative to the directory
        # containing the environment directory.
        self.get_database().set_basepath(os.path.dirname(env.get_basepath()))

        # Waveform content is cached separately from other content kinds.
        self._content_caches = {
            'waveform': cache.ContentCache(),
            'default': cache.ContentCache()}

        self._cache_path = cache_path

        self._sources = []  # remote data access clients (see add_source)
        self._operators = []
        self._operator_registry = {}

        self._pending_orders = []

        self._pile = None  # created on demand
        self._n_choppers_active = 0

        self.downloads_enabled = True

        # Per-selection table names for the Squirrel-specific tables,
        # derived from the selection name.
        self._names.update({
            'nuts': self.name + '_nuts',
            'kind_codes_count': self.name + '_kind_codes_count',
            'coverage': self.name + '_coverage'})

        with self.transaction('create tables') as cursor:
            self._create_tables_squirrel(cursor)
    def _create_tables_squirrel(self, cursor):
        # Create the Squirrel-specific tables, indices and triggers in the
        # attached database: the per-selection nut index, the per-kind_codes
        # nut counters and the waveform coverage change-point table. All
        # statements are idempotent (IF NOT EXISTS).

        # Main index of content pieces ("nuts") in the selection.
        cursor.execute(self._register_table(self._sql(
            '''
                CREATE TABLE IF NOT EXISTS %(db)s.%(nuts)s (
                    nut_id integer PRIMARY KEY,
                    file_id integer,
                    file_segment integer,
                    file_element integer,
                    kind_id integer,
                    kind_codes_id integer,
                    tmin_seconds integer,
                    tmin_offset integer,
                    tmax_seconds integer,
                    tmax_offset integer,
                    kscale integer)
            ''')))

        # Number of nuts per (kind, codes) combination, maintained by the
        # inc/dec triggers below.
        cursor.execute(self._register_table(self._sql(
            '''
                CREATE TABLE IF NOT EXISTS %(db)s.%(kind_codes_count)s (
                    kind_codes_id integer PRIMARY KEY,
                    count integer)
            ''')))

        cursor.execute(self._sql(
            '''
                CREATE UNIQUE INDEX IF NOT EXISTS %(db)s.%(nuts)s_file_element
                    ON %(nuts)s (file_id, file_segment, file_element)
            '''))

        cursor.execute(self._sql(
            '''
                CREATE INDEX IF NOT EXISTS %(db)s.%(nuts)s_index_file_id
                    ON %(nuts)s (file_id)
            '''))

        cursor.execute(self._sql(
            '''
                CREATE INDEX IF NOT EXISTS %(db)s.%(nuts)s_index_tmin_seconds
                    ON %(nuts)s (kind_id, tmin_seconds)
            '''))

        cursor.execute(self._sql(
            '''
                CREATE INDEX IF NOT EXISTS %(db)s.%(nuts)s_index_tmax_seconds
                    ON %(nuts)s (kind_id, tmax_seconds)
            '''))

        cursor.execute(self._sql(
            '''
                CREATE INDEX IF NOT EXISTS %(db)s.%(nuts)s_index_kscale
                    ON %(nuts)s (kind_id, kscale, tmin_seconds)
            '''))

        # Keep the nut index consistent with the file tables: deleting a
        # file removes its nuts.
        cursor.execute(self._sql(
            '''
                CREATE TRIGGER IF NOT EXISTS %(db)s.%(nuts)s_delete_nuts
                    BEFORE DELETE ON main.files FOR EACH ROW
                    BEGIN
                        DELETE FROM %(nuts)s WHERE file_id == old.file_id;
                    END
            '''))

        # trigger only on size to make silent update of mtime possible
        cursor.execute(self._sql(
            '''
                CREATE TRIGGER IF NOT EXISTS %(db)s.%(nuts)s_delete_nuts2
                    BEFORE UPDATE OF size ON main.files FOR EACH ROW
                    BEGIN
                        DELETE FROM %(nuts)s WHERE file_id == old.file_id;
                    END
            '''))

        # Removing a file from the selection removes its nuts.
        cursor.execute(self._sql(
            '''
                CREATE TRIGGER IF NOT EXISTS
                    %(db)s.%(file_states)s_delete_files
                    BEFORE DELETE ON %(db)s.%(file_states)s FOR EACH ROW
                    BEGIN
                        DELETE FROM %(nuts)s WHERE file_id == old.file_id;
                    END
            '''))

        # Maintain per-(kind, codes) nut counters on insert/delete.
        cursor.execute(self._sql(
            '''
                CREATE TRIGGER IF NOT EXISTS %(db)s.%(nuts)s_inc_kind_codes
                    BEFORE INSERT ON %(nuts)s FOR EACH ROW
                    BEGIN
                        INSERT OR IGNORE INTO %(kind_codes_count)s VALUES
                            (new.kind_codes_id, 0);
                        UPDATE %(kind_codes_count)s
                        SET count = count + 1
                        WHERE new.kind_codes_id
                            == %(kind_codes_count)s.kind_codes_id;
                    END
            '''))

        cursor.execute(self._sql(
            '''
                CREATE TRIGGER IF NOT EXISTS %(db)s.%(nuts)s_dec_kind_codes
                    BEFORE DELETE ON %(nuts)s FOR EACH ROW
                    BEGIN
                        UPDATE %(kind_codes_count)s
                        SET count = count - 1
                        WHERE old.kind_codes_id
                            == %(kind_codes_count)s.kind_codes_id;
                    END
            '''))

        # Coverage is stored as change points: at each (time, kind_codes)
        # the 'step' column records the net change in the number of
        # overlapping content spans starting/ending there.
        cursor.execute(self._register_table(self._sql(
            '''
                CREATE TABLE IF NOT EXISTS %(db)s.%(coverage)s (
                    kind_codes_id integer,
                    time_seconds integer,
                    time_offset integer,
                    step integer)
            ''')))

        cursor.execute(self._sql(
            '''
                CREATE UNIQUE INDEX IF NOT EXISTS %(db)s.%(coverage)s_time
                    ON %(coverage)s (kind_codes_id, time_seconds, time_offset)
            '''))

        # On nut insert: +1 step at the nut's tmin, -1 at its tmax; entries
        # whose step becomes 0 are pruned.
        cursor.execute(self._sql(
            '''
                CREATE TRIGGER IF NOT EXISTS %(db)s.%(nuts)s_add_coverage
                    AFTER INSERT ON %(nuts)s FOR EACH ROW
                    BEGIN
                        INSERT OR IGNORE INTO %(coverage)s VALUES
                        (new.kind_codes_id, new.tmin_seconds, new.tmin_offset, 0)
                        ;
                        UPDATE %(coverage)s
                        SET step = step + 1
                        WHERE new.kind_codes_id == %(coverage)s.kind_codes_id
                            AND new.tmin_seconds == %(coverage)s.time_seconds
                            AND new.tmin_offset == %(coverage)s.time_offset
                        ;
                        INSERT OR IGNORE INTO %(coverage)s VALUES
                        (new.kind_codes_id, new.tmax_seconds, new.tmax_offset, 0)
                        ;
                        UPDATE %(coverage)s
                        SET step = step - 1
                        WHERE new.kind_codes_id == %(coverage)s.kind_codes_id
                            AND new.tmax_seconds == %(coverage)s.time_seconds
                            AND new.tmax_offset == %(coverage)s.time_offset
                        ;
                        DELETE FROM %(coverage)s
                        WHERE new.kind_codes_id == %(coverage)s.kind_codes_id
                            AND new.tmin_seconds == %(coverage)s.time_seconds
                            AND new.tmin_offset == %(coverage)s.time_offset
                            AND step == 0
                        ;
                        DELETE FROM %(coverage)s
                        WHERE new.kind_codes_id == %(coverage)s.kind_codes_id
                            AND new.tmax_seconds == %(coverage)s.time_seconds
                            AND new.tmax_offset == %(coverage)s.time_offset
                            AND step == 0
                        ;
                    END
            '''))

        # On nut delete: the inverse adjustment of the add_coverage trigger.
        cursor.execute(self._sql(
            '''
                CREATE TRIGGER IF NOT EXISTS %(db)s.%(nuts)s_remove_coverage
                    BEFORE DELETE ON %(nuts)s FOR EACH ROW
                    BEGIN
                        INSERT OR IGNORE INTO %(coverage)s VALUES
                        (old.kind_codes_id, old.tmin_seconds, old.tmin_offset, 0)
                        ;
                        UPDATE %(coverage)s
                        SET step = step - 1
                        WHERE old.kind_codes_id == %(coverage)s.kind_codes_id
                            AND old.tmin_seconds == %(coverage)s.time_seconds
                            AND old.tmin_offset == %(coverage)s.time_offset
                        ;
                        INSERT OR IGNORE INTO %(coverage)s VALUES
                        (old.kind_codes_id, old.tmax_seconds, old.tmax_offset, 0)
                        ;
                        UPDATE %(coverage)s
                        SET step = step + 1
                        WHERE old.kind_codes_id == %(coverage)s.kind_codes_id
                            AND old.tmax_seconds == %(coverage)s.time_seconds
                            AND old.tmax_offset == %(coverage)s.time_offset
                        ;
                        DELETE FROM %(coverage)s
                        WHERE old.kind_codes_id == %(coverage)s.kind_codes_id
                            AND old.tmin_seconds == %(coverage)s.time_seconds
                            AND old.tmin_offset == %(coverage)s.time_offset
                            AND step == 0
                        ;
                        DELETE FROM %(coverage)s
                        WHERE old.kind_codes_id == %(coverage)s.kind_codes_id
                            AND old.tmax_seconds == %(coverage)s.time_seconds
                            AND old.tmax_offset == %(coverage)s.time_offset
                            AND step == 0
                        ;
                    END
            '''))
    def _delete(self):
        '''Delete database tables associated with this Squirrel.'''

        with self.transaction('delete tables') as cursor:
            # Drop in dependency order: triggers first, then tables. The
            # coverage objects use IF EXISTS for compatibility with
            # selections created before coverage support existed.
            for s in '''
                    DROP TRIGGER %(db)s.%(nuts)s_delete_nuts;
                    DROP TRIGGER %(db)s.%(nuts)s_delete_nuts2;
                    DROP TRIGGER %(db)s.%(file_states)s_delete_files;
                    DROP TRIGGER %(db)s.%(nuts)s_inc_kind_codes;
                    DROP TRIGGER %(db)s.%(nuts)s_dec_kind_codes;
                    DROP TABLE %(db)s.%(nuts)s;
                    DROP TABLE %(db)s.%(kind_codes_count)s;
                    DROP TRIGGER IF EXISTS %(db)s.%(nuts)s_add_coverage;
                    DROP TRIGGER IF EXISTS %(db)s.%(nuts)s_remove_coverage;
                    DROP TABLE IF EXISTS %(db)s.%(coverage)s;
            '''.strip().splitlines():

                cursor.execute(self._sql(s))

        Selection._delete(self)
    @filldocs
    def add(self,
            paths,
            kinds=None,
            format='detect',
            include=None,
            exclude=None,
            check=True):

        '''
        Add files to the selection.

        :param paths:
            Iterator yielding paths to files or directories to be added to the
            selection. Recurses into directories. If given a ``str``, it
            is treated as a single path to be added.
        :type paths:
            :py:class:`list` of :py:class:`str`

        :param kinds:
            Content types to be made available through the Squirrel selection.
            By default, all known content types are accepted.
        :type kinds:
            :py:class:`list` of :py:class:`str`

        :param format:
            File format identifier or ``'detect'`` to enable auto-detection
            (available: %(file_formats)s).
        :type format:
            str

        :param include:
            If not ``None``, files are only included if their paths match the
            given regular expression pattern.
        :type include:
            str

        :param exclude:
            If not ``None``, files are only included if their paths do not
            match the given regular expression pattern.
        :type exclude:
            str

        :param check:
            If ``True``, all file modification times are checked to see if
            cached information has to be updated (slow). If ``False``, only
            previously unknown files are indexed and cached information is used
            for known files, regardless of file state (fast, corresponds to
            Squirrel's ``--optimistic`` mode). File deletions will go
            undetected in the latter case.
        :type check:
            bool

        :Complexity:
            O(log N)
        '''

        if isinstance(kinds, str):
            kinds = (kinds,)

        if isinstance(paths, str):
            paths = [paths]

        kind_mask = model.to_kind_mask(kinds)

        # Register the files in the selection; 'virtual:' paths bypass the
        # filesystem traversal.
        Selection.add(
            self, util.iter_select_files(
                paths,
                show_progress=False,
                include=include,
                exclude=exclude,
                pass_through=lambda path: path.startswith('virtual:')
            ), kind_mask, format)

        # Index new/changed files, then refresh the selection's nut tables.
        self._load(check)
        self._update_nuts()
    def reload(self):
        '''
        Check for modifications and reindex modified files.

        Based on file modification times.
        '''

        # Force re-check of every file in the selection, reindex what has
        # changed and refresh the selection's nut tables.
        self._set_file_states_force_check()
        self._load(check=True)
        self._update_nuts()
    def add_virtual(self, nuts, virtual_paths=None):
        '''
        Add content which is not backed by files.

        :param nuts:
            Content pieces to be added.
        :type nuts:
            iterator yielding :py:class:`~pyrocko.squirrel.model.Nut` objects

        :param virtual_paths:
            List of virtual paths to prevent creating a temporary list of the
            nuts while aggregating the file paths for the selection.
        :type virtual_paths:
            :py:class:`list` of :py:class:`str`

        Stores to the main database and the selection.
        '''

        if isinstance(virtual_paths, str):
            virtual_paths = [virtual_paths]

        if virtual_paths is None:
            # Need to materialize the nuts to collect their paths; they are
            # iterated a second time in dig() below.
            if not isinstance(nuts, list):
                nuts = list(nuts)
            virtual_paths = set(nut.file_path for nut in nuts)

        Selection.add(self, virtual_paths)
        self.get_database().dig(nuts)
        self._update_nuts()
    def add_volatile(self, nuts):
        '''
        Add volatile in-memory content.

        Like :py:meth:`add_virtual`, but the nuts are additionally
        registered with the virtual IO backend and their paths recorded in
        ``self._volatile_paths`` (presumably for cleanup at exit — confirm
        against :py:class:`~pyrocko.squirrel.selection.Selection`).
        '''
        if not isinstance(nuts, list):
            nuts = list(nuts)

        paths = list(set(nut.file_path for nut in nuts))
        io.backends.virtual.add_nuts(nuts)
        self.add_virtual(nuts, paths)
        self._volatile_paths.extend(paths)
    def add_volatile_waveforms(self, traces):
        '''
        Add in-memory waveforms which will be removed when the app closes.

        :param traces:
            Waveforms to be added (objects providing ``tmin``, ``tmax``,
            ``deltat``, ``codes`` and ``data_len()``, e.g.
            :py:class:`~pyrocko.trace.Trace`).
        :returns:
            The generated virtual path under which the waveforms are
            registered.
        '''

        # Unique virtual path shared by all traces of this call.
        name = model.random_name()

        path = 'virtual:volatile:%s' % name

        nuts = []
        for itr, tr in enumerate(traces):
            assert tr.tmin <= tr.tmax
            tmin_seconds, tmin_offset = model.tsplit(tr.tmin)
            # End time is derived from the number of samples, not tr.tmax.
            tmax_seconds, tmax_offset = model.tsplit(
                tr.tmin + tr.data_len()*tr.deltat)

            nuts.append(model.Nut(
                file_path=path,
                file_format='virtual',
                file_segment=itr,
                file_element=0,
                file_mtime=0,
                codes=tr.codes,
                tmin_seconds=tmin_seconds,
                tmin_offset=tmin_offset,
                tmax_seconds=tmax_seconds,
                tmax_offset=tmax_offset,
                deltat=tr.deltat,
                kind_id=to_kind_id('waveform'),
                content=tr))

        self.add_volatile(nuts)
        return path
    def _load(self, check):
        # Drive the indexing machinery: iterate over all files in the
        # selection without extracting any content (content=[]), so that
        # meta-data of new or modified files gets indexed into the database.
        for _ in io.iload(
                self,
                content=[],
                skip_unchanged=True,
                check=check):
            pass
    def _update_nuts(self, transaction=None):
        # Copy nut index entries for files newly added to the selection from
        # the global 'nuts' table into the per-selection nuts table,
        # restricted to content kinds enabled in the file's kind_mask.
        transaction = transaction or self.transaction('update nuts')
        with make_task('Aggregating selection') as task, \
                transaction as cursor:

            # Report progress every 100000 SQLite VM steps.
            self._conn.set_progress_handler(task.update, 100000)
            nrows = cursor.execute(self._sql(
                '''
                    INSERT INTO %(db)s.%(nuts)s
                    SELECT NULL,
                        nuts.file_id, nuts.file_segment, nuts.file_element,
                        nuts.kind_id, nuts.kind_codes_id,
                        nuts.tmin_seconds, nuts.tmin_offset,
                        nuts.tmax_seconds, nuts.tmax_offset,
                        nuts.kscale
                    FROM %(db)s.%(file_states)s
                    INNER JOIN nuts
                        ON %(db)s.%(file_states)s.file_id == nuts.file_id
                    INNER JOIN kind_codes
                        ON nuts.kind_codes_id ==
                           kind_codes.kind_codes_id
                    WHERE %(db)s.%(file_states)s.file_state != 2
                        AND (((1 << kind_codes.kind_id)
                              & %(db)s.%(file_states)s.kind_mask) != 0)
                ''')).rowcount

            task.update(nrows)
            self._set_file_states_known(transaction)
            self._conn.set_progress_handler(None, 0)
    def add_source(self, source, check=True):
        '''
        Add remote resource.

        :param source:
            Remote data access client instance.
        :type source:
            subclass of :py:class:`~pyrocko.squirrel.client.base.Source`

        :param check:
            Passed on to the source's ``setup`` method.
        :type check:
            bool
        '''

        self._sources.append(source)
        source.setup(self, check=check)
    def add_fdsn(self, *args, **kwargs):
        '''
        Add FDSN site for transparent remote data access.

        Arguments are passed to
        :py:class:`~pyrocko.squirrel.client.fdsn.FDSNSource`.
        '''

        self.add_source(fdsn.FDSNSource(*args, **kwargs))
    def add_catalog(self, *args, **kwargs):
        '''
        Add online catalog for transparent event data access.

        Arguments are passed to
        :py:class:`~pyrocko.squirrel.client.catalog.CatalogSource`.
        '''

        self.add_source(catalog.CatalogSource(*args, **kwargs))
    def add_dataset(self, ds, check=True):
        '''
        Read dataset description from file and add its contents.

        :param ds:
            Path to dataset description file, dataset description object
            or name of a built-in dataset. See
            :py:mod:`~pyrocko.squirrel.dataset`.
        :type ds:
            :py:class:`str` or :py:class:`~pyrocko.squirrel.dataset.Dataset`

        :param check:
            If ``True``, all file modification times are checked to see if
            cached information has to be updated (slow). If ``False``, only
            previously unknown files are indexed and cached information is used
            for known files, regardless of file state (fast, corresponds to
            Squirrel's ``--optimistic`` mode). File deletions will go
            undetected in the latter case.
        :type check:
            bool
        '''
        if isinstance(ds, str):
            ds = dataset.read_dataset(ds)

        ds.setup(self, check=check)
828 def _get_selection_args(
829 self, kind_id,
830 obj=None, tmin=None, tmax=None, time=None, codes=None):
832 if codes is not None:
833 codes = codes_patterns_for_kind(kind_id, codes)
835 if time is not None:
836 tmin = time
837 tmax = time
839 if obj is not None:
840 tmin = tmin if tmin is not None else obj.tmin
841 tmax = tmax if tmax is not None else obj.tmax
842 codes = codes if codes is not None else codes_patterns_for_kind(
843 kind_id, obj.codes)
845 return tmin, tmax, codes
847 def _get_selection_args_str(self, *args, **kwargs):
849 tmin, tmax, codes = self._get_selection_args(*args, **kwargs)
850 return 'tmin: %s, tmax: %s, codes: %s' % (
851 util.time_to_str(tmin) if tmin is not None else 'none',
852 util.time_to_str(tmax) if tmax is not None else 'none',
853 ','.join(str(entry) for entry in codes))
855 def _selection_args_to_kwargs(
856 self, obj=None, tmin=None, tmax=None, time=None, codes=None):
858 return dict(obj=obj, tmin=tmin, tmax=tmax, time=time, codes=codes)
    def _timerange_sql(self, tmin, tmax, kind, cond, args, naiv):
        # Append SQL conditions (into cond) and their bind values (into
        # args) restricting nuts of the given kind to the time range
        # [tmin, tmax].

        tmin_seconds, tmin_offset = model.tsplit(tmin)
        tmax_seconds, tmax_offset = model.tsplit(tmax)
        if naiv:
            # Simple variant without use of the kscale index (slow, for
            # testing).
            cond.append('%(db)s.%(nuts)s.tmin_seconds <= ?')
            args.append(tmax_seconds)
        else:
            # Nuts are bucketed by kscale (rough duration class). For each
            # bucket, a nut can only intersect the query if its tmin lies
            # within a window widened by that bucket's maximum duration —
            # this keeps the lookup on the (kind_id, kscale, tmin_seconds)
            # index.
            tscale_edges = model.tscale_edges
            tmin_cond = []
            for kscale in range(tscale_edges.size + 1):
                if kscale != tscale_edges.size:
                    tscale = int(tscale_edges[kscale])
                    tmin_cond.append('''
                        (%(db)s.%(nuts)s.kind_id = ?
                         AND %(db)s.%(nuts)s.kscale == ?
                         AND %(db)s.%(nuts)s.tmin_seconds BETWEEN ? AND ?)
                    ''')
                    args.extend(
                        (to_kind_id(kind), kscale,
                         tmin_seconds - tscale - 1, tmax_seconds + 1))

                else:
                    # Last bucket is unbounded in duration: no lower limit
                    # on tmin.
                    tmin_cond.append('''
                        (%(db)s.%(nuts)s.kind_id == ?
                         AND %(db)s.%(nuts)s.kscale == ?
                         AND %(db)s.%(nuts)s.tmin_seconds <= ?)
                    ''')

                    args.extend(
                        (to_kind_id(kind), kscale, tmax_seconds + 1))
            if tmin_cond:
                cond.append(' ( ' + ' OR '.join(tmin_cond) + ' ) ')

        cond.append('%(db)s.%(nuts)s.tmax_seconds >= ?')
        args.append(tmin_seconds)
    def _codes_match_sql(self, positive, kind_id, codes, cond, args):
        # Append an SQL condition (into cond) and bind values (into args)
        # matching (positive=True) or excluding (positive=False) the given
        # codes patterns. Exact patterns use a fast IN (...) lookup, glob
        # patterns fall back to GLOB matching.
        pats = codes_patterns_for_kind(kind_id, codes)
        if pats is None:
            return

        pats_exact = []
        pats_nonexact = []
        for pat in pats:
            spat = pat.safe_str
            (pats_exact if _is_exact(spat) else pats_nonexact).append(spat)

        codes_cond = []
        if pats_exact:
            codes_cond.append(' ( kind_codes.codes IN ( %s ) ) ' % ', '.join(
                '?'*len(pats_exact)))

            args.extend(pats_exact)

        if pats_nonexact:
            codes_cond.append(' ( %s ) ' % ' OR '.join(
                ('kind_codes.codes GLOB ?',) * len(pats_nonexact)))

            args.extend(pats_nonexact)

        if codes_cond:
            cond.append('%s ( %s )' % (
                'NOT' if not positive else '',
                ' OR '.join(codes_cond)))
    def iter_nuts(
            self, kind=None, tmin=None, tmax=None, codes=None,
            codes_exclude=None, sample_rate_min=None, sample_rate_max=None,
            naiv=False, kind_codes_ids=None, path=None, limit=None):

        '''
        Iterate over content entities matching given constraints.

        :param kind:
            Content kind (or kinds) to extract.
        :type kind:
            :py:class:`str`, :py:class:`list` of :py:class:`str`

        :param tmin:
            Start time of query interval.
        :type tmin:
            :py:func:`~pyrocko.util.get_time_float`

        :param tmax:
            End time of query interval.
        :type tmax:
            :py:func:`~pyrocko.util.get_time_float`

        :param codes:
            List of code patterns to query.
        :type codes:
            :py:class:`list` of :py:class:`~pyrocko.squirrel.model.Codes`
            objects appropriate for the queried content type, or anything which
            can be converted to such objects.

        :param naiv:
            Bypass time span lookup through indices (slow, for testing).
        :type naiv:
            :py:class:`bool`

        :param kind_codes_ids:
            Kind-codes IDs of contents to be retrieved (internal use).
        :type kind_codes_ids:
            :py:class:`list` of :py:class:`int`

        :yields:
            :py:class:`~pyrocko.squirrel.model.Nut` objects representing the
            intersecting content.

        :complexity:
            O(log N) for the time selection part due to heavy use of database
            indices.

        Query time span is treated as a half-open interval ``[tmin, tmax)``.
        However, if ``tmin`` equals ``tmax``, the edge logics are modified to
        closed-interval so that content intersecting with the time instant ``t
        = tmin = tmax`` is returned (otherwise nothing would be returned as
        ``[t, t)`` never matches anything).

        Time spans of content entities to be matched are also treated as half
        open intervals, e.g. content span ``[0, 1)`` is matched by query span
        ``[0, 1)`` but not by ``[-1, 0)`` or ``[1, 2)``. Also here, logics are
        modified to closed-interval when the content time span is an empty
        interval, i.e. to indicate a time instant. E.g. time instant 0 is
        matched by ``[0, 1)`` but not by ``[-1, 0)`` or ``[1, 2)``.
        '''

        # Multiple (or all) kinds: recurse once per kind and bail out.
        if not isinstance(kind, str):
            if kind is None:
                kind = model.g_content_kinds
            for kind_ in kind:
                for nut in self.iter_nuts(kind_, tmin, tmax, codes):
                    yield nut

            return

        kind_id = to_kind_id(kind)

        # cond/args collect SQL WHERE fragments and their bind values.
        cond = []
        args = []
        if tmin is not None or tmax is not None:
            assert kind is not None
            # One-sided queries are completed with the selection's overall
            # time span.
            if tmin is None:
                tmin = self.get_time_span()[0]
            if tmax is None:
                tmax = self.get_time_span()[1] + 1.0

            self._timerange_sql(tmin, tmax, kind, cond, args, naiv)

        cond.append('kind_codes.kind_id == ?')
        args.append(kind_id)

        if codes is not None:
            self._codes_match_sql(True, kind_id, codes, cond, args)

        if codes_exclude is not None:
            self._codes_match_sql(False, kind_id, codes_exclude, cond, args)

        # Sample rate constraints are expressed on deltat (the inverse).
        if sample_rate_min is not None:
            cond.append('kind_codes.deltat <= ?')
            args.append(1.0/sample_rate_min)

        if sample_rate_max is not None:
            cond.append('? <= kind_codes.deltat')
            args.append(1.0/sample_rate_max)

        if kind_codes_ids is not None:
            cond.append(
                ' ( kind_codes.kind_codes_id IN ( %s ) ) ' % ', '.join(
                    '?'*len(kind_codes_ids)))

            args.extend(kind_codes_ids)

        db = self.get_database()
        if path is not None:
            cond.append('files.path == ?')
            args.append(db.relpath(abspath(path)))

        sql = ('''
            SELECT
                files.path,
                files.format,
                files.mtime,
                files.size,
                %(db)s.%(nuts)s.file_segment,
                %(db)s.%(nuts)s.file_element,
                kind_codes.kind_id,
                kind_codes.codes,
                %(db)s.%(nuts)s.tmin_seconds,
                %(db)s.%(nuts)s.tmin_offset,
                %(db)s.%(nuts)s.tmax_seconds,
                %(db)s.%(nuts)s.tmax_offset,
                kind_codes.deltat
            FROM files
            INNER JOIN %(db)s.%(nuts)s
                ON files.file_id == %(db)s.%(nuts)s.file_id
            INNER JOIN kind_codes
                ON %(db)s.%(nuts)s.kind_codes_id == kind_codes.kind_codes_id
            ''')

        if cond:
            sql += ''' WHERE ''' + ' AND '.join(cond)

        if limit is not None:
            sql += ''' LIMIT %i''' % limit

        sql = self._sql(sql)
        if tmin is None and tmax is None:
            # No time constraint: yield everything the SQL query returned.
            for row in self._conn.execute(sql, args):
                row = (db.abspath(row[0]),) + row[1:]
                nut = model.Nut(values_nocheck=row)
                yield nut
        else:
            assert tmin is not None and tmax is not None
            # The SQL time selection is coarse (integer seconds); refine
            # here with the exact edge logics described in the docstring.
            if tmin == tmax:
                # Time-instant query: closed-interval matching.
                for row in self._conn.execute(sql, args):
                    row = (db.abspath(row[0]),) + row[1:]
                    nut = model.Nut(values_nocheck=row)
                    if (nut.tmin <= tmin < nut.tmax) \
                            or (nut.tmin == nut.tmax and tmin == nut.tmin):

                        yield nut
            else:
                # Half-open interval matching, with special case for
                # empty (instant) content spans.
                for row in self._conn.execute(sql, args):
                    row = (db.abspath(row[0]),) + row[1:]
                    nut = model.Nut(values_nocheck=row)
                    if (tmin < nut.tmax and nut.tmin < tmax) \
                            or (nut.tmin == nut.tmax
                                and tmin <= nut.tmin < tmax):

                        yield nut
1093 def get_nuts(self, *args, **kwargs):
1094 '''
1095 Get content entities matching given constraints.
1097 Like :py:meth:`iter_nuts` but returns results as a list.
1098 '''
1100 return list(self.iter_nuts(*args, **kwargs))
    def _split_nuts(
            self, kind, tmin=None, tmax=None, codes=None, path=None):
        # Cut a hole [tmin, tmax) into matching nuts: nuts intersecting the
        # range are deleted and, where they extend beyond the range,
        # re-inserted as trimmed left/right remainder pieces. Applied to
        # both the per-selection nuts table and the main 'nuts' table.

        kind_id = to_kind_id(kind)
        tmin_seconds, tmin_offset = model.tsplit(tmin)
        tmax_seconds, tmax_offset = model.tsplit(tmax)

        names_main_nuts = dict(self._names)
        names_main_nuts.update(db='main', nuts='nuts')

        db = self.get_database()

        def main_nuts(s):
            # Substitution targeting the main database's nuts table.
            return s % names_main_nuts

        with self.transaction('split nuts') as cursor:
            # modify selection and main
            for sql_subst in [
                    self._sql, main_nuts]:

                cond = []
                args = []

                self._timerange_sql(tmin, tmax, kind, cond, args, False)

                if codes is not None:
                    self._codes_match_sql(True, kind_id, codes, cond, args)

                if path is not None:
                    cond.append('files.path == ?')
                    args.append(db.relpath(abspath(path)))

                sql = sql_subst('''
                    SELECT
                        %(db)s.%(nuts)s.nut_id,
                        %(db)s.%(nuts)s.tmin_seconds,
                        %(db)s.%(nuts)s.tmin_offset,
                        %(db)s.%(nuts)s.tmax_seconds,
                        %(db)s.%(nuts)s.tmax_offset,
                        kind_codes.deltat
                    FROM files
                    INNER JOIN %(db)s.%(nuts)s
                        ON files.file_id == %(db)s.%(nuts)s.file_id
                    INNER JOIN kind_codes
                        ON %(db)s.%(nuts)s.kind_codes_id == kind_codes.kind_codes_id
                    WHERE ''' + ' AND '.join(cond))  # noqa

                insert = []
                delete = []
                for row in cursor.execute(sql, args):
                    nut_id, nut_tmin_seconds, nut_tmin_offset, \
                        nut_tmax_seconds, nut_tmax_offset, nut_deltat = row

                    nut_tmin = model.tjoin(
                        nut_tmin_seconds, nut_tmin_offset)
                    nut_tmax = model.tjoin(
                        nut_tmax_seconds, nut_tmax_offset)

                    if nut_tmin < tmax and tmin < nut_tmax:
                        # Left remainder: part of the nut before tmin.
                        if nut_tmin < tmin:
                            insert.append((
                                nut_tmin_seconds, nut_tmin_offset,
                                tmin_seconds, tmin_offset,
                                model.tscale_to_kscale(
                                    tmin_seconds - nut_tmin_seconds),
                                nut_id))

                        # Right remainder: part of the nut after tmax.
                        if tmax < nut_tmax:
                            insert.append((
                                tmax_seconds, tmax_offset,
                                nut_tmax_seconds, nut_tmax_offset,
                                model.tscale_to_kscale(
                                    nut_tmax_seconds - tmax_seconds),
                                nut_id))

                        delete.append((nut_id,))

                # Re-insert the remainder pieces by cloning the original
                # nut's file/kind columns, with new time span and kscale.
                sql_add = '''
                    INSERT INTO %(db)s.%(nuts)s (
                            file_id, file_segment, file_element, kind_id,
                            kind_codes_id, tmin_seconds, tmin_offset,
                            tmax_seconds, tmax_offset, kscale )
                    SELECT
                            file_id, file_segment, file_element,
                            kind_id, kind_codes_id, ?, ?, ?, ?, ?
                    FROM %(db)s.%(nuts)s
                    WHERE nut_id == ?
                '''

                cursor.executemany(sql_subst(sql_add), insert)

                sql_delete = '''
                    DELETE FROM %(db)s.%(nuts)s WHERE nut_id == ?
                '''

                cursor.executemany(sql_subst(sql_delete), delete)
    def get_time_span(self, kinds=None, tight=True, dummy_limits=True):
        '''
        Get time interval over all content in selection.

        :param kinds:
            If not ``None``, restrict query to given content kinds.
        :type kind:
            list of str

        :param tight:
            If ``True``, open-ended entries (entries stretching to the global
            time limits) are ignored when forming the combined interval. If
            ``False``, any open-ended entry makes the corresponding limit
            open (``None``/dummy).
        :type tight:
            bool

        :param dummy_limits:
            If ``True``, return the global dummy limits instead of ``None``
            when no (bounded) content is available.
        :type dummy_limits:
            bool

        :complexity:
            O(1), independent of the number of nuts.

        :returns:
            ``(tmin, tmax)``, combined time interval of queried content kinds.
        '''

        # MIN over offsets restricted to rows sharing the minimal seconds
        # value: resolves the (seconds, offset) split-time representation.
        sql_min = self._sql('''
            SELECT MIN(tmin_seconds), MIN(tmin_offset)
            FROM %(db)s.%(nuts)s
            WHERE kind_id == ?
                AND tmin_seconds == (
                    SELECT MIN(tmin_seconds)
                    FROM %(db)s.%(nuts)s
                    WHERE kind_id == ?)
        ''')

        sql_max = self._sql('''
            SELECT MAX(tmax_seconds), MAX(tmax_offset)
            FROM %(db)s.%(nuts)s
            WHERE kind_id == ?
                AND tmax_seconds == (
                    SELECT MAX(tmax_seconds)
                    FROM %(db)s.%(nuts)s
                    WHERE kind_id == ?)
        ''')

        gtmin = None
        gtmax = None

        if isinstance(kinds, str):
            kinds = [kinds]

        if kinds is None:
            kind_ids = model.g_content_kind_ids
        else:
            kind_ids = model.to_kind_ids(kinds)

        tmins = []
        tmaxs = []
        for kind_id in kind_ids:
            for tmin_seconds, tmin_offset in self._conn.execute(
                    sql_min, (kind_id, kind_id)):
                tmins.append(model.tjoin(tmin_seconds, tmin_offset))

            for (tmax_seconds, tmax_offset) in self._conn.execute(
                    sql_max, (kind_id, kind_id)):
                tmaxs.append(model.tjoin(tmax_seconds, tmax_offset))

        # Entries at the global limits mark open-ended content; map them to
        # None so they can be treated specially below.
        tmins = [tmin if tmin != model.g_tmin else None for tmin in tmins]
        tmaxs = [tmax if tmax != model.g_tmax else None for tmax in tmaxs]

        if tight:
            gtmin = nonef(min, tmins)
            gtmax = nonef(max, tmaxs)
        else:
            gtmin = None if None in tmins else nonef(min, tmins)
            gtmax = None if None in tmaxs else nonef(max, tmaxs)

        if dummy_limits:
            if gtmin is None:
                gtmin = model.g_tmin
            if gtmax is None:
                gtmax = model.g_tmax

        return gtmin, gtmax
1273 def has(self, kinds):
1274 '''
1275 Check availability of given content kinds.
1277 :param kinds:
1278 Content kinds to query.
1279 :type kind:
1280 list of str
1282 :returns:
1283 ``True`` if any of the queried content kinds is available
1284 in the selection.
1285 '''
1286 self_tmin, self_tmax = self.get_time_span(
1287 kinds, dummy_limits=False)
1289 return None not in (self_tmin, self_tmax)
1291 def get_deltat_span(self, kind):
1292 '''
1293 Get min and max sampling interval of all content of given kind.
1295 :param kind:
1296 Content kind
1297 :type kind:
1298 str
1300 :returns: ``(deltat_min, deltat_max)``
1301 '''
1303 deltats = [
1304 deltat for deltat in self.get_deltats(kind)
1305 if deltat is not None]
1307 if deltats:
1308 return min(deltats), max(deltats)
1309 else:
1310 return None, None
1312 def iter_kinds(self, codes=None):
1313 '''
1314 Iterate over content types available in selection.
1316 :param codes:
1317 If given, get kinds only for selected codes identifier.
1318 Only a single identifier may be given here and no pattern matching
1319 is done, currently.
1320 :type codes:
1321 :py:class:`~pyrocko.squirrel.model.Codes`
1323 :yields:
1324 Available content kinds as :py:class:`str`.
1326 :complexity:
1327 O(1), independent of number of nuts.
1328 '''
1330 return self._database._iter_kinds(
1331 codes=codes,
1332 kind_codes_count='%(db)s.%(kind_codes_count)s' % self._names)
1334 def iter_deltats(self, kind=None):
1335 '''
1336 Iterate over sampling intervals available in selection.
1338 :param kind:
1339 If given, get sampling intervals only for a given content type.
1340 :type kind:
1341 str
1343 :yields:
1344 :py:class:`float` values.
1346 :complexity:
1347 O(1), independent of number of nuts.
1348 '''
1349 return self._database._iter_deltats(
1350 kind=kind,
1351 kind_codes_count='%(db)s.%(kind_codes_count)s' % self._names)
1353 def iter_codes(self, kind=None):
1354 '''
1355 Iterate over content identifier code sequences available in selection.
1357 :param kind:
1358 If given, get codes only for a given content type.
1359 :type kind:
1360 str
1362 :yields:
1363 :py:class:`tuple` of :py:class:`str`
1365 :complexity:
1366 O(1), independent of number of nuts.
1367 '''
1368 return self._database._iter_codes(
1369 kind=kind,
1370 kind_codes_count='%(db)s.%(kind_codes_count)s' % self._names)
1372 def _iter_codes_info(self, kind=None, codes=None):
1373 '''
1374 Iterate over number of occurrences of any (kind, codes) combination.
1376 :param kind:
1377 If given, get counts only for selected content type.
1378 :type kind:
1379 str
1381 :yields:
1382 Tuples of the form ``(kind, codes, deltat, kind_codes_id, count)``.
1384 :complexity:
1385 O(1), independent of number of nuts.
1386 '''
1387 return self._database._iter_codes_info(
1388 kind=kind,
1389 codes=codes,
1390 kind_codes_count='%(db)s.%(kind_codes_count)s' % self._names)
1392 def get_kinds(self, codes=None):
1393 '''
1394 Get content types available in selection.
1396 :param codes:
1397 If given, get kinds only for selected codes identifier.
1398 Only a single identifier may be given here and no pattern matching
1399 is done, currently.
1400 :type codes:
1401 :py:class:`~pyrocko.squirrel.model.Codes`
1403 :returns:
1404 Sorted list of available content types.
1405 :rtype:
1406 py:class:`list` of :py:class:`str`
1408 :complexity:
1409 O(1), independent of number of nuts.
1411 '''
1412 return sorted(list(self.iter_kinds(codes=codes)))
1414 def get_deltats(self, kind=None):
1415 '''
1416 Get sampling intervals available in selection.
1418 :param kind:
1419 If given, get sampling intervals only for selected content type.
1420 :type kind:
1421 str
1423 :complexity:
1424 O(1), independent of number of nuts.
1426 :returns: Sorted list of available sampling intervals.
1427 '''
1428 return sorted(list(self.iter_deltats(kind=kind)))
1430 def get_codes(self, kind=None):
1431 '''
1432 Get identifier code sequences available in selection.
1434 :param kind:
1435 If given, get codes only for selected content type.
1436 :type kind:
1437 str
1439 :complexity:
1440 O(1), independent of number of nuts.
1442 :returns: Sorted list of available codes as tuples of strings.
1443 '''
1444 return sorted(list(self.iter_codes(kind=kind)))
1446 def get_counts(self, kind=None):
1447 '''
1448 Get number of occurrences of any (kind, codes) combination.
1450 :param kind:
1451 If given, get codes only for selected content type.
1452 :type kind:
1453 str
1455 :complexity:
1456 O(1), independent of number of nuts.
1458 :returns: ``dict`` with ``counts[kind][codes]`` or ``counts[codes]``
1459 if kind is not ``None``
1460 '''
1461 d = {}
1462 for kind_id, codes, _, _, count in self._iter_codes_info(kind=kind):
1463 if kind_id not in d:
1464 v = d[kind_id] = {}
1465 else:
1466 v = d[kind_id]
1468 if codes not in v:
1469 v[codes] = 0
1471 v[codes] += count
1473 if kind is not None:
1474 return d[to_kind_id(kind)]
1475 else:
1476 return dict((to_kind(kind_id), v) for (kind_id, v) in d.items())
1478 def glob_codes(self, kind, codes):
1479 '''
1480 Find codes matching given patterns.
1482 :param kind:
1483 Content kind to be queried.
1484 :type kind:
1485 str
1487 :param codes:
1488 List of code patterns to query.
1489 :type codes:
1490 :py:class:`list` of :py:class:`~pyrocko.squirrel.model.Codes`
1491 objects appropriate for the queried content type, or anything which
1492 can be converted to such objects.
1494 :returns:
1495 List of matches of the form ``[kind_codes_id, codes, deltat]``.
1496 '''
1498 kind_id = to_kind_id(kind)
1499 args = [kind_id]
1500 pats = codes_patterns_for_kind(kind_id, codes)
1502 if pats:
1503 codes_cond = 'AND ( %s ) ' % ' OR '.join(
1504 ('kind_codes.codes GLOB ?',) * len(pats))
1506 args.extend(pat.safe_str for pat in pats)
1507 else:
1508 codes_cond = ''
1510 sql = self._sql('''
1511 SELECT kind_codes_id, codes, deltat FROM kind_codes
1512 WHERE
1513 kind_id == ? ''' + codes_cond)
1515 return list(map(list, self._conn.execute(sql, args)))
1517 def update(self, constraint=None, **kwargs):
1518 '''
1519 Update or partially update channel and event inventories.
1521 :param constraint:
1522 Selection of times or areas to be brought up to date.
1523 :type constraint:
1524 :py:class:`~pyrocko.squirrel.client.base.Constraint`
1526 :param \\*\\*kwargs:
1527 Shortcut for setting ``constraint=Constraint(**kwargs)``.
1529 This function triggers all attached remote sources, to check for
1530 updates in the meta-data. The sources will only submit queries when
1531 their expiration date has passed, or if the selection spans into
1532 previously unseen times or areas.
1533 '''
1535 if constraint is None:
1536 constraint = client.Constraint(**kwargs)
1538 task = make_task('Updating sources')
1539 for source in task(self._sources):
1540 source.update_channel_inventory(self, constraint)
1541 source.update_event_inventory(self, constraint)
1543 def update_waveform_promises(self, constraint=None, **kwargs):
1544 '''
1545 Permit downloading of remote waveforms.
1547 :param constraint:
1548 Remote waveforms compatible with the given constraint are enabled
1549 for download.
1550 :type constraint:
1551 :py:class:`~pyrocko.squirrel.client.base.Constraint`
1553 :param \\*\\*kwargs:
1554 Shortcut for setting ``constraint=Constraint(**kwargs)``.
1556 Calling this method permits Squirrel to download waveforms from remote
1557 sources when processing subsequent waveform requests. This works by
1558 inserting so called waveform promises into the database. It will look
1559 into the available channels for each remote source and create a promise
1560 for each channel compatible with the given constraint. If the promise
1561 then matches in a waveform request, Squirrel tries to download the
1562 waveform. If the download is successful, the downloaded waveform is
1563 added to the Squirrel and the promise is deleted. If the download
1564 fails, the promise is kept if the reason of failure looks like being
1565 temporary, e.g. because of a network failure. If the cause of failure
1566 however seems to be permanent, the promise is deleted so that no
1567 further attempts are made to download a waveform which might not be
1568 available from that server at all. To force re-scheduling after a
1569 permanent failure, call :py:meth:`update_waveform_promises`
1570 yet another time.
1571 '''
1573 if constraint is None:
1574 constraint = client.Constraint(**kwargs)
1576 for source in self._sources:
1577 source.update_waveform_promises(self, constraint)
1579 def remove_waveform_promises(self, from_database='selection'):
1580 '''
1581 Remove waveform promises from live selection or global database.
1583 Calling this function removes all waveform promises provided by the
1584 attached sources.
1586 :param from_database:
1587 Remove from live selection ``'selection'`` or global database
1588 ``'global'``.
1589 '''
1590 for source in self._sources:
1591 source.remove_waveform_promises(self, from_database=from_database)
1593 def update_responses(self, constraint=None, **kwargs):
1594 if constraint is None:
1595 constraint = client.Constraint(**kwargs)
1597 for source in self._sources:
1598 source.update_response_inventory(self, constraint)
1600 def get_nfiles(self):
1601 '''
1602 Get number of files in selection.
1603 '''
1605 sql = self._sql('''SELECT COUNT(*) FROM %(db)s.%(file_states)s''')
1606 for row in self._conn.execute(sql):
1607 return row[0]
1609 def get_nnuts(self):
1610 '''
1611 Get number of nuts in selection.
1612 '''
1614 sql = self._sql('''SELECT COUNT(*) FROM %(db)s.%(nuts)s''')
1615 for row in self._conn.execute(sql):
1616 return row[0]
1618 def get_total_size(self):
1619 '''
1620 Get aggregated file size available in selection.
1621 '''
1623 sql = self._sql('''
1624 SELECT SUM(files.size) FROM %(db)s.%(file_states)s
1625 INNER JOIN files
1626 ON %(db)s.%(file_states)s.file_id = files.file_id
1627 ''')
1629 for row in self._conn.execute(sql):
1630 return row[0] or 0
1632 def get_stats(self):
1633 '''
1634 Get statistics on contents available through this selection.
1635 '''
1637 kinds = self.get_kinds()
1638 time_spans = {}
1639 for kind in kinds:
1640 time_spans[kind] = self.get_time_span([kind])
1642 return SquirrelStats(
1643 nfiles=self.get_nfiles(),
1644 nnuts=self.get_nnuts(),
1645 kinds=kinds,
1646 codes=self.get_codes(),
1647 total_size=self.get_total_size(),
1648 counts=self.get_counts(),
1649 time_spans=time_spans,
1650 sources=[s.describe() for s in self._sources],
1651 operators=[op.describe() for op in self._operators])
1653 @filldocs
1654 def check(
1655 self, obj=None, tmin=None, tmax=None, time=None, codes=None,
1656 ignore=[]):
1657 '''
1658 Check for common data/metadata problems.
1660 %(query_args)s
1662 :param ignore:
1663 Problem types to be ignored.
1664 :type ignore:
1665 :class:`list` of :class:`str`
1666 (:py:class:`~pyrocko.squirrel.check.SquirrelCheckProblemType`)
1668 :returns:
1669 :py:class:`~pyrocko.squirrel.check.SquirrelCheck` object
1670 containing the results of the check.
1672 See :py:func:`~pyrocko.squirrel.check.do_check`.
1673 '''
1675 from .check import do_check
1676 tmin, tmax, codes = self._get_selection_args(
1677 CHANNEL, obj, tmin, tmax, time, codes)
1679 return do_check(self, tmin=tmin, tmax=tmax, codes=codes, ignore=ignore)
1681 def get_content(
1682 self,
1683 nut,
1684 cache_id='default',
1685 accessor_id='default',
1686 show_progress=False,
1687 model='squirrel'):
1689 '''
1690 Get and possibly load full content for a given index entry from file.
1692 Loads the actual content objects (channel, station, waveform, ...) from
1693 file. For efficiency, sibling content (all stuff in the same file
1694 segment) will also be loaded as a side effect. The loaded contents are
1695 cached in the Squirrel object.
1696 '''
1698 content_cache = self._content_caches[cache_id]
1699 if not content_cache.has(nut):
1701 for nut_loaded in io.iload(
1702 nut.file_path,
1703 segment=nut.file_segment,
1704 format=nut.file_format,
1705 database=self._database,
1706 update_selection=self,
1707 show_progress=show_progress):
1709 content_cache.put(nut_loaded)
1711 try:
1712 return content_cache.get(nut, accessor_id, model)
1714 except KeyError:
1715 raise error.NotAvailable(
1716 'Unable to retrieve content: %s, %s, %s, %s' % nut.key)
1718 def advance_accessor(self, accessor_id='default', cache_id=None):
1719 '''
1720 Notify memory caches about consumer moving to a new data batch.
1722 :param accessor_id:
1723 Name of accessing consumer to be advanced.
1724 :type accessor_id:
1725 str
1727 :param cache_id:
1728 Name of cache to for which the accessor should be advanced. By
1729 default the named accessor is advanced in all registered caches.
1730 By default, two caches named ``'default'`` and ``'waveform'`` are
1731 available.
1732 :type cache_id:
1733 str
1735 See :py:class:`~pyrocko.squirrel.cache.ContentCache` for details on how
1736 Squirrel's memory caching works and can be tuned. Default behaviour is
1737 to release data when it has not been used in the latest data
1738 window/batch. If the accessor is never advanced, data is cached
1739 indefinitely - which is often desired e.g. for station meta-data.
1740 Methods for consecutive data traversal, like
1741 :py:meth:`chopper_waveforms` automatically advance and clear
1742 their accessor.
1743 '''
1744 for cache_ in (
1745 self._content_caches.keys()
1746 if cache_id is None
1747 else [cache_id]):
1749 self._content_caches[cache_].advance_accessor(accessor_id)
1751 def clear_accessor(self, accessor_id, cache_id=None):
1752 '''
1753 Notify memory caches about a consumer having finished.
1755 :param accessor_id:
1756 Name of accessor to be cleared.
1757 :type accessor_id:
1758 str
1760 :param cache_id:
1761 Name of cache for which the accessor should be cleared. By default
1762 the named accessor is cleared from all registered caches. By
1763 default, two caches named ``'default'`` and ``'waveform'`` are
1764 available.
1765 :type cache_id:
1766 str
1768 Calling this method clears all references to cache entries held by the
1769 named accessor. Cache entries are then freed if not referenced by any
1770 other accessor.
1771 '''
1773 for cache_ in (
1774 self._content_caches.keys()
1775 if cache_id is None
1776 else [cache_id]):
1778 self._content_caches[cache_].clear_accessor(accessor_id)
1780 def get_cache_stats(self, cache_id):
1781 return self._content_caches[cache_id].get_stats()
1783 @filldocs
1784 def get_stations(
1785 self, obj=None, tmin=None, tmax=None, time=None, codes=None,
1786 model='squirrel', on_error='raise'):
1788 '''
1789 Get stations matching given constraints.
1791 %(query_args)s
1793 :param model:
1794 Select object model for returned values: ``'squirrel'`` to get
1795 Squirrel station objects or ``'pyrocko'`` to get Pyrocko station
1796 objects with channel information attached.
1797 :type model:
1798 str
1800 :returns:
1801 List of :py:class:`pyrocko.squirrel.Station
1802 <pyrocko.squirrel.model.Station>` objects by default or list of
1803 :py:class:`pyrocko.model.Station <pyrocko.model.station.Station>`
1804 objects if ``model='pyrocko'`` is requested.
1806 See :py:meth:`iter_nuts` for details on time span matching.
1807 '''
1809 if model == 'pyrocko':
1810 return self._get_pyrocko_stations(
1811 obj, tmin, tmax, time, codes, on_error=on_error)
1812 elif model in ('squirrel', 'stationxml', 'stationxml+'):
1813 args = self._get_selection_args(
1814 STATION, obj, tmin, tmax, time, codes)
1816 nuts = sorted(
1817 self.iter_nuts('station', *args), key=lambda nut: nut.dkey)
1819 return [self.get_content(nut, model=model) for nut in nuts]
1820 else:
1821 raise ValueError('Invalid station model: %s' % model)
1823 @filldocs
1824 def get_channels(
1825 self, obj=None, tmin=None, tmax=None, time=None, codes=None,
1826 model='squirrel'):
1828 '''
1829 Get channels matching given constraints.
1831 %(query_args)s
1833 :returns:
1834 List of :py:class:`~pyrocko.squirrel.model.Channel` objects.
1836 See :py:meth:`iter_nuts` for details on time span matching.
1837 '''
1839 args = self._get_selection_args(
1840 CHANNEL, obj, tmin, tmax, time, codes)
1842 nuts = sorted(
1843 self.iter_nuts('channel', *args), key=lambda nut: nut.dkey)
1845 return [self.get_content(nut, model=model) for nut in nuts]
    @filldocs
    def get_sensors(
            self, obj=None, tmin=None, tmax=None, time=None, codes=None):

        '''
        Get sensors matching given constraints.

        %(query_args)s

        :returns:
            List of :py:class:`~pyrocko.squirrel.model.Sensor` objects.

        See :py:meth:`iter_nuts` for details on time span matching.
        '''

        tmin, tmax, codes = self._get_selection_args(
            CHANNEL, obj, tmin, tmax, time, codes)

        if codes is not None:
            # Widen channel patterns to match whole sensors: replace the last
            # (component) character with '?' so all components of a sensor
            # are picked up; a full wildcard is left untouched.
            codes = codes_patterns_list(
                (entry.replace(channel=entry.channel[:-1] + '?')
                 if entry.channel != '*' else entry)
                for entry in codes)

        nuts = sorted(
            self.iter_nuts(
                'channel', tmin, tmax, codes), key=lambda nut: nut.dkey)

        # Group channels into sensors, then filter again by time span because
        # the sensor epochs may differ from the individual channel epochs.
        return [
            sensor for sensor in model.Sensor.from_channels(
                self.get_content(nut) for nut in nuts)
            if match_time_span(tmin, tmax, sensor)]
1880 @filldocs
1881 def get_responses(
1882 self, obj=None, tmin=None, tmax=None, time=None, codes=None,
1883 model='squirrel'):
1885 '''
1886 Get instrument responses matching given constraints.
1888 %(query_args)s
1890 :param model:
1891 Select data model for returned objects. Choices: ``'squirrel'``,
1892 ``'stationxml'``, ``'stationxml+'``. See return value description.
1893 :type model:
1894 str
1896 :returns:
1897 List of :py:class:`~pyrocko.squirrel.model.Response` if ``model ==
1898 'squirrel'`` or list of
1899 :py:class:`~pyrocko.io.stationxml.FDSNStationXML`
1900 if ``model == 'stationxml'`` or list of
1901 (:py:class:`~pyrocko.squirrel.model.Response`,
1902 :py:class:`~pyrocko.io.stationxml.FDSNStationXML`) if ``model ==
1903 'stationxml+'``.
1905 See :py:meth:`iter_nuts` for details on time span matching.
1906 '''
1908 args = self._get_selection_args(
1909 RESPONSE, obj, tmin, tmax, time, codes)
1911 nuts = sorted(
1912 self.iter_nuts('response', *args), key=lambda nut: nut.dkey)
1914 return [self.get_content(nut, model=model) for nut in nuts]
1916 @filldocs
1917 def get_response(
1918 self, obj=None, tmin=None, tmax=None, time=None, codes=None,
1919 model='squirrel', on_duplicate='raise'):
1921 '''
1922 Get instrument response matching given constraints.
1924 %(query_args)s
1926 :param model:
1927 Select data model for returned object. Choices: ``'squirrel'``,
1928 ``'stationxml'``, ``'stationxml+'``. See return value description.
1929 :type model:
1930 str
1932 :param on_duplicate:
1933 Determines how duplicates/multiple matching responses are handled.
1934 Choices: ``'raise'`` - raise
1935 :py:exc:`~pyrocko.squirrel.error.Duplicate`, ``'warn'`` - emit a
1936 warning and return first match, ``'ignore'`` - silently return
1937 first match.
1938 :type on_duplicate:
1939 str
1941 :returns:
1942 :py:class:`~pyrocko.squirrel.model.Response` if
1943 ``model == 'squirrel'`` or
1944 :py:class:`~pyrocko.io.stationxml.FDSNStationXML` if ``model ==
1945 'stationxml'`` or
1946 (:py:class:`~pyrocko.squirrel.model.Response`,
1947 :py:class:`~pyrocko.io.stationxml.FDSNStationXML`) if ``model ==
1948 'stationxml+'``.
1950 Same as :py:meth:`get_responses` but returning exactly one response.
1951 Raises :py:exc:`~pyrocko.squirrel.error.NotAvailable` if none is
1952 available. Duplicates are handled according to the ``on_duplicate``
1953 argument.
1955 See :py:meth:`iter_nuts` for details on time span matching.
1956 '''
1958 if model == 'stationxml':
1959 model_ = 'stationxml+'
1960 else:
1961 model_ = model
1963 responses = self.get_responses(
1964 obj, tmin, tmax, time, codes, model=model_)
1965 if len(responses) == 0:
1966 raise error.NotAvailable(
1967 'No instrument response available (%s).'
1968 % self._get_selection_args_str(
1969 RESPONSE, obj, tmin, tmax, time, codes))
1971 elif len(responses) > 1:
1973 if on_duplicate in ('raise', 'warn'):
1974 if model_ == 'squirrel':
1975 resps_sq = responses
1976 elif model_ == 'stationxml+':
1977 resps_sq = [resp[0] for resp in responses]
1978 else:
1979 raise ValueError('Invalid response model: %s' % model)
1981 rinfo = ':\n' + '\n'.join(
1982 ' ' + resp.summary for resp in resps_sq)
1984 message = \
1985 'Multiple instrument responses matching given ' \
1986 'constraints (%s)%s%s' % (
1987 self._get_selection_args_str(
1988 RESPONSE, obj, tmin, tmax, time, codes),
1989 ' -> using first' if on_duplicate == 'warn' else '',
1990 rinfo)
1992 if on_duplicate == 'raise':
1993 raise error.Duplicate(message)
1995 elif on_duplicate == 'warn':
1996 logger.warning(message)
1998 elif on_duplicate == 'ignore':
1999 pass
2001 else:
2002 ValueError(
2003 'Invalid argument for on_duplicate: %s' % on_duplicate)
2005 if model == 'stationxml':
2006 return responses[0][1]
2007 else:
2008 return responses[0]
2010 @filldocs
2011 def get_events(
2012 self, obj=None, tmin=None, tmax=None, time=None, codes=None):
2014 '''
2015 Get events matching given constraints.
2017 %(query_args)s
2019 :returns:
2020 List of :py:class:`~pyrocko.model.event.Event` objects.
2022 See :py:meth:`iter_nuts` for details on time span matching.
2023 '''
2025 args = self._get_selection_args(EVENT, obj, tmin, tmax, time, codes)
2026 nuts = sorted(
2027 self.iter_nuts('event', *args), key=lambda nut: nut.dkey)
2029 return [self.get_content(nut) for nut in nuts]
2031 def _redeem_promises(self, *args, order_only=False):
2033 def split_promise(order, tmax=None):
2034 self._split_nuts(
2035 'waveform_promise',
2036 order.tmin, tmax if tmax is not None else order.tmax,
2037 codes=order.codes,
2038 path=order.source_id)
2040 tmin, tmax = args[:2]
2042 waveforms = list(self.iter_nuts('waveform', *args))
2043 promises = list(self.iter_nuts('waveform_promise', *args))
2045 codes_to_avail = defaultdict(list)
2046 for nut in waveforms:
2047 codes_to_avail[nut.codes].append((nut.tmin, nut.tmax))
2049 def tts(x):
2050 if isinstance(x, tuple):
2051 return tuple(tts(e) for e in x)
2052 elif isinstance(x, list):
2053 return list(tts(e) for e in x)
2054 else:
2055 return util.time_to_str(x)
2057 now = time.time()
2058 orders = []
2059 for promise in promises:
2060 waveforms_avail = codes_to_avail[promise.codes]
2061 for block_tmin, block_tmax in blocks(
2062 max(tmin, promise.tmin),
2063 min(tmax, promise.tmax),
2064 promise.deltat):
2066 if block_tmin > now:
2067 continue
2069 orders.append(
2070 WaveformOrder(
2071 source_id=promise.file_path,
2072 codes=promise.codes,
2073 tmin=block_tmin,
2074 tmax=block_tmax,
2075 deltat=promise.deltat,
2076 gaps=gaps(waveforms_avail, block_tmin, block_tmax),
2077 time_created=now))
2079 orders_noop, orders = lpick(lambda order: order.gaps, orders)
2081 order_keys_noop = set(order_key(order) for order in orders_noop)
2082 if len(order_keys_noop) != 0 or len(orders_noop) != 0:
2083 logger.info(
2084 'Waveform orders already satisified with cached/local data: '
2085 '%i (%i)' % (len(order_keys_noop), len(orders_noop)))
2087 for order in orders_noop:
2088 split_promise(order)
2090 if order_only:
2091 if orders:
2092 self._pending_orders.extend(orders)
2093 logger.info(
2094 'Enqueuing %i waveform order%s.'
2095 % len_plural(orders))
2096 return
2097 else:
2098 if self._pending_orders:
2099 orders.extend(self._pending_orders)
2100 logger.info(
2101 'Adding %i previously enqueued order%s.'
2102 % len_plural(self._pending_orders))
2104 self._pending_orders = []
2106 source_ids = []
2107 sources = {}
2108 for source in self._sources:
2109 if isinstance(source, fdsn.FDSNSource):
2110 source_ids.append(source._source_id)
2111 sources[source._source_id] = source
2113 source_priority = dict(
2114 (source_id, i) for (i, source_id) in enumerate(source_ids))
2116 order_groups = defaultdict(list)
2117 for order in orders:
2118 order_groups[order_key(order)].append(order)
2120 for k, order_group in order_groups.items():
2121 order_group.sort(
2122 key=lambda order: source_priority[order.source_id])
2124 n_order_groups = len(order_groups)
2126 if len(order_groups) != 0 or len(orders) != 0:
2127 logger.info(
2128 'Waveform orders standing for download: %i (%i)'
2129 % (len(order_groups), len(orders)))
2131 task = make_task('Waveform orders processed', n_order_groups)
2132 else:
2133 task = None
2135 def release_order_group(order):
2136 okey = order_key(order)
2137 for followup in order_groups[okey]:
2138 if followup is not order:
2139 split_promise(followup)
2141 del order_groups[okey]
2143 if task:
2144 task.update(n_order_groups - len(order_groups))
2146 def noop(order):
2147 pass
2149 def success(order, trs):
2150 release_order_group(order)
2151 if order.is_near_real_time():
2152 if not trs:
2153 return # keep promise when no data received at real time
2154 else:
2155 tmax = max(tr.tmax+tr.deltat for tr in trs)
2156 tmax = order.tmin \
2157 + round((tmax - order.tmin) / order.deltat) \
2158 * order.deltat
2159 split_promise(order, tmax)
2160 else:
2161 split_promise(order)
2163 def batch_add(paths):
2164 self.add(paths)
2166 calls = queue.Queue()
2168 def enqueue(f):
2169 def wrapper(*args):
2170 calls.put((f, args))
2172 return wrapper
2174 while order_groups:
2176 orders_now = []
2177 empty = []
2178 for k, order_group in order_groups.items():
2179 try:
2180 orders_now.append(order_group.pop(0))
2181 except IndexError:
2182 empty.append(k)
2184 for k in empty:
2185 del order_groups[k]
2187 by_source_id = defaultdict(list)
2188 for order in orders_now:
2189 by_source_id[order.source_id].append(order)
2191 threads = []
2192 for source_id in by_source_id:
2193 def download():
2194 try:
2195 sources[source_id].download_waveforms(
2196 by_source_id[source_id],
2197 success=enqueue(success),
2198 error_permanent=enqueue(split_promise),
2199 error_temporary=noop,
2200 batch_add=enqueue(batch_add))
2202 finally:
2203 calls.put(None)
2205 if len(by_source_id) > 1:
2206 thread = threading.Thread(target=download)
2207 thread.start()
2208 threads.append(thread)
2209 else:
2210 download()
2211 calls.put(None)
2213 ndone = 0
2214 while ndone < len(by_source_id):
2215 ret = calls.get()
2216 if ret is None:
2217 ndone += 1
2218 else:
2219 ret[0](*ret[1])
2221 for thread in threads:
2222 thread.join()
2224 if task:
2225 task.update(n_order_groups - len(order_groups))
2227 if task:
2228 task.done()
2230 @filldocs
2231 def get_waveform_nuts(
2232 self, obj=None, tmin=None, tmax=None, time=None, codes=None,
2233 codes_exclude=None, sample_rate_min=None, sample_rate_max=None,
2234 order_only=False):
2236 '''
2237 Get waveform content entities matching given constraints.
2239 %(query_args)s
2241 Like :py:meth:`get_nuts` with ``kind='waveform'`` but additionally
2242 resolves matching waveform promises (downloads waveforms from remote
2243 sources).
2245 See :py:meth:`iter_nuts` for details on time span matching.
2246 '''
2248 args = self._get_selection_args(WAVEFORM, obj, tmin, tmax, time, codes)
2250 if self.downloads_enabled:
2251 self._redeem_promises(
2252 *args,
2253 codes_exclude,
2254 sample_rate_min,
2255 sample_rate_max,
2256 order_only=order_only)
2258 nuts = sorted(
2259 self.iter_nuts('waveform', *args), key=lambda nut: nut.dkey)
2261 return nuts
2263 @filldocs
2264 def have_waveforms(
2265 self, obj=None, tmin=None, tmax=None, time=None, codes=None):
2267 '''
2268 Check if any waveforms or waveform promises are available for given
2269 constraints.
2271 %(query_args)s
2272 '''
2274 args = self._get_selection_args(WAVEFORM, obj, tmin, tmax, time, codes)
2275 return bool(list(
2276 self.iter_nuts('waveform', *args, limit=1))) \
2277 or (self.downloads_enabled and bool(list(
2278 self.iter_nuts('waveform_promise', *args, limit=1))))
    @filldocs
    def get_waveforms(
            self, obj=None, tmin=None, tmax=None, time=None, codes=None,
            codes_exclude=None, sample_rate_min=None, sample_rate_max=None,
            uncut=False, want_incomplete=True, degap=True,
            maxgap=5, maxlap=None, snap=None, include_last=False,
            load_data=True, accessor_id='default', operator_params=None,
            order_only=False, channel_priorities=None):

        '''
        Get waveforms matching given constraints.

        %(query_args)s

        :param sample_rate_min:
            Consider only waveforms with a sampling rate equal to or greater
            than the given value [Hz].
        :type sample_rate_min:
            float

        :param sample_rate_max:
            Consider only waveforms with a sampling rate equal to or less than
            the given value [Hz].
        :type sample_rate_max:
            float

        :param uncut:
            Set to ``True``, to disable cutting traces to [``tmin``, ``tmax``]
            and to disable degapping/deoverlapping. Returns untouched traces as
            they are read from file segment. File segments are always read in
            their entirety.
        :type uncut:
            bool

        :param want_incomplete:
            If ``True``, gappy/incomplete traces are included in the result.
        :type want_incomplete:
            bool

        :param degap:
            If ``True``, connect traces and remove gaps and overlaps.
        :type degap:
            bool

        :param maxgap:
            Maximum gap size in samples which is filled with interpolated
            samples when ``degap`` is ``True``.
        :type maxgap:
            int

        :param maxlap:
            Maximum overlap size in samples which is removed when ``degap`` is
            ``True``.
        :type maxlap:
            int

        :param snap:
            Rounding functions used when computing sample index from time
            instance, for trace start and trace end, respectively. By default,
            ``(round, round)`` is used.
        :type snap:
            :py:class:`tuple` of 2 callables

        :param include_last:
            If ``True``, add one more sample to the returned traces (the sample
            which would be the first sample of a query with ``tmin`` set to the
            current value of ``tmax``).
        :type include_last:
            bool

        :param load_data:
            If ``True``, waveform data samples are read from files (or cache).
            If ``False``, meta-information-only traces are returned (dummy
            traces with no data samples).
        :type load_data:
            bool

        :param accessor_id:
            Name of consumer on who's behalf data is accessed. Used in cache
            management (see :py:mod:`~pyrocko.squirrel.cache`). Used as a key
            to distinguish different points of extraction for the decision of
            when to release cached waveform data. Should be used when data is
            alternately extracted from more than one region / selection.
        :type accessor_id:
            str

        :param channel_priorities:
            List of band/instrument code combinations to try. For example,
            giving ``['HH', 'BH']`` would first try to get ``HH?`` channels and
            then fallback to ``BH?`` if these are not available. The first
            matching waveforms are returned. Use in combination with
            ``sample_rate_min`` and ``sample_rate_max`` to constrain the sample
            rate.
        :type channel_priorities:
            :py:class:`list` of :py:class:`str`

        See :py:meth:`iter_nuts` for details on time span matching.

        Loaded data is kept in memory (at least) until
        :py:meth:`clear_accessor` has been called or
        :py:meth:`advance_accessor` has been called two consecutive times
        without data being accessed between the two calls (by this accessor).
        Data may still be further kept in the memory cache if held alive by
        consumers with a different ``accessor_id``.
        '''

        # Normalize query constraints: tmin/tmax/codes may alternatively be
        # derived from obj or time.
        tmin, tmax, codes = self._get_selection_args(
            WAVEFORM, obj, tmin, tmax, time, codes)

        if channel_priorities is not None:
            # Delegate to the prioritized variant, which tries each
            # band/instrument combination in turn.
            return self._get_waveforms_prioritized(
                tmin=tmin, tmax=tmax, codes=codes, codes_exclude=codes_exclude,
                sample_rate_min=sample_rate_min,
                sample_rate_max=sample_rate_max,
                uncut=uncut, want_incomplete=want_incomplete, degap=degap,
                maxgap=maxgap, maxlap=maxlap, snap=snap,
                include_last=include_last, load_data=load_data,
                accessor_id=accessor_id, operator_params=operator_params,
                order_only=order_only, channel_priorities=channel_priorities)

        kinds = ['waveform']
        if self.downloads_enabled:
            kinds.append('waveform_promise')

        self_tmin, self_tmax = self.get_time_span(kinds)

        if None in (self_tmin, self_tmax):
            logger.warning(
                'No waveforms available.')
            return []

        # Open time bounds default to the full available time span.
        tmin = tmin if tmin is not None else self_tmin
        tmax = tmax if tmax is not None else self_tmax

        if codes is not None and len(codes) == 1:
            # TODO: fix for multiple / mixed codes
            operator = self.get_operator(codes[0])
            if operator is not None:
                # Codes refer to operator-derived channels: let the operator
                # compute the waveforms from its inputs.
                return operator.get_waveforms(
                    self, codes[0],
                    tmin=tmin, tmax=tmax,
                    uncut=uncut, want_incomplete=want_incomplete, degap=degap,
                    maxgap=maxgap, maxlap=maxlap, snap=snap,
                    include_last=include_last, load_data=load_data,
                    accessor_id=accessor_id, params=operator_params)

        # May trigger downloads (promise redemption) when enabled.
        nuts = self.get_waveform_nuts(
            obj, tmin, tmax, time, codes, codes_exclude, sample_rate_min,
            sample_rate_max, order_only=order_only)

        if order_only:
            # Only place download orders; nothing to return.
            return []

        if load_data:
            traces = [
                self.get_content(nut, 'waveform', accessor_id) for nut in nuts]

        else:
            traces = [
                trace.Trace(**nut.trace_kwargs) for nut in nuts]

        if uncut:
            return traces

        if snap is None:
            snap = (round, round)

        chopped = []
        for tr in traces:
            if not load_data and tr.ydata is not None:
                # Cached trace may still carry data; hand out a data-less
                # copy without touching the cached object.
                tr = tr.copy(data=False)
                tr.ydata = None

            try:
                chopped.append(tr.chop(
                    tmin, tmax,
                    inplace=False,
                    snap=snap,
                    include_last=include_last))

            except trace.NoData:
                # Trace does not intersect the requested time span.
                pass

        processed = self._process_chopped(
            chopped, degap, maxgap, maxlap, want_incomplete, tmin, tmax)

        return processed
    def _get_waveforms_prioritized(
            self, tmin=None, tmax=None, codes=None, codes_exclude=None,
            channel_priorities=None, **kwargs):

        '''
        Get waveforms, trying the given band/instrument code combinations one
        after another.

        For each entry of ``channel_priorities`` (a two-character
        band+instrument code), waveforms are queried with the channel part of
        the codes replaced accordingly. Channels already delivered by a
        higher-priority entry are excluded from subsequent queries, so at most
        one variant per channel is returned.
        '''

        trs_all = []
        codes_have = set()
        for channel in channel_priorities:
            assert len(channel) == 2
            if codes is not None:
                codes_now = [
                    codes_.replace(channel=channel+'?') for codes_ in codes]
            else:
                # NOTE(review): unlike the branch above, this is a single
                # codes object rather than a list -- presumably normalized
                # downstream by get_waveforms; confirm.
                codes_now = model.CodesNSLCE('*', '*', '*', channel+'?')

            # Exclude variants of channels for which a higher-priority
            # band/instrument combination already delivered data.
            codes_exclude_now = list(set(
                codes_.replace(channel=channel+codes_.channel[-1])
                for codes_ in codes_have))

            if codes_exclude:
                codes_exclude_now.extend(codes_exclude)

            trs = self.get_waveforms(
                tmin=tmin,
                tmax=tmax,
                codes=codes_now,
                codes_exclude=codes_exclude_now,
                **kwargs)

            codes_have.update(set(tr.codes for tr in trs))
            trs_all.extend(trs)

        return trs_all
    @filldocs
    def chopper_waveforms(
            self, obj=None, tmin=None, tmax=None, time=None, codes=None,
            codes_exclude=None, sample_rate_min=None, sample_rate_max=None,
            tinc=None, tpad=0.,
            want_incomplete=True, snap_window=False,
            degap=True, maxgap=5, maxlap=None,
            snap=None, include_last=False, load_data=True,
            accessor_id=None, clear_accessor=True, operator_params=None,
            grouping=None, channel_priorities=None):

        '''
        Iterate window-wise over waveform archive.

        %(query_args)s

        :param tinc:
            Time increment (window shift time) (default uses ``tmax-tmin``).
        :type tinc:
            :py:func:`~pyrocko.util.get_time_float`

        :param tpad:
            Padding time appended on either side of the data window (window
            overlap is ``2*tpad``).
        :type tpad:
            :py:func:`~pyrocko.util.get_time_float`

        :param want_incomplete:
            If ``True``, gappy/incomplete traces are included in the result.
        :type want_incomplete:
            bool

        :param snap_window:
            If ``True``, start time windows at multiples of tinc with respect
            to system time zero.
        :type snap_window:
            bool

        :param degap:
            If ``True``, connect traces and remove gaps and overlaps.
        :type degap:
            bool

        :param maxgap:
            Maximum gap size in samples which is filled with interpolated
            samples when ``degap`` is ``True``.
        :type maxgap:
            int

        :param maxlap:
            Maximum overlap size in samples which is removed when ``degap`` is
            ``True``.
        :type maxlap:
            int

        :param snap:
            Rounding functions used when computing sample index from time
            instance, for trace start and trace end, respectively. By default,
            ``(round, round)`` is used.
        :type snap:
            :py:class:`tuple` of 2 callables

        :param include_last:
            If ``True``, add one more sample to the returned traces (the sample
            which would be the first sample of a query with ``tmin`` set to the
            current value of ``tmax``).
        :type include_last:
            bool

        :param load_data:
            If ``True``, waveform data samples are read from files (or cache).
            If ``False``, meta-information-only traces are returned (dummy
            traces with no data samples).
        :type load_data:
            bool

        :param accessor_id:
            Name of consumer on who's behalf data is accessed. Used in cache
            management (see :py:mod:`~pyrocko.squirrel.cache`). Used as a key
            to distinguish different points of extraction for the decision of
            when to release cached waveform data. Should be used when data is
            alternately extracted from more than one region / selection.
        :type accessor_id:
            str

        :param clear_accessor:
            If ``True`` (default), :py:meth:`clear_accessor` is called when the
            chopper finishes. Set to ``False`` to keep loaded waveforms in
            memory when the generator returns.
        :type clear_accessor:
            bool

        :param grouping:
            By default, traversal over the data is over time and all matching
            traces of a time window are yielded. Using this option, it is
            possible to traverse the data first by group (e.g. station or
            network) and second by time. This can reduce the number of traces
            in each batch and thus reduce the memory footprint of the process.
        :type grouping:
            :py:class:`~pyrocko.squirrel.operators.base.Grouping`

        :yields:
            For each extracted time window or waveform group a
            :py:class:`Batch` object is yielded.

        See :py:meth:`iter_nuts` for details on time span matching.
        '''

        tmin, tmax, codes = self._get_selection_args(
            WAVEFORM, obj, tmin, tmax, time, codes)

        kinds = ['waveform']
        if self.downloads_enabled:
            kinds.append('waveform_promise')

        self_tmin, self_tmax = self.get_time_span(kinds)

        if None in (self_tmin, self_tmax):
            logger.warning(
                'Content has undefined time span. No waveforms and no '
                'waveform promises?')
            return

        if snap_window and tinc is not None:
            # Align window boundaries to multiples of tinc w.r.t. time zero.
            tmin = tmin if tmin is not None else self_tmin
            tmax = tmax if tmax is not None else self_tmax
            tmin = math.floor(tmin / tinc) * tinc
            tmax = math.ceil(tmax / tinc) * tinc
        else:
            tmin = tmin if tmin is not None else self_tmin + tpad
            tmax = tmax if tmax is not None else self_tmax - tpad

        if tinc is None:
            # Single window covering the whole requested time span.
            tinc = tmax - tmin
            nwin = 1
        elif tinc == 0.0:
            nwin = 1
        else:
            # eps guards against spilling into an extra, empty window due to
            # floating point rounding.
            eps = 1e-6
            nwin = max(1, int((tmax - tmin) / tinc - eps) + 1)

        try:
            if accessor_id is None:
                accessor_id = 'chopper%i' % self._n_choppers_active

            self._n_choppers_active += 1

            if grouping is None:
                codes_list = [codes]
            else:
                # Use a throwaway operator to compute the code groups to
                # iterate over.
                operator = Operator(
                    filtering=CodesPatternFiltering(codes=codes),
                    grouping=grouping)

                available = set(self.get_codes(kind='waveform'))
                if self.downloads_enabled:
                    available.update(self.get_codes(kind='waveform_promise'))
                operator.update_mappings(sorted(available))

                codes_list = [
                    codes_patterns_list(scl)
                    for scl in operator.iter_in_codes()]

            ngroups = len(codes_list)
            for igroup, scl in enumerate(codes_list):
                for iwin in range(nwin):
                    # Last window is clipped to tmax.
                    wmin, wmax = tmin+iwin*tinc, min(tmin+(iwin+1)*tinc, tmax)

                    chopped = self.get_waveforms(
                        tmin=wmin-tpad,
                        tmax=wmax+tpad,
                        codes=scl,
                        codes_exclude=codes_exclude,
                        sample_rate_min=sample_rate_min,
                        sample_rate_max=sample_rate_max,
                        snap=snap,
                        include_last=include_last,
                        load_data=load_data,
                        want_incomplete=want_incomplete,
                        degap=degap,
                        maxgap=maxgap,
                        maxlap=maxlap,
                        accessor_id=accessor_id,
                        operator_params=operator_params,
                        channel_priorities=channel_priorities)

                    self.advance_accessor(accessor_id)

                    yield Batch(
                        tmin=wmin,
                        tmax=wmax,
                        tpad=tpad,
                        i=iwin,
                        n=nwin,
                        igroup=igroup,
                        ngroups=ngroups,
                        traces=chopped)

        finally:
            # Always release bookkeeping, even when the consumer abandons the
            # generator early.
            self._n_choppers_active -= 1
            if clear_accessor:
                self.clear_accessor(accessor_id, 'waveform')
2704 def _process_chopped(
2705 self, chopped, degap, maxgap, maxlap, want_incomplete, tmin, tmax):
2707 chopped.sort(key=lambda a: a.full_id)
2708 if degap:
2709 chopped = trace.degapper(chopped, maxgap=maxgap, maxlap=maxlap)
2711 if not want_incomplete:
2712 chopped_weeded = []
2713 for tr in chopped:
2714 emin = tr.tmin - tmin
2715 emax = tr.tmax + tr.deltat - tmax
2716 if (abs(emin) <= 0.5*tr.deltat and abs(emax) <= 0.5*tr.deltat):
2717 chopped_weeded.append(tr)
2719 elif degap:
2720 if (0. < emin <= 5. * tr.deltat
2721 and -5. * tr.deltat <= emax < 0.):
2723 tr.extend(tmin, tmax-tr.deltat, fillmethod='repeat')
2724 chopped_weeded.append(tr)
2726 chopped = chopped_weeded
2728 return chopped
    def _get_pyrocko_stations(
            self, obj=None, tmin=None, tmax=None, time=None, codes=None,
            on_error='raise'):

        '''
        Collect station and channel information and convert it into a list of
        classic :py:class:`pyrocko.model.station.Station` objects.

        Attribute tuples seen in multiple entries (from station and channel
        metadata) are merged with :py:func:`pyrocko.util.consistency_merge`;
        ``on_error`` is passed through to control how inconsistencies are
        handled there.
        '''

        from pyrocko import model as pmodel

        if codes is not None:
            codes = codes_patterns_for_kind(STATION, codes)

        # Group station argument tuples and channel objects by their
        # (network, station, location) codes.
        by_nsl = defaultdict(lambda: (list(), list()))
        for station in self.get_stations(obj, tmin, tmax, time, codes):
            sargs = station._get_pyrocko_station_args()
            by_nsl[station.codes.nsl][0].append(sargs)

        if codes is not None:
            codes = [model.CodesNSLCE(c) for c in codes]

        for channel in self.get_channels(obj, tmin, tmax, time, codes):
            sargs = channel._get_pyrocko_station_args()
            sargs_list, channels_list = by_nsl[channel.codes.nsl]
            sargs_list.append(sargs)
            channels_list.append(channel)

        pstations = []
        nsls = list(by_nsl.keys())
        nsls.sort()
        for nsl in nsls:
            sargs_list, channels_list = by_nsl[nsl]
            # Merge possibly redundant station attribute tuples into one.
            sargs = util.consistency_merge(
                [('',) + x for x in sargs_list],
                error=on_error)

            by_c = defaultdict(list)
            for ch in channels_list:
                by_c[ch.codes.channel].append(ch._get_pyrocko_channel_args())

            chas = list(by_c.keys())
            chas.sort()
            pchannels = []
            for cha in chas:
                list_of_cargs = by_c[cha]
                cargs = util.consistency_merge(
                    [('',) + x for x in list_of_cargs],
                    error=on_error)
                pchannels.append(pmodel.Channel(*cargs))

            pstations.append(
                pmodel.Station(*sargs, channels=pchannels))

        return pstations
2781 @property
2782 def pile(self):
2784 '''
2785 Emulates the older :py:class:`pyrocko.pile.Pile` interface.
2787 This property exposes a :py:class:`pyrocko.squirrel.pile.Pile` object,
2788 which emulates most of the older :py:class:`pyrocko.pile.Pile` methods
2789 but uses the fluffy power of the Squirrel under the hood.
2791 This interface can be used as a drop-in replacement for piles which are
2792 used in existing scripts and programs for efficient waveform data
2793 access. The Squirrel-based pile scales better for large datasets. Newer
2794 scripts should use Squirrel's native methods to avoid the emulation
2795 overhead.
2796 '''
2797 from . import pile
2799 if self._pile is None:
2800 self._pile = pile.Pile(self)
2802 return self._pile
2804 def snuffle(self, **kwargs):
2805 '''
2806 Look at dataset in Snuffler.
2807 '''
2808 self.pile.snuffle(**kwargs)
2810 def _gather_codes_keys(self, kind, gather, selector):
2811 return set(
2812 gather(codes)
2813 for codes in self.iter_codes(kind)
2814 if selector is None or selector(codes))
2816 def __str__(self):
2817 return str(self.get_stats())
    def get_coverage(
            self, kind, tmin=None, tmax=None, codes=None, limit=None):

        '''
        Get coverage information.

        Get information about strips of gapless data coverage.

        :param kind:
            Content kind to be queried.
        :type kind:
            str

        :param tmin:
            Start time of query interval.
        :type tmin:
            :py:func:`~pyrocko.util.get_time_float`

        :param tmax:
            End time of query interval.
        :type tmax:
            :py:func:`~pyrocko.util.get_time_float`

        :param codes:
            If given, restrict query to given content codes patterns.
        :type codes:
            :py:class:`list` of :py:class:`~pyrocko.squirrel.model.Codes`
            objects appropriate for the queried content type, or anything which
            can be converted to such objects.

        :param limit:
            Limit query to return only up to a given maximum number of entries
            per matching time series (without setting this option, very gappy
            data could cause the query to execute for a very long time).
        :type limit:
            int

        :returns:
            Information about time spans covered by the requested time series
            data.
        :rtype:
            :py:class:`list` of :py:class:`~pyrocko.squirrel.model.Coverage`
        '''

        # Times are stored split into integer seconds plus a small offset.
        tmin_seconds, tmin_offset = model.tsplit(tmin)
        tmax_seconds, tmax_offset = model.tsplit(tmax)
        kind_id = to_kind_id(kind)

        codes_info = list(self._iter_codes_info(kind=kind))

        # Gather (pattern, kind_codes_id, codes, deltat) work items, either
        # for all known codes or only those matching the given patterns.
        kdata_all = []
        if codes is None:
            for _, codes_entry, deltat, kind_codes_id, _ in codes_info:
                kdata_all.append(
                    (codes_entry, kind_codes_id, codes_entry, deltat))

        else:
            for codes_entry in codes:
                pattern = to_codes(kind_id, codes_entry)
                for _, codes_entry, deltat, kind_codes_id, _ in codes_info:
                    if model.match_codes(pattern, codes_entry):
                        kdata_all.append(
                            (pattern, kind_codes_id, codes_entry, deltat))

        kind_codes_ids = [x[1] for x in kdata_all]

        # Coverage changes are stored as increments; the absolute number of
        # traces running at tmin must be established from the nuts.
        counts_at_tmin = {}
        if tmin is not None:
            for nut in self.iter_nuts(
                    kind, tmin, tmin, kind_codes_ids=kind_codes_ids):

                k = nut.codes, nut.deltat
                if k not in counts_at_tmin:
                    counts_at_tmin[k] = 0

                counts_at_tmin[k] += 1

        coverages = []
        for pattern, kind_codes_id, codes_entry, deltat in kdata_all:
            # entry layout: [pattern, codes, deltat, tmin, tmax, changes]
            entry = [pattern, codes_entry, deltat, None, None, []]
            for i, order in [(0, 'ASC'), (1, 'DESC')]:
                # First/last coverage change defines the total time span of
                # this time series.
                sql = self._sql('''
                    SELECT
                        time_seconds,
                        time_offset
                    FROM %(db)s.%(coverage)s
                    WHERE
                        kind_codes_id == ?
                    ORDER BY
                        kind_codes_id ''' + order + ''',
                        time_seconds ''' + order + ''',
                        time_offset ''' + order + '''
                    LIMIT 1
                ''')

                for row in self._conn.execute(sql, [kind_codes_id]):
                    entry[3+i] = model.tjoin(row[0], row[1])

            if None in entry[3:5]:
                # No coverage information stored for this time series.
                continue

            args = [kind_codes_id]

            sql_time = ''
            if tmin is not None:
                # intentionally < because (== tmin) is queried from nuts
                sql_time += ' AND ( ? < time_seconds ' \
                    'OR ( ? == time_seconds AND ? < time_offset ) ) '
                args.extend([tmin_seconds, tmin_seconds, tmin_offset])

            if tmax is not None:
                sql_time += ' AND ( time_seconds < ? ' \
                    'OR ( ? == time_seconds AND time_offset <= ? ) ) '
                args.extend([tmax_seconds, tmax_seconds, tmax_offset])

            sql_limit = ''
            if limit is not None:
                sql_limit = ' LIMIT ?'
                args.append(limit)

            sql = self._sql('''
                SELECT
                    time_seconds,
                    time_offset,
                    step
                FROM %(db)s.%(coverage)s
                WHERE
                    kind_codes_id == ?
                    ''' + sql_time + '''
                ORDER BY
                    kind_codes_id,
                    time_seconds,
                    time_offset
            ''' + sql_limit)

            rows = list(self._conn.execute(sql, args))

            if limit is not None and len(rows) == limit:
                # Result truncated by limit: signal by setting changes to
                # None rather than returning an incomplete list.
                entry[-1] = None
            else:
                counts = counts_at_tmin.get((codes_entry, deltat), 0)
                tlast = None
                if tmin is not None:
                    entry[-1].append((tmin, counts))
                    tlast = tmin

                # Accumulate the step increments into absolute counts.
                for row in rows:
                    t = model.tjoin(row[0], row[1])
                    counts += row[2]
                    entry[-1].append((t, counts))
                    tlast = t

                if tmax is not None and (tlast is None or tlast != tmax):
                    entry[-1].append((tmax, counts))

            coverages.append(model.Coverage.from_values(entry + [kind_id]))

        return coverages
2978 def get_stationxml(
2979 self, obj=None, tmin=None, tmax=None, time=None, codes=None,
2980 level='response', on_error='raise'):
2982 '''
2983 Get station/channel/response metadata in StationXML representation.
2985 %(query_args)s
2987 :returns:
2988 :py:class:`~pyrocko.io.stationxml.FDSNStationXML` object.
2989 '''
2991 if level not in ('network', 'station', 'channel', 'response'):
2992 raise ValueError('Invalid level: %s' % level)
2994 tmin, tmax, codes = self._get_selection_args(
2995 CHANNEL, obj, tmin, tmax, time, codes)
2997 def tts(t):
2998 if t is None:
2999 return '<none>'
3000 else:
3001 return util.tts(t, format='%Y-%m-%d %H:%M:%S')
3003 if on_error == 'ignore':
3004 def handle_error(exc):
3005 pass
3007 elif on_error == 'warn':
3008 def handle_error(exc):
3009 logger.warning(str(exc))
3011 elif on_error == 'raise':
3012 def handle_error(exc):
3013 raise exc
3015 def use_first(node_type_name, codes, k, group):
3016 if on_error == 'warn':
3017 logger.warning(
3018 'Duplicates for %s %s, %s - %s -> using first' % (
3019 node_type_name,
3020 '.'.join(codes),
3021 tts(k[0]), tts(k[1])))
3023 return group[0]
3025 def deduplicate(node_type_name, codes, nodes):
3026 groups = defaultdict(list)
3027 for node in nodes:
3028 k = (node.start_date, node.end_date)
3029 groups[k].append(node)
3031 return [
3032 use_first(node_type_name, codes, k, group)
3033 for (k, group) in groups.items()]
3035 filtering = CodesPatternFiltering(codes=codes)
3037 nslcs = list(set(
3038 codes.nslc for codes in
3039 filtering.filter(self.get_codes(kind='channel'))))
3041 from pyrocko.io import stationxml as sx
3043 networks = []
3044 for net, stas in prefix_tree(nslcs):
3045 network = sx.Network(code=net)
3046 networks.append(network)
3048 if level not in ('station', 'channel', 'response'):
3049 continue
3051 for sta, locs in stas:
3052 stations = self.get_stations(
3053 tmin=tmin,
3054 tmax=tmax,
3055 codes=(net, sta, '*'),
3056 model='stationxml')
3058 if on_error != 'raise':
3059 stations = deduplicate(
3060 'Station', (net, sta), stations)
3062 errors = sx.check_overlaps(
3063 'Station', (net, sta), stations)
3065 if errors:
3066 handle_error(error.Duplicate(
3067 'Overlapping/duplicate station info:\n %s'
3068 % '\n '.join(errors)))
3070 network.station_list.extend(stations)
3072 if level not in ('channel', 'response'):
3073 continue
3075 for loc, chas in locs:
3076 for cha, _ in chas:
3077 channels = self.get_channels(
3078 tmin=tmin,
3079 tmax=tmax,
3080 codes=(net, sta, loc, cha),
3081 model='stationxml')
3083 if on_error != 'raise':
3084 channels = deduplicate(
3085 'Channel', (net, sta, loc, cha), channels)
3087 errors = sx.check_overlaps(
3088 'Channel', (net, sta, loc, cha), channels)
3090 if errors:
3091 handle_error(error.Duplicate(
3092 'Overlapping/duplicate channel info:\n %s'
3093 % '\n '.join(errors)))
3095 for channel in channels:
3096 station = sx.find_containing(stations, channel)
3097 if station is not None:
3098 station.channel_list.append(channel)
3099 else:
3100 handle_error(error.NotAvailable(
3101 'No station or station epoch found '
3102 'for channel: %s' % '.'.join(
3103 (net, sta, loc, cha))))
3105 continue
3107 if level != 'response':
3108 continue
3110 try:
3111 response_sq, response_sx = self.get_response(
3112 codes=(net, sta, loc, cha),
3113 tmin=channel.start_date,
3114 tmax=channel.end_date,
3115 model='stationxml+',
3116 on_duplicate=on_error)
3118 except error.NotAvailable as e:
3119 handle_error(e)
3120 continue
3122 if not (
3123 sx.eq_open(
3124 channel.start_date, response_sq.tmin)
3125 and sx.eq_open(
3126 channel.end_date, response_sq.tmax)):
3128 handle_error(error.Inconsistencies(
3129 'Response time span does not match '
3130 'channel time span: %s' % '.'.join(
3131 (net, sta, loc, cha))))
3133 channel.response = response_sx
3135 return sx.FDSNStationXML(
3136 source='Generated by Pyrocko Squirrel.',
3137 network_list=networks)
    def add_operator(self, op):
        '''
        Attach an operator to this squirrel instance.

        Call :py:meth:`update_operator_mappings` afterwards to make its
        derived codes available.
        '''
        self._operators.append(op)
3142 def update_operator_mappings(self):
3143 available = self.get_codes(kind=('channel'))
3145 for operator in self._operators:
3146 operator.update_mappings(available, self._operator_registry)
3148 def iter_operator_mappings(self):
3149 for operator in self._operators:
3150 for in_codes, out_codes in operator.iter_mappings():
3151 yield operator, in_codes, out_codes
    def get_operator_mappings(self):
        '''
        Get a list of all ``(operator, in_codes, out_codes)`` triples (see
        :py:meth:`iter_operator_mappings`).
        '''
        return list(self.iter_operator_mappings())
3156 def get_operator(self, codes):
3157 try:
3158 return self._operator_registry[codes][0]
3159 except KeyError:
3160 return None
3162 def get_operator_group(self, codes):
3163 try:
3164 return self._operator_registry[codes]
3165 except KeyError:
3166 return None, (None, None, None)
3168 def iter_operator_codes(self):
3169 for _, _, out_codes in self.iter_operator_mappings():
3170 for codes in out_codes:
3171 yield codes
    def get_operator_codes(self):
        '''
        Get a list of all output codes produced by the attached operators
        (see :py:meth:`iter_operator_codes`).
        '''
        return list(self.iter_operator_codes())
3176 def print_tables(self, table_names=None, stream=None):
3177 '''
3178 Dump raw database tables in textual form (for debugging purposes).
3180 :param table_names:
3181 Names of tables to be dumped or ``None`` to dump all.
3182 :type table_names:
3183 :py:class:`list` of :py:class:`str`
3185 :param stream:
3186 Open file or ``None`` to dump to standard output.
3187 '''
3189 if stream is None:
3190 stream = sys.stdout
3192 if isinstance(table_names, str):
3193 table_names = [table_names]
3195 if table_names is None:
3196 table_names = [
3197 'selection_file_states',
3198 'selection_nuts',
3199 'selection_kind_codes_count',
3200 'files', 'nuts', 'kind_codes', 'kind_codes_count']
3202 m = {
3203 'selection_file_states': '%(db)s.%(file_states)s',
3204 'selection_nuts': '%(db)s.%(nuts)s',
3205 'selection_kind_codes_count': '%(db)s.%(kind_codes_count)s',
3206 'files': 'files',
3207 'nuts': 'nuts',
3208 'kind_codes': 'kind_codes',
3209 'kind_codes_count': 'kind_codes_count'}
3211 for table_name in table_names:
3212 self._database.print_table(
3213 m[table_name] % self._names, stream=stream)
class SquirrelStats(Object):
    '''
    Container to hold statistics about contents available from a Squirrel.

    See also :py:meth:`Squirrel.get_stats`.
    '''

    # Number of files in selection.
    nfiles = Int.T(
        help='Number of files in selection.')
    # Number of index nuts in selection.
    nnuts = Int.T(
        help='Number of index nuts in selection.')
    # Available code sequences in selection.
    codes = List.T(
        Tuple.T(content_t=String.T()),
        help='Available code sequences in selection, e.g. '
             '(agency, network, station, location) for stations nuts.')
    # Available content kinds in selection.
    kinds = List.T(
        String.T(),
        help='Available content types in selection.')
    # Aggregated file size [bytes].
    total_size = Int.T(
        help='Aggregated file size of files is selection.')
    # Nut counts per kind and codes: counts[kind][codes].
    counts = Dict.T(
        String.T(), Dict.T(Tuple.T(content_t=String.T()), Int.T()),
        help='Breakdown of how many nuts of any content type and code '
             'sequence are available in selection, ``counts[kind][codes]``.')
    # (tmin, tmax) per content kind.
    time_spans = Dict.T(
        String.T(), Tuple.T(content_t=Timestamp.T()),
        help='Time spans by content type.')
    # Human-readable descriptions of attached sources.
    sources = List.T(
        String.T(),
        help='Descriptions of attached sources.')
    # Human-readable descriptions of attached operators.
    operators = List.T(
        String.T(),
        help='Descriptions of attached operators.')

    def __str__(self):
        # Total number of nuts per content kind.
        kind_counts = dict(
            (kind, sum(self.counts[kind].values())) for kind in self.kinds)

        scodes = model.codes_to_str_abbreviated(self.codes)

        ssources = '<none>' if not self.sources else '\n' + '\n'.join(
            '  ' + s for s in self.sources)

        soperators = '<none>' if not self.operators else '\n' + '\n'.join(
            '  ' + s for s in self.operators)

        def stime(t):
            # Render a timestamp, treating the global time sentinels as
            # undefined.
            return util.tts(t) if t is not None and t not in (
                model.g_tmin, model.g_tmax) else '<none>'

        def stable(rows):
            # Column-aligned table formatting.
            ns = [max(len(w) for w in col) for col in zip(*rows)]
            return '\n'.join(
                '  '.join(w.ljust(n) for n, w in zip(ns, row))
                for row in rows)

        def indent(s):
            return '\n'.join('  '+line for line in s.splitlines())

        stspans = '<none>' if not self.kinds else '\n' + indent(stable([(
            kind + ':',
            str(kind_counts[kind]),
            stime(self.time_spans[kind][0]),
            '-',
            stime(self.time_spans[kind][1])) for kind in sorted(self.kinds)]))

        s = '''
Number of files:               %i
Total size of known files:     %s
Number of index nuts:          %i
Available content kinds:       %s
Available codes:               %s
Sources:                       %s
Operators:                     %s''' % (
            self.nfiles,
            util.human_bytesize(self.total_size),
            self.nnuts,
            stspans, scodes, ssources, soperators)

        return s.lstrip()
# Public API of this module.
__all__ = [
    'Squirrel',
    'SquirrelStats',
]