1# http://pyrocko.org - GPLv3
2#
3# The Pyrocko Developers, 21st Century
4# ---|P------/S----------~Lg----------
6from __future__ import absolute_import, print_function
8import sys
9import os
11import math
12import logging
13import threading
14import queue
15from collections import defaultdict
17from pyrocko.guts import Object, Int, List, Tuple, String, Timestamp, Dict
18from pyrocko import util, trace
19from pyrocko.progress import progress
21from . import model, io, cache, dataset
23from .model import to_kind_id, separator, WaveformOrder
24from .client import fdsn, catalog
25from .selection import Selection, filldocs
26from .database import abspath
27from . import client, environment, error
# Module-level logger; 'psq' is the squirrel logging namespace.
logger = logging.getLogger('psq.base')

# Namespace prefix for guts type definitions in this module
# (pyrocko.guts serialization convention — presumably picked up by guts
# machinery at class-definition time; confirm against pyrocko.guts docs).
guts_prefix = 'squirrel'
def make_task(*args):
    '''Create a progress-reporting task routed through this module's logger.'''
    return progress.task(*args, logger=logger)
def lpick(condition, seq):
    '''
    Partition ``seq`` by predicate.

    Returns a pair ``(nay, yea)`` where ``nay`` holds the elements for which
    ``condition`` is falsy and ``yea`` those for which it is truthy. Order of
    the input sequence is preserved within each part.
    '''
    nay, yea = [], []
    for element in seq:
        (yea if condition(element) else nay).append(element)

    return nay, yea
def codes_fill(n, codes):
    '''
    Pad a codes tuple with ``'*'`` wildcards up to length ``n``.

    If ``codes`` is longer than ``n`` it is truncated instead.
    '''
    missing = n - len(codes)
    return codes[:n] + ('*',) * missing
# Number of codes elements relevant for each content kind when building
# query patterns (see codes_patterns_for_kind).
c_kind_to_ncodes = {
    'station': 4,
    'channel': 6,
    'response': 6,
    'waveform': 6,
    'event': 1,
    'waveform_promise': 6,
    'undefined': 1}


# Template for a fully inflated 6-element codes pattern and, indexed by the
# number of given codes (0..6), the slot at which those codes are inserted.
# First slot matches the empty string exactly, the rest are wildcards.
c_inflated = ['', '*', '*', '*', '*', '*']
c_offsets = [0, 2, 1, 1, 1, 1, 0]
def codes_inflate(codes):
    '''
    Expand a partial codes tuple into the full 6-element wildcard pattern.

    The given codes are placed at the slot appropriate for their count (see
    ``c_offsets``); remaining positions keep the template wildcards.
    '''
    codes = codes[:6]
    pattern = list(c_inflated)
    offset = c_offsets[len(codes)]
    pattern[offset:offset + len(codes)] = codes
    return pattern
def codes_inflate2(codes):
    '''
    Front-fill the 6-element wildcard template with the given codes.

    Unlike :py:func:`codes_inflate`, codes are always placed at the start and
    the result is returned as a tuple.
    '''
    filled = list(c_inflated)
    filled[:len(codes)] = codes
    return tuple(filled)
def codes_patterns_for_kind(kind, codes):
    '''
    Translate user-supplied codes (or a list of codes tuples) into GLOB
    patterns appropriate for the given content kind.
    '''
    if not codes:
        return []

    # A sequence of codes tuples: recurse on each and flatten the results.
    if not isinstance(codes[0], str):
        patterns = []
        for subcodes in codes:
            patterns.extend(codes_patterns_for_kind(kind, subcodes))
        return patterns

    if kind in ('event', 'undefined'):
        return [codes]

    cfill = codes_inflate(codes)[:c_kind_to_ncodes[kind]]

    if kind == 'station':
        # Also emit a variant with a literal '[*]' in the fourth slot so
        # placeholder entries are matched as well.
        alternative = list(cfill)
        alternative[3] = '[*]'
        return [cfill, alternative]

    return [cfill]
def group_channels(channels):
    '''
    Group channels by codes, ignoring the last character of the channel code.

    Channels like BHE/BHN/BHZ, whose codes differ only in the final
    character, end up in the same group. Returns a mapping of the truncated
    codes tuple to the list of member channels.
    '''
    groups = defaultdict(list)
    for channel in channels:
        *head, last = channel.codes
        key = tuple(head) + (last[:-1],)
        groups[key].append(channel)

    return groups
def pyrocko_station_from_channel_group(group, extra_args):
    '''
    Build a classic pyrocko Station from a group of squirrel channels.

    Station-level attributes are merged (and checked for consistency) from
    all channels in the group plus any ``extra_args``.
    '''
    from pyrocko import model as pmodel

    merged = util.consistency_merge(
        [channel._get_pyrocko_station_args() for channel in group]
        + extra_args)

    return pmodel.Station(
        network=merged[0],
        station=merged[1],
        location=merged[2],
        lat=merged[3],
        lon=merged[4],
        elevation=merged[5],
        depth=merged[6],
        channels=[ch.get_pyrocko_channel() for ch in group])
def blocks(tmin, tmax, deltat, nsamples_block=100000):
    '''
    Yield time-block boundaries covering the interval ``[tmin, tmax)``.

    Blocks have a fixed duration of ``nsamples_block * deltat`` and are
    aligned to multiples of that duration.
    '''
    tblock = deltat * nsamples_block
    first = int(math.floor(tmin / tblock))
    last = int(math.ceil(tmax / tblock))
    for iblock in range(first, last):
        yield iblock * tblock, (iblock + 1) * tblock
def gaps(avail, tmin, tmax):
    '''
    Compute the sub-intervals of ``[tmin, tmax)`` not covered by ``avail``.

    ``avail`` is a sequence of ``(tmin_a, tmax_a)`` intervals. Returns a list
    of ``(tmin_g, tmax_g)`` gap intervals; empty gaps are suppressed.
    '''
    assert tmin < tmax

    # Sweep-line over interval start/end events; the query window itself
    # contributes an opening at tmax and a closing at tmin so coverage
    # level 0 marks uncovered time inside the window.
    events = [(tmax, 1), (tmin, -1)]
    for tmin_a, tmax_a in avail:
        assert tmin_a < tmax_a
        events.append((tmin_a, 1))
        events.append((tmax_a, -1))

    events.sort()

    uncovered = []
    level = 1
    t_open = None
    for t, step in events:
        if level == 1 and step == -1:
            t_open = t
        elif level == 0 and step == 1 and t_open is not None:
            if t_open != t:
                uncovered.append((t_open, t))

        level += step

    return uncovered
def order_key(order):
    '''Sorting/grouping key for waveform orders: ``(codes, tmin, tmax)``.'''
    codes = order.codes
    return codes, order.tmin, order.tmax
class Batch(object):
    '''
    Batch of waveforms from window wise data extraction.

    Encapsulates state and results yielded for each window in window-wise
    waveform extraction with the :py:meth:`Squirrel.chopper_waveforms` method.

    *Attributes:*

    .. py:attribute:: tmin

        Start of this time window.

    .. py:attribute:: tmax

        End of this time window.

    .. py:attribute:: i

        Index of this time window in sequence.

    .. py:attribute:: n

        Total number of time windows in sequence.

    .. py:attribute:: traces

        Extracted waveforms for this time window.
    '''

    def __init__(self, tmin, tmax, i, n, traces):
        # Plain data container: store everything exactly as given.
        self.tmin, self.tmax = tmin, tmax
        self.i, self.n = i, n
        self.traces = traces
204class Squirrel(Selection):
205 '''
206 Prompt, lazy, indexing, caching, dynamic seismological dataset access.
208 :param env:
209 Squirrel environment instance or directory path to use as starting
210 point for its detection. By default, the current directory is used as
211 starting point. When searching for a usable environment the directory
212 ``'.squirrel'`` or ``'squirrel'`` in the current (or starting point)
213 directory is used if it exists, otherwise the parent directories are
    searched upwards for the existence of such a directory. If no such
215 directory is found, the user's global Squirrel environment
216 ``'$HOME/.pyrocko/squirrel'`` is used.
217 :type env:
218 :py:class:`~pyrocko.squirrel.environment.Environment` or
219 :py:class:`str`
221 :param database:
222 Database instance or path to database. By default the
223 database found in the detected Squirrel environment is used.
224 :type database:
225 :py:class:`~pyrocko.squirrel.database.Database` or :py:class:`str`
227 :param cache_path:
228 Directory path to use for data caching. By default, the ``'cache'``
229 directory in the detected Squirrel environment is used.
230 :type cache_path:
231 :py:class:`str`
233 :param persistent:
234 If given a name, create a persistent selection.
235 :type persistent:
236 :py:class:`str`
238 This is the central class of the Squirrel framework. It provides a unified
239 interface to query and access seismic waveforms, station meta-data and
240 event information from local file collections and remote data sources. For
241 prompt responses, a profound database setup is used under the hood. To
242 speed up assemblage of ad-hoc data selections, files are indexed on first
243 use and the extracted meta-data is remembered in the database for
244 subsequent accesses. Bulk data is lazily loaded from disk and remote
245 sources, just when requested. Once loaded, data is cached in memory to
246 expedite typical access patterns. Files and data sources can be dynamically
247 added to and removed from the Squirrel selection at runtime.
249 Queries are restricted to the contents of the files currently added to the
250 Squirrel selection (usually a subset of the file meta-information
251 collection in the database). This list of files is referred to here as the
252 "selection". By default, temporary tables are created in the attached
253 database to hold the names of the files in the selection as well as various
254 indices and counters. These tables are only visible inside the application
255 which created them and are deleted when the database connection is closed
256 or the application exits. To create a selection which is not deleted at
257 exit, supply a name to the ``persistent`` argument of the Squirrel
258 constructor. Persistent selections are shared among applications using the
259 same database.
261 **Method summary**
263 Some of the methods are implemented in :py:class:`Squirrel`'s base class
264 :py:class:`~pyrocko.squirrel.selection.Selection`.
266 .. autosummary::
268 ~Squirrel.add
269 ~Squirrel.add_source
270 ~Squirrel.add_fdsn
271 ~Squirrel.add_catalog
272 ~Squirrel.add_dataset
273 ~Squirrel.add_virtual
274 ~Squirrel.update
275 ~Squirrel.update_waveform_promises
276 ~Squirrel.advance_accessor
277 ~Squirrel.clear_accessor
278 ~Squirrel.reload
279 ~pyrocko.squirrel.selection.Selection.iter_paths
280 ~Squirrel.iter_nuts
281 ~Squirrel.iter_kinds
282 ~Squirrel.iter_deltats
283 ~Squirrel.iter_codes
284 ~Squirrel.iter_counts
285 ~pyrocko.squirrel.selection.Selection.get_paths
286 ~Squirrel.get_nuts
287 ~Squirrel.get_kinds
288 ~Squirrel.get_deltats
289 ~Squirrel.get_codes
290 ~Squirrel.get_counts
291 ~Squirrel.get_time_span
292 ~Squirrel.get_deltat_span
293 ~Squirrel.get_nfiles
294 ~Squirrel.get_nnuts
295 ~Squirrel.get_total_size
296 ~Squirrel.get_stats
297 ~Squirrel.get_content
298 ~Squirrel.get_stations
299 ~Squirrel.get_channels
300 ~Squirrel.get_responses
301 ~Squirrel.get_events
302 ~Squirrel.get_waveform_nuts
303 ~Squirrel.get_waveforms
304 ~Squirrel.chopper_waveforms
305 ~Squirrel.get_coverage
306 ~Squirrel.pile
307 ~Squirrel.snuffle
308 ~Squirrel.glob_codes
309 ~pyrocko.squirrel.selection.Selection.get_database
310 ~Squirrel.print_tables
311 '''
    def __init__(
            self, env=None, database=None, cache_path=None, persistent=None):

        # Resolve the environment first; database location, cache path and
        # persistence default to what the environment configures.
        if not isinstance(env, environment.Environment):
            env = environment.get_environment(env)

        if database is None:
            database = env.expand_path(env.database_path)

        if cache_path is None:
            cache_path = env.expand_path(env.cache_path)

        if persistent is None:
            persistent = env.persistent

        Selection.__init__(
            self, database=database, persistent=persistent)

        # Relative paths stored in the database are resolved against the
        # parent directory of the environment's base path.
        self.get_database().set_basepath(os.path.dirname(env.get_basepath()))

        # Loaded content is cached in memory; waveforms get their own cache,
        # everything else shares the default one.
        self._content_caches = {
            'waveform': cache.ContentCache(),
            'default': cache.ContentCache()}

        self._cache_path = cache_path

        self._sources = []
        self._operators = []
        self._operator_registry = {}

        self._pile = None
        self._n_choppers_active = 0

        # Names of the selection-private tables created below.
        self._names.update({
            'nuts': self.name + '_nuts',
            'kind_codes_count': self.name + '_kind_codes_count',
            'coverage': self.name + '_coverage'})

        with self.transaction() as cursor:
            self._create_tables_squirrel(cursor)
    def _create_tables_squirrel(self, cursor):
        # Create the selection-private nut index, kind-codes counters and
        # coverage bookkeeping, plus the triggers keeping them consistent.
        # Table name placeholders (%(db)s, %(nuts)s, ...) are substituted by
        # self._sql() with the names registered in __init__.

        cursor.execute(self._register_table(self._sql(
            '''
                CREATE TABLE IF NOT EXISTS %(db)s.%(nuts)s (
                    nut_id integer PRIMARY KEY,
                    file_id integer,
                    file_segment integer,
                    file_element integer,
                    kind_id integer,
                    kind_codes_id integer,
                    tmin_seconds integer,
                    tmin_offset integer,
                    tmax_seconds integer,
                    tmax_offset integer,
                    kscale integer)
            ''')))

        cursor.execute(self._register_table(self._sql(
            '''
                CREATE TABLE IF NOT EXISTS %(db)s.%(kind_codes_count)s (
                    kind_codes_id integer PRIMARY KEY,
                    count integer)
            ''')))

        cursor.execute(self._sql(
            '''
                CREATE UNIQUE INDEX IF NOT EXISTS %(db)s.%(nuts)s_file_element
                    ON %(nuts)s (file_id, file_segment, file_element)
            '''))

        cursor.execute(self._sql(
            '''
                CREATE INDEX IF NOT EXISTS %(db)s.%(nuts)s_index_file_id
                ON %(nuts)s (file_id)
            '''))

        cursor.execute(self._sql(
            '''
                CREATE INDEX IF NOT EXISTS %(db)s.%(nuts)s_index_tmin_seconds
                ON %(nuts)s (kind_id, tmin_seconds)
            '''))

        cursor.execute(self._sql(
            '''
                CREATE INDEX IF NOT EXISTS %(db)s.%(nuts)s_index_tmax_seconds
                ON %(nuts)s (kind_id, tmax_seconds)
            '''))

        cursor.execute(self._sql(
            '''
                CREATE INDEX IF NOT EXISTS %(db)s.%(nuts)s_index_kscale
                ON %(nuts)s (kind_id, kscale, tmin_seconds)
            '''))

        # Deleting a file from the main database removes its nuts from this
        # selection as well.
        cursor.execute(self._sql(
            '''
                CREATE TRIGGER IF NOT EXISTS %(db)s.%(nuts)s_delete_nuts
                BEFORE DELETE ON main.files FOR EACH ROW
                BEGIN
                    DELETE FROM %(nuts)s WHERE file_id == old.file_id;
                END
            '''))

        # trigger only on size to make silent update of mtime possible
        cursor.execute(self._sql(
            '''
                CREATE TRIGGER IF NOT EXISTS %(db)s.%(nuts)s_delete_nuts2
                BEFORE UPDATE OF size ON main.files FOR EACH ROW
                BEGIN
                    DELETE FROM %(nuts)s WHERE file_id == old.file_id;
                END
            '''))

        cursor.execute(self._sql(
            '''
                CREATE TRIGGER IF NOT EXISTS
                    %(db)s.%(file_states)s_delete_files
                BEFORE DELETE ON %(db)s.%(file_states)s FOR EACH ROW
                BEGIN
                    DELETE FROM %(nuts)s WHERE file_id == old.file_id;
                END
            '''))

        # Maintain per-kind_codes nut counts on insert/delete.
        cursor.execute(self._sql(
            '''
                CREATE TRIGGER IF NOT EXISTS %(db)s.%(nuts)s_inc_kind_codes
                BEFORE INSERT ON %(nuts)s FOR EACH ROW
                BEGIN
                    INSERT OR IGNORE INTO %(kind_codes_count)s VALUES
                    (new.kind_codes_id, 0);
                    UPDATE %(kind_codes_count)s
                    SET count = count + 1
                    WHERE new.kind_codes_id
                        == %(kind_codes_count)s.kind_codes_id;
                END
            '''))

        cursor.execute(self._sql(
            '''
                CREATE TRIGGER IF NOT EXISTS %(db)s.%(nuts)s_dec_kind_codes
                BEFORE DELETE ON %(nuts)s FOR EACH ROW
                BEGIN
                    UPDATE %(kind_codes_count)s
                    SET count = count - 1
                    WHERE old.kind_codes_id
                        == %(kind_codes_count)s.kind_codes_id;
                END
            '''))

        # Coverage table: time-ordered step changes per kind_codes; a nut
        # contributes +1 at its tmin and -1 at its tmax. Entries whose step
        # sums to zero are removed again by the triggers below.
        cursor.execute(self._register_table(self._sql(
            '''
                CREATE TABLE IF NOT EXISTS %(db)s.%(coverage)s (
                    kind_codes_id integer,
                    time_seconds integer,
                    time_offset integer,
                    step integer)
            ''')))

        cursor.execute(self._sql(
            '''
                CREATE UNIQUE INDEX IF NOT EXISTS %(db)s.%(coverage)s_time
                    ON %(coverage)s (kind_codes_id, time_seconds, time_offset)
            '''))

        cursor.execute(self._sql(
            '''
                CREATE TRIGGER IF NOT EXISTS %(db)s.%(nuts)s_add_coverage
                AFTER INSERT ON %(nuts)s FOR EACH ROW
                BEGIN
                    INSERT OR IGNORE INTO %(coverage)s VALUES
                    (new.kind_codes_id, new.tmin_seconds, new.tmin_offset, 0)
                    ;
                    UPDATE %(coverage)s
                    SET step = step + 1
                    WHERE new.kind_codes_id == %(coverage)s.kind_codes_id
                        AND new.tmin_seconds == %(coverage)s.time_seconds
                        AND new.tmin_offset == %(coverage)s.time_offset
                    ;
                    INSERT OR IGNORE INTO %(coverage)s VALUES
                    (new.kind_codes_id, new.tmax_seconds, new.tmax_offset, 0)
                    ;
                    UPDATE %(coverage)s
                    SET step = step - 1
                    WHERE new.kind_codes_id == %(coverage)s.kind_codes_id
                        AND new.tmax_seconds == %(coverage)s.time_seconds
                        AND new.tmax_offset == %(coverage)s.time_offset
                    ;
                    DELETE FROM %(coverage)s
                    WHERE new.kind_codes_id == %(coverage)s.kind_codes_id
                        AND new.tmin_seconds == %(coverage)s.time_seconds
                        AND new.tmin_offset == %(coverage)s.time_offset
                        AND step == 0
                    ;
                    DELETE FROM %(coverage)s
                    WHERE new.kind_codes_id == %(coverage)s.kind_codes_id
                        AND new.tmax_seconds == %(coverage)s.time_seconds
                        AND new.tmax_offset == %(coverage)s.time_offset
                        AND step == 0
                    ;
                END
            '''))

        cursor.execute(self._sql(
            '''
                CREATE TRIGGER IF NOT EXISTS %(db)s.%(nuts)s_remove_coverage
                BEFORE DELETE ON %(nuts)s FOR EACH ROW
                BEGIN
                    INSERT OR IGNORE INTO %(coverage)s VALUES
                    (old.kind_codes_id, old.tmin_seconds, old.tmin_offset, 0)
                    ;
                    UPDATE %(coverage)s
                    SET step = step - 1
                    WHERE old.kind_codes_id == %(coverage)s.kind_codes_id
                        AND old.tmin_seconds == %(coverage)s.time_seconds
                        AND old.tmin_offset == %(coverage)s.time_offset
                    ;
                    INSERT OR IGNORE INTO %(coverage)s VALUES
                    (old.kind_codes_id, old.tmax_seconds, old.tmax_offset, 0)
                    ;
                    UPDATE %(coverage)s
                    SET step = step + 1
                    WHERE old.kind_codes_id == %(coverage)s.kind_codes_id
                        AND old.tmax_seconds == %(coverage)s.time_seconds
                        AND old.tmax_offset == %(coverage)s.time_offset
                    ;
                    DELETE FROM %(coverage)s
                    WHERE old.kind_codes_id == %(coverage)s.kind_codes_id
                        AND old.tmin_seconds == %(coverage)s.time_seconds
                        AND old.tmin_offset == %(coverage)s.time_offset
                        AND step == 0
                    ;
                    DELETE FROM %(coverage)s
                    WHERE old.kind_codes_id == %(coverage)s.kind_codes_id
                        AND old.tmax_seconds == %(coverage)s.time_seconds
                        AND old.tmax_offset == %(coverage)s.time_offset
                        AND step == 0
                    ;
                END
            '''))
    def _delete(self):
        '''Delete database tables associated with this Squirrel.'''

        # Triggers are dropped before the tables they reference. The
        # coverage-related objects use IF EXISTS — presumably because
        # selections created before coverage tracking may lack them
        # (TODO confirm).
        for s in '''
                DROP TRIGGER %(db)s.%(nuts)s_delete_nuts;
                DROP TRIGGER %(db)s.%(nuts)s_delete_nuts2;
                DROP TRIGGER %(db)s.%(file_states)s_delete_files;
                DROP TRIGGER %(db)s.%(nuts)s_inc_kind_codes;
                DROP TRIGGER %(db)s.%(nuts)s_dec_kind_codes;
                DROP TABLE %(db)s.%(nuts)s;
                DROP TABLE %(db)s.%(kind_codes_count)s;
                DROP TRIGGER IF EXISTS %(db)s.%(nuts)s_add_coverage;
                DROP TRIGGER IF EXISTS %(db)s.%(nuts)s_remove_coverage;
                DROP TABLE IF EXISTS %(db)s.%(coverage)s;
                '''.strip().splitlines():

            self._conn.execute(self._sql(s))

        Selection._delete(self)
    @filldocs
    def add(self,
            paths,
            kinds=None,
            format='detect',
            regex=None,
            check=True,
            progress_viewer='terminal'):

        '''
        Add files to the selection.

        :param paths:
            Iterator yielding paths to files or directories to be added to the
            selection. Recurses into directories. If given a ``str``, it
            is treated as a single path to be added.
        :type paths:
            :py:class:`list` of :py:class:`str`

        :param kinds:
            Content types to be made available through the Squirrel selection.
            By default, all known content types are accepted.
        :type kinds:
            :py:class:`list` of :py:class:`str`

        :param format:
            File format identifier or ``'detect'`` to enable auto-detection
            (available: %(file_formats)s).
        :type format:
            str

        :param regex:
            If not ``None``, files are only included if their paths match the
            given regular expression pattern.
        :type regex:
            str

        :param check:
            If ``True``, all file modification times are checked to see if
            cached information has to be updated (slow). If ``False``, only
            previously unknown files are indexed and cached information is used
            for known files, regardless of file state (fast, corresponds to
            Squirrel's ``--optimistic`` mode). File deletions will go
            undetected in the latter case.
        :type check:
            bool

        :Complexity:
            O(log N)
        '''

        if isinstance(kinds, str):
            kinds = (kinds,)

        if isinstance(paths, str):
            paths = [paths]

        kind_mask = model.to_kind_mask(kinds)

        # Paths starting with 'virtual:' are passed through untouched by the
        # file selector; everything else is expanded/filtered on disk.
        with progress.view(progress_viewer):
            Selection.add(
                self, util.iter_select_files(
                    paths,
                    show_progress=False,
                    regex=regex,
                    pass_through=lambda path: path.startswith('virtual:')
                ), kind_mask, format)

            self._load(check)
            self._update_nuts()
    def reload(self):
        '''
        Check for modifications and reindex modified files.

        Based on file modification times.
        '''

        # Force all file states to be re-checked, reindex whatever changed,
        # then refresh the selection's nut aggregation.
        self._set_file_states_force_check()
        self._load(check=True)
        self._update_nuts()
    def add_virtual(self, nuts, virtual_paths=None):
        '''
        Add content which is not backed by files.

        :param nuts:
            Content pieces to be added.
        :type nuts:
            iterator yielding :py:class:`~pyrocko.squirrel.model.Nut` objects

        :param virtual_paths:
            List of virtual paths to prevent creating a temporary list of the
            nuts while aggregating the file paths for the selection.
        :type virtual_paths:
            :py:class:`list` of :py:class:`str`

        Stores to the main database and the selection.
        '''

        if isinstance(virtual_paths, str):
            virtual_paths = [virtual_paths]

        if virtual_paths is None:
            # No paths supplied: materialize the nut iterator so it can be
            # traversed twice (paths here, dig() below).
            if not isinstance(nuts, list):
                nuts = list(nuts)
            virtual_paths = set(nut.file_path for nut in nuts)

        Selection.add(self, virtual_paths)
        self.get_database().dig(nuts)
        self._update_nuts()
687 def add_volatile(self, nuts):
688 if not isinstance(nuts, list):
689 nuts = list(nuts)
691 paths = list(set(nut.file_path for nut in nuts))
692 io.backends.virtual.add_nuts(nuts)
693 self.add_virtual(nuts, paths)
694 self._volatile_paths.extend(paths)
696 def add_volatile_waveforms(self, traces):
697 '''
698 Add in-memory waveforms which will be removed when the app closes.
699 '''
701 name = model.random_name()
703 path = 'virtual:volatile:%s' % name
705 nuts = []
706 for itr, tr in enumerate(traces):
707 assert tr.tmin <= tr.tmax
708 tmin_seconds, tmin_offset = model.tsplit(tr.tmin)
709 tmax_seconds, tmax_offset = model.tsplit(tr.tmax)
710 nuts.append(model.Nut(
711 file_path=path,
712 file_format='virtual',
713 file_segment=itr,
714 file_element=0,
715 codes=separator.join(tr.codes),
716 tmin_seconds=tmin_seconds,
717 tmin_offset=tmin_offset,
718 tmax_seconds=tmax_seconds,
719 tmax_offset=tmax_offset,
720 deltat=tr.deltat,
721 kind_id=to_kind_id('waveform'),
722 content=tr))
724 self.add_volatile(nuts)
725 return path
727 def _load(self, check):
728 for _ in io.iload(
729 self,
730 content=[],
731 skip_unchanged=True,
732 check=check):
733 pass
    def _update_nuts(self):
        # Copy nut rows of all not-yet-known files in the selection from the
        # main database into the selection's private nut table, honouring
        # each file's kind mask.
        transaction = self.transaction()
        with make_task('Aggregating selection') as task, \
                transaction as cursor:

            # Report progress every 100000 SQLite VM instructions.
            self._conn.set_progress_handler(task.update, 100000)
            nrows = cursor.execute(self._sql(
                '''
                    INSERT INTO %(db)s.%(nuts)s
                    SELECT NULL,
                        nuts.file_id, nuts.file_segment, nuts.file_element,
                        nuts.kind_id, nuts.kind_codes_id,
                        nuts.tmin_seconds, nuts.tmin_offset,
                        nuts.tmax_seconds, nuts.tmax_offset,
                        nuts.kscale
                    FROM %(db)s.%(file_states)s
                    INNER JOIN nuts
                        ON %(db)s.%(file_states)s.file_id == nuts.file_id
                    INNER JOIN kind_codes
                        ON nuts.kind_codes_id ==
                           kind_codes.kind_codes_id
                    WHERE %(db)s.%(file_states)s.file_state != 2
                        AND (((1 << kind_codes.kind_id)
                            & %(db)s.%(file_states)s.kind_mask) != 0)
                ''')).rowcount

            task.update(nrows)
            self._set_file_states_known(transaction)
            self._conn.set_progress_handler(None, 0)
    def add_source(self, source, check=True, progress_viewer='terminal'):
        '''
        Add remote resource.

        :param source:
            Remote data access client instance.
        :type source:
            subclass of :py:class:`~pyrocko.squirrel.client.base.Source`

        :param check:
            Passed on to the source's ``setup`` method.
        :type check:
            bool
        '''

        self._sources.append(source)
        source.setup(self, check=check, progress_viewer=progress_viewer)
    def add_fdsn(self, *args, **kwargs):
        '''
        Add FDSN site for transparent remote data access.

        Arguments are passed to
        :py:class:`~pyrocko.squirrel.client.fdsn.FDSNSource`.
        '''

        # Thin convenience wrapper around add_source().
        self.add_source(fdsn.FDSNSource(*args, **kwargs))
    def add_catalog(self, *args, **kwargs):
        '''
        Add online catalog for transparent event data access.

        Arguments are passed to
        :py:class:`~pyrocko.squirrel.client.catalog.CatalogSource`.
        '''

        # Thin convenience wrapper around add_source().
        self.add_source(catalog.CatalogSource(*args, **kwargs))
798 def add_dataset(
799 self, ds, check=True, progress_viewer='terminal',
800 warn_persistent=True):
802 '''
803 Read dataset description from file and add its contents.
805 :param ds:
806 Path to dataset description file or dataset description object
807 . See :py:mod:`~pyrocko.squirrel.dataset`.
808 :type ds:
809 :py:class:`str` or :py:class:`~pyrocko.squirrel.dataset.Dataset`
811 :param check:
812 If ``True``, all file modification times are checked to see if
813 cached information has to be updated (slow). If ``False``, only
814 previously unknown files are indexed and cached information is used
815 for known files, regardless of file state (fast, corrresponds to
816 Squirrel's ``--optimistic`` mode). File deletions will go
817 undetected in the latter case.
818 :type check:
819 bool
820 '''
821 if isinstance(ds, str):
822 ds = dataset.read_dataset(ds)
823 path = ds
824 else:
825 path = None
827 if warn_persistent and ds.persistent and (
828 not self._persistent or (self._persistent != ds.persistent)):
830 logger.warning(
831 'Dataset `persistent` flag ignored. Can not be set on already '
832 'existing Squirrel instance.%s' % (
833 ' Dataset: %s' % path if path else ''))
835 ds.setup(self, check=check, progress_viewer=progress_viewer)
837 def _get_selection_args(
838 self, obj=None, tmin=None, tmax=None, time=None, codes=None):
840 if time is not None:
841 tmin = time
842 tmax = time
844 if obj is not None:
845 tmin = tmin if tmin is not None else obj.tmin
846 tmax = tmax if tmax is not None else obj.tmax
847 codes = codes if codes is not None else codes_inflate2(obj.codes)
849 if isinstance(codes, str):
850 codes = tuple(codes.split('.'))
852 return tmin, tmax, codes
854 def _selection_args_to_kwargs(
855 self, obj=None, tmin=None, tmax=None, time=None, codes=None):
857 return dict(obj=obj, tmin=tmin, tmax=tmax, time=time, codes=codes)
    def _timerange_sql(self, tmin, tmax, kind, cond, args, naiv):
        # Append SQL conditions (and their bind arguments) selecting nuts of
        # the given kind intersecting [tmin, tmax). With naiv=True a plain
        # tmin_seconds <= tmax test is used; otherwise the kscale index is
        # exploited: nuts are bucketed by time-span scale, so for each scale
        # only a bounded tmin_seconds window has to be scanned.

        tmin_seconds, tmin_offset = model.tsplit(tmin)
        tmax_seconds, tmax_offset = model.tsplit(tmax)
        if naiv:
            cond.append('%(db)s.%(nuts)s.tmin_seconds <= ?')
            args.append(tmax_seconds)
        else:
            tscale_edges = model.tscale_edges
            tmin_cond = []
            for kscale in range(tscale_edges.size + 1):
                if kscale != tscale_edges.size:
                    tscale = int(tscale_edges[kscale])
                    tmin_cond.append('''
                        (%(db)s.%(nuts)s.kind_id = ?
                         AND %(db)s.%(nuts)s.kscale == ?
                         AND %(db)s.%(nuts)s.tmin_seconds BETWEEN ? AND ?)
                    ''')
                    # +/-1 second padding guards against offset rounding at
                    # the window edges.
                    args.extend(
                        (to_kind_id(kind), kscale,
                         tmin_seconds - tscale - 1, tmax_seconds + 1))

                else:
                    # Last bucket: spans larger than the biggest scale edge
                    # have no lower bound on tmin_seconds.
                    tmin_cond.append('''
                        (%(db)s.%(nuts)s.kind_id == ?
                         AND %(db)s.%(nuts)s.kscale == ?
                         AND %(db)s.%(nuts)s.tmin_seconds <= ?)
                    ''')

                    args.extend(
                        (to_kind_id(kind), kscale, tmax_seconds + 1))
            if tmin_cond:
                cond.append(' ( ' + ' OR '.join(tmin_cond) + ' ) ')

        cond.append('%(db)s.%(nuts)s.tmax_seconds >= ?')
        args.append(tmin_seconds)
    def iter_nuts(
            self, kind=None, tmin=None, tmax=None, codes=None, naiv=False,
            kind_codes_ids=None, path=None):

        '''
        Iterate over content entities matching given constraints.

        :param kind:
            Content kind (or kinds) to extract.
        :type kind:
            :py:class:`str`, :py:class:`list` of :py:class:`str`

        :param tmin:
            Start time of query interval.
        :type tmin:
            timestamp

        :param tmax:
            End time of query interval.
        :type tmax:
            timestamp

        :param codes:
            Pattern of content codes to query.
        :type codes:
            :py:class:`tuple` of :py:class:`str`

        :param naiv:
            Bypass time span lookup through indices (slow, for testing).
        :type naiv:
            :py:class:`bool`

        :param kind_codes_ids:
            Kind-codes IDs of contents to be retrieved (internal use).
        :type kind_codes_ids:
            :py:class:`list` of :py:class:`int`

        :yields:
            :py:class:`~pyrocko.squirrel.model.Nut` objects representing the
            intersecting content.

        :complexity:
            O(log N) for the time selection part due to heavy use of database
            indices.

        Query time span is treated as a half-open interval ``[tmin, tmax)``.
        However, if ``tmin`` equals ``tmax``, the edge logics are modified to
        closed-interval so that content intersecting with the time instant ``t
        = tmin = tmax`` is returned (otherwise nothing would be returned as
        ``[t, t)`` never matches anything).

        Time spans of content entities to be matched are also treated as half
        open intervals, e.g. content span ``[0, 1)`` is matched by query span
        ``[0, 1)`` but not by ``[-1, 0)`` or ``[1, 2)``. Also here, logics are
        modified to closed-interval when the content time span is an empty
        interval, i.e. to indicate a time instant. E.g. time instant 0 is
        matched by ``[0, 1)`` but not by ``[-1, 0)`` or ``[1, 2)``.
        '''

        # Multiple kinds (or all kinds): recurse once per kind.
        if not isinstance(kind, str):
            if kind is None:
                kind = model.g_content_kinds
            for kind_ in kind:
                for nut in self.iter_nuts(kind_, tmin, tmax, codes):
                    yield nut

            return

        cond = []
        args = []
        if tmin is not None or tmax is not None:
            assert kind is not None
            # Open-ended queries are clamped to the selection's total span.
            if tmin is None:
                tmin = self.get_time_span()[0]
            if tmax is None:
                tmax = self.get_time_span()[1] + 1.0

            self._timerange_sql(tmin, tmax, kind, cond, args, naiv)

        elif kind is not None:
            cond.append('kind_codes.kind_id == ?')
            args.append(to_kind_id(kind))

        if codes is not None:
            pats = codes_patterns_for_kind(kind, codes)
            if pats:
                cond.append(
                    ' ( %s ) ' % ' OR '.join(
                        ('kind_codes.codes GLOB ?',) * len(pats)))
                args.extend(separator.join(pat) for pat in pats)

        if kind_codes_ids is not None:
            cond.append(
                ' ( kind_codes.kind_codes_id IN ( %s ) ) ' % ', '.join(
                    '?'*len(kind_codes_ids)))

            args.extend(kind_codes_ids)

        db = self.get_database()
        if path is not None:
            cond.append('files.path == ?')
            args.append(db.relpath(abspath(path)))

        sql = ('''
            SELECT
                files.path,
                files.format,
                files.mtime,
                files.size,
                %(db)s.%(nuts)s.file_segment,
                %(db)s.%(nuts)s.file_element,
                kind_codes.kind_id,
                kind_codes.codes,
                %(db)s.%(nuts)s.tmin_seconds,
                %(db)s.%(nuts)s.tmin_offset,
                %(db)s.%(nuts)s.tmax_seconds,
                %(db)s.%(nuts)s.tmax_offset,
                kind_codes.deltat
            FROM files
            INNER JOIN %(db)s.%(nuts)s
                ON files.file_id == %(db)s.%(nuts)s.file_id
            INNER JOIN kind_codes
                ON %(db)s.%(nuts)s.kind_codes_id == kind_codes.kind_codes_id
            ''')

        if cond:
            sql += ''' WHERE ''' + ' AND '.join(cond)

        sql = self._sql(sql)
        if tmin is None and tmax is None:
            for row in self._conn.execute(sql, args):
                row = (db.abspath(row[0]),) + row[1:]
                nut = model.Nut(values_nocheck=row)
                yield nut
        else:
            assert tmin is not None and tmax is not None
            # The SQL pre-selection is coarse (integer seconds); exact edge
            # logic per the docstring is applied here in Python.
            if tmin == tmax:
                for row in self._conn.execute(sql, args):
                    row = (db.abspath(row[0]),) + row[1:]
                    nut = model.Nut(values_nocheck=row)
                    if (nut.tmin <= tmin < nut.tmax) \
                            or (nut.tmin == nut.tmax and tmin == nut.tmin):

                        yield nut
            else:
                for row in self._conn.execute(sql, args):
                    row = (db.abspath(row[0]),) + row[1:]
                    nut = model.Nut(values_nocheck=row)
                    if (tmin < nut.tmax and nut.tmin < tmax) \
                            or (nut.tmin == nut.tmax
                                and tmin <= nut.tmin < tmax):

                        yield nut
    def get_nuts(self, *args, **kwargs):
        '''
        Get content entities matching given constraints.

        Like :py:meth:`iter_nuts` but returns results as a list. See there
        for the available arguments.
        '''

        return list(self.iter_nuts(*args, **kwargs))
    def _split_nuts(
            self, kind, tmin=None, tmax=None, codes=None, path=None):
        # Cut the section [tmin, tmax) out of all matching nuts: each
        # intersected nut is deleted and the remaining pieces (before tmin
        # and/or after tmax, if any) are re-inserted. Applied both to this
        # selection's nut table and to the main database's.

        tmin_seconds, tmin_offset = model.tsplit(tmin)
        tmax_seconds, tmax_offset = model.tsplit(tmax)

        names_main_nuts = dict(self._names)
        names_main_nuts.update(db='main', nuts='nuts')

        db = self.get_database()

        def main_nuts(s):
            # Substitute table names targeting the main database.
            return s % names_main_nuts

        with self.transaction() as cursor:
            # modify selection and main
            for sql_subst in [
                    self._sql, main_nuts]:

                cond = []
                args = []

                self._timerange_sql(tmin, tmax, kind, cond, args, False)

                if codes is not None:
                    pats = codes_patterns_for_kind(kind, codes)
                    if pats:
                        cond.append(
                            ' ( %s ) ' % ' OR '.join(
                                ('kind_codes.codes GLOB ?',) * len(pats)))
                        args.extend(separator.join(pat) for pat in pats)

                if path is not None:
                    cond.append('files.path == ?')
                    args.append(db.relpath(abspath(path)))

                sql = sql_subst('''
                    SELECT
                        %(db)s.%(nuts)s.nut_id,
                        %(db)s.%(nuts)s.tmin_seconds,
                        %(db)s.%(nuts)s.tmin_offset,
                        %(db)s.%(nuts)s.tmax_seconds,
                        %(db)s.%(nuts)s.tmax_offset,
                        kind_codes.deltat
                    FROM files
                    INNER JOIN %(db)s.%(nuts)s
                        ON files.file_id == %(db)s.%(nuts)s.file_id
                    INNER JOIN kind_codes
                        ON %(db)s.%(nuts)s.kind_codes_id == kind_codes.kind_codes_id
                    WHERE ''' + ' AND '.join(cond))  # noqa

                insert = []
                delete = []
                for row in cursor.execute(sql, args):
                    nut_id, nut_tmin_seconds, nut_tmin_offset, \
                        nut_tmax_seconds, nut_tmax_offset, nut_deltat = row

                    nut_tmin = model.tjoin(
                        nut_tmin_seconds, nut_tmin_offset)
                    nut_tmax = model.tjoin(
                        nut_tmax_seconds, nut_tmax_offset)

                    if nut_tmin < tmax and tmin < nut_tmax:
                        # Keep the leading piece [nut_tmin, tmin) ...
                        if nut_tmin < tmin:
                            insert.append((
                                nut_tmin_seconds, nut_tmin_offset,
                                tmin_seconds, tmin_offset,
                                model.tscale_to_kscale(
                                    tmin_seconds - nut_tmin_seconds),
                                nut_id))

                        # ... and the trailing piece [tmax, nut_tmax).
                        if tmax < nut_tmax:
                            insert.append((
                                tmax_seconds, tmax_offset,
                                nut_tmax_seconds, nut_tmax_offset,
                                model.tscale_to_kscale(
                                    nut_tmax_seconds - tmax_seconds),
                                nut_id))

                    delete.append((nut_id,))

                sql_add = '''
                    INSERT INTO %(db)s.%(nuts)s (
                            file_id, file_segment, file_element, kind_id,
                            kind_codes_id, tmin_seconds, tmin_offset,
                            tmax_seconds, tmax_offset, kscale )
                        SELECT
                            file_id, file_segment, file_element,
                            kind_id, kind_codes_id, ?, ?, ?, ?, ?
                        FROM %(db)s.%(nuts)s
                        WHERE nut_id == ?
                '''

                cursor.executemany(sql_subst(sql_add), insert)

                sql_delete = '''
                    DELETE FROM %(db)s.%(nuts)s WHERE nut_id == ?
                '''

                cursor.executemany(sql_subst(sql_delete), delete)
1158 def get_time_span(self, kinds=None):
1159 '''
1160 Get time interval over all content in selection.
1162 :complexity:
1163 O(1), independent of the number of nuts.
1165 :returns: (tmin, tmax)
1166 '''
1168 sql_min = self._sql('''
1169 SELECT MIN(tmin_seconds), MIN(tmin_offset)
1170 FROM %(db)s.%(nuts)s
1171 WHERE kind_id == ?
1172 AND tmin_seconds == (
1173 SELECT MIN(tmin_seconds)
1174 FROM %(db)s.%(nuts)s
1175 WHERE kind_id == ?)
1176 ''')
1178 sql_max = self._sql('''
1179 SELECT MAX(tmax_seconds), MAX(tmax_offset)
1180 FROM %(db)s.%(nuts)s
1181 WHERE kind_id == ?
1182 AND tmax_seconds == (
1183 SELECT MAX(tmax_seconds)
1184 FROM %(db)s.%(nuts)s
1185 WHERE kind_id == ?)
1186 ''')
1188 gtmin = None
1189 gtmax = None
1191 if isinstance(kinds, str):
1192 kinds = [kinds]
1194 if kinds is None:
1195 kind_ids = model.g_content_kind_ids
1196 else:
1197 kind_ids = model.to_kind_ids(kinds)
1199 for kind_id in kind_ids:
1200 for tmin_seconds, tmin_offset in self._conn.execute(
1201 sql_min, (kind_id, kind_id)):
1202 tmin = model.tjoin(tmin_seconds, tmin_offset)
1203 if tmin is not None and (gtmin is None or tmin < gtmin):
1204 gtmin = tmin
1206 for (tmax_seconds, tmax_offset) in self._conn.execute(
1207 sql_max, (kind_id, kind_id)):
1208 tmax = model.tjoin(tmax_seconds, tmax_offset)
1209 if tmax is not None and (gtmax is None or tmax > gtmax):
1210 gtmax = tmax
1212 return gtmin, gtmax
1214 def get_deltat_span(self, kind):
1215 '''
1216 Get min and max sampling interval of all content of given kind.
1218 :param kind:
1219 Content kind
1220 :type kind:
1221 str
1223 :returns: (deltat_min, deltat_max)
1224 '''
1226 deltats = [
1227 deltat for deltat in self.get_deltats(kind)
1228 if deltat is not None]
1230 if deltats:
1231 return min(deltats), max(deltats)
1232 else:
1233 return None, None
1235 def iter_kinds(self, codes=None):
1236 '''
1237 Iterate over content types available in selection.
1239 :param codes:
1240 If given, get kinds only for selected codes identifier.
1241 :type codes:
1242 :py:class:`tuple` of :py:class:`str`
1244 :yields:
1245 Available content kinds as :py:class:`str`.
1247 :complexity:
1248 O(1), independent of number of nuts.
1249 '''
1251 return self._database._iter_kinds(
1252 codes=codes,
1253 kind_codes_count='%(db)s.%(kind_codes_count)s' % self._names)
1255 def iter_deltats(self, kind=None):
1256 '''
1257 Iterate over sampling intervals available in selection.
1259 :param kind:
1260 If given, get sampling intervals only for a given content type.
1261 :type kind:
1262 str
1264 :yields:
1265 :py:class:`float` values.
1267 :complexity:
1268 O(1), independent of number of nuts.
1269 '''
1270 return self._database._iter_deltats(
1271 kind=kind,
1272 kind_codes_count='%(db)s.%(kind_codes_count)s' % self._names)
1274 def iter_codes(self, kind=None):
1275 '''
1276 Iterate over content identifier code sequences available in selection.
1278 :param kind:
1279 If given, get codes only for a given content type.
1280 :type kind:
1281 str
1283 :yields:
1284 :py:class:`tuple` of :py:class:`str`
1286 :complexity:
1287 O(1), independent of number of nuts.
1288 '''
1289 return self._database._iter_codes(
1290 kind=kind,
1291 kind_codes_count='%(db)s.%(kind_codes_count)s' % self._names)
1293 def iter_counts(self, kind=None):
1294 '''
1295 Iterate over number of occurrences of any (kind, codes) combination.
1297 :param kind:
1298 If given, get counts only for selected content type.
1299 :type kind:
1300 str
1302 :yields:
1303 Tuples of the form ``((kind, codes), count)``.
1305 :complexity:
1306 O(1), independent of number of nuts.
1307 '''
1308 return self._database._iter_counts(
1309 kind=kind,
1310 kind_codes_count='%(db)s.%(kind_codes_count)s' % self._names)
1312 def get_kinds(self, codes=None):
1313 '''
1314 Get content types available in selection.
1316 :param codes:
1317 If given, get kinds only for selected codes identifier.
1318 :type codes:
1319 :py:class:`tuple` of :py:class:`str`
1321 :returns:
1322 Sorted list of available content types.
1324 :complexity:
1325 O(1), independent of number of nuts.
1327 '''
1328 return sorted(list(self.iter_kinds(codes=codes)))
1330 def get_deltats(self, kind=None):
1331 '''
1332 Get sampling intervals available in selection.
1334 :param kind:
1335 If given, get codes only for selected content type.
1336 :type kind:
1337 str
1339 :complexity:
1340 O(1), independent of number of nuts.
1342 :returns: sorted list of available sampling intervals
1343 '''
1344 return sorted(list(self.iter_deltats(kind=kind)))
1346 def get_codes(self, kind=None):
1347 '''
1348 Get identifier code sequences available in selection.
1350 :param kind:
1351 If given, get codes only for selected content type.
1352 :type kind:
1353 str
1355 :complexity:
1356 O(1), independent of number of nuts.
1358 :returns: sorted list of available codes as tuples of strings
1359 '''
1360 return sorted(list(self.iter_codes(kind=kind)))
1362 def get_counts(self, kind=None):
1363 '''
1364 Get number of occurrences of any (kind, codes) combination.
1366 :param kind:
1367 If given, get codes only for selected content type.
1368 :type kind:
1369 str
1371 :complexity:
1372 O(1), independent of number of nuts.
1374 :returns: ``dict`` with ``counts[kind][codes]`` or ``counts[codes]``
1375 if kind is not ``None``
1376 '''
1377 d = {}
1378 for (k, codes, deltat), count in self.iter_counts():
1379 if k not in d:
1380 v = d[k] = {}
1381 else:
1382 v = d[k]
1384 if codes not in v:
1385 v[codes] = 0
1387 v[codes] += count
1389 if kind is not None:
1390 return d[kind]
1391 else:
1392 return d
1394 def glob_codes(self, kind, codes_list):
1395 '''
1396 Find codes matching given patterns.
1398 :param kind:
1399 Content kind to be queried.
1400 :type kind:
1401 str
1403 :param codes_list:
1404 List of code patterns to query. If not given or empty, an empty
1405 list is returned.
1406 :type codes_list:
1407 :py:class:`list` of :py:class:`tuple` of :py:class:`str`
1409 :returns:
1410 List of matches of the form ``[kind_codes_id, codes, deltat]``.
1411 '''
1413 args = [to_kind_id(kind)]
1414 pats = []
1415 for codes in codes_list:
1416 pats.extend(codes_patterns_for_kind(kind, codes))
1418 codes_cond = ' ( %s ) ' % ' OR '.join(
1419 ('kind_codes.codes GLOB ?',) * len(pats))
1421 args.extend(separator.join(pat) for pat in pats)
1423 sql = self._sql('''
1424 SELECT kind_codes_id, codes, deltat FROM kind_codes
1425 WHERE
1426 kind_id == ?
1427 AND ''' + codes_cond)
1429 return list(map(list, self._conn.execute(sql, args)))
1431 def update(self, constraint=None, **kwargs):
1432 '''
1433 Update or partially update channel and event inventories.
1435 :param constraint:
1436 Selection of times or areas to be brought up to date.
1437 :type constraint:
1438 :py:class:`~pyrocko.squirrel.client.Constraint`
1440 :param \\*\\*kwargs:
1441 Shortcut for setting ``constraint=Constraint(**kwargs)``.
1443 This function triggers all attached remote sources, to check for
1444 updates in the meta-data. The sources will only submit queries when
1445 their expiration date has passed, or if the selection spans into
1446 previously unseen times or areas.
1447 '''
1449 if constraint is None:
1450 constraint = client.Constraint(**kwargs)
1452 for source in self._sources:
1453 source.update_channel_inventory(self, constraint)
1454 source.update_event_inventory(self, constraint)
1456 def update_waveform_promises(self, constraint=None, **kwargs):
1457 '''
1458 Permit downloading of remote waveforms.
1460 :param constraint:
1461 Remote waveforms compatible with the given constraint are enabled
1462 for download.
1463 :type constraint:
1464 :py:class:`~pyrocko.squirrel.client.Constraint`
1466 :param \\*\\*kwargs:
1467 Shortcut for setting ``constraint=Constraint(**kwargs)``.
1469 Calling this method permits Squirrel to download waveforms from remote
1470 sources when processing subsequent waveform requests. This works by
1471 inserting so called waveform promises into the database. It will look
1472 into the available channels for each remote source and create a promise
1473 for each channel compatible with the given constraint. If the promise
1474 then matches in a waveform request, Squirrel tries to download the
1475 waveform. If the download is successful, the downloaded waveform is
1476 added to the Squirrel and the promise is deleted. If the download
1477 fails, the promise is kept if the reason of failure looks like being
1478 temporary, e.g. because of a network failure. If the cause of failure
1479 however seems to be permanent, the promise is deleted so that no
1480 further attempts are made to download a waveform which might not be
1481 available from that server at all. To force re-scheduling after a
1482 permanent failure, call :py:meth:`update_waveform_promises`
1483 yet another time.
1484 '''
1486 if constraint is None:
1487 constraint = client.Constraint(**kwargs)
1489 # TODO
1490 print('contraint ignored atm')
1492 for source in self._sources:
1493 source.update_waveform_promises(self)
1495 def update_responses(self, constraint=None, **kwargs):
1496 # TODO
1497 if constraint is None:
1498 constraint = client.Constraint(**kwargs)
1500 print('contraint ignored atm')
1501 for source in self._sources:
1502 source.update_response_inventory(self, constraint)
1504 def get_nfiles(self):
1505 '''
1506 Get number of files in selection.
1507 '''
1509 sql = self._sql('''SELECT COUNT(*) FROM %(db)s.%(file_states)s''')
1510 for row in self._conn.execute(sql):
1511 return row[0]
1513 def get_nnuts(self):
1514 '''
1515 Get number of nuts in selection.
1516 '''
1518 sql = self._sql('''SELECT COUNT(*) FROM %(db)s.%(nuts)s''')
1519 for row in self._conn.execute(sql):
1520 return row[0]
1522 def get_total_size(self):
1523 '''
1524 Get aggregated file size available in selection.
1525 '''
1527 sql = self._sql('''
1528 SELECT SUM(files.size) FROM %(db)s.%(file_states)s
1529 INNER JOIN files
1530 ON %(db)s.%(file_states)s.file_id = files.file_id
1531 ''')
1533 for row in self._conn.execute(sql):
1534 return row[0] or 0
1536 def get_stats(self):
1537 '''
1538 Get statistics on contents available through this selection.
1539 '''
1541 kinds = self.get_kinds()
1542 time_spans = {}
1543 for kind in kinds:
1544 time_spans[kind] = self.get_time_span([kind])
1546 return SquirrelStats(
1547 nfiles=self.get_nfiles(),
1548 nnuts=self.get_nnuts(),
1549 kinds=kinds,
1550 codes=self.get_codes(),
1551 total_size=self.get_total_size(),
1552 counts=self.get_counts(),
1553 time_spans=time_spans,
1554 sources=[s.describe() for s in self._sources],
1555 operators=[op.describe() for op in self._operators])
1557 def get_content(
1558 self,
1559 nut,
1560 cache_id='default',
1561 accessor_id='default',
1562 show_progress=False):
1564 '''
1565 Get and possibly load full content for a given index entry from file.
1567 Loads the actual content objects (channel, station, waveform, ...) from
1568 file. For efficiency sibling content (all stuff in the same file
1569 segment) will also be loaded as a side effect. The loaded contents are
1570 cached in the Squirrel object.
1571 '''
1573 content_cache = self._content_caches[cache_id]
1574 if not content_cache.has(nut):
1576 for nut_loaded in io.iload(
1577 nut.file_path,
1578 segment=nut.file_segment,
1579 format=nut.file_format,
1580 database=self._database,
1581 show_progress=show_progress):
1583 content_cache.put(nut_loaded)
1585 try:
1586 return content_cache.get(nut, accessor_id)
1587 except KeyError:
1588 raise error.NotAvailable(
1589 'Unable to retrieve content: %s, %s, %s, %s' % nut.key)
1591 def advance_accessor(self, accessor_id, cache_id=None):
1592 '''
1593 Notify memory caches about consumer moving to a new data batch.
1595 :param accessor_id:
1596 Name of accessing consumer to be advanced.
1597 :type accessor_id:
1598 str
1600 :param cache_id:
1601 Name of cache to for which the accessor should be advanced. By
1602 default the named accessor is advanced in all registered caches.
1603 By default, two caches named ``'default'`` and ``'waveforms'`` are
1604 available.
1605 :type cache_id:
1606 str
1608 See :py:class:`~pyrocko.squirrel.cache.ContentCache` for details on how
1609 Squirrel's memory caching works and can be tuned. Default behaviour is
1610 to release data when it has not been used in the latest data
1611 window/batch. If the accessor is never advanced, data is cached
1612 indefinitely - which is often desired e.g. for station meta-data.
1613 Methods for consecutive data traversal, like
1614 :py:meth:`chopper_waveforms` automatically advance and clear
1615 their accessor.
1616 '''
1617 for cache_ in (
1618 self._content_caches.keys()
1619 if cache_id is None
1620 else [cache_id]):
1622 self._content_caches[cache_].advance_accessor(accessor_id)
1624 def clear_accessor(self, accessor_id, cache_id=None):
1625 '''
1626 Notify memory caches about a consumer having finished.
1628 :param accessor_id:
1629 Name of accessor to be cleared.
1630 :type accessor_id:
1631 str
1633 :param cache_id:
1634 Name of cache to for which the accessor should be cleared. By
1635 default the named accessor is cleared from all registered caches.
1636 By default, two caches named ``'default'`` and ``'waveforms'`` are
1637 available.
1638 :type cache_id:
1639 str
1641 Calling this method clears all references to cache entries held by the
1642 named accessor. Cache entries are then freed if not referenced by any
1643 other accessor.
1644 '''
1646 for cache_ in (
1647 self._content_caches.keys()
1648 if cache_id is None
1649 else [cache_id]):
1651 self._content_caches[cache_].clear_accessor(accessor_id)
1653 def _check_duplicates(self, nuts):
1654 d = defaultdict(list)
1655 for nut in nuts:
1656 d[nut.codes].append(nut)
1658 for codes, group in d.items():
1659 if len(group) > 1:
1660 logger.warning(
1661 'Multiple entries matching codes: %s'
1662 % '.'.join(codes.split(separator)))
1664 @filldocs
1665 def get_stations(
1666 self, obj=None, tmin=None, tmax=None, time=None, codes=None,
1667 model='squirrel'):
1669 '''
1670 Get stations matching given constraints.
1672 %(query_args)s
1674 :param model:
1675 Select object model for returned values: ``'squirrel'`` to get
1676 Squirrel station objects or ``'pyrocko'`` to get Pyrocko station
1677 objects with channel information attached.
1678 :type model:
1679 str
1681 :returns:
1682 List of :py:class:`pyrocko.squirrel.Station
1683 <pyrocko.squirrel.model.Station>` objects by default or list of
1684 :py:class:`pyrocko.model.Station <pyrocko.model.station.Station>`
1685 objects if ``model='pyrocko'`` is requested.
1687 See :py:meth:`iter_nuts` for details on time span matching.
1688 '''
1690 if model == 'pyrocko':
1691 return self._get_pyrocko_stations(obj, tmin, tmax, time, codes)
1692 elif model == 'squirrel':
1693 args = self._get_selection_args(obj, tmin, tmax, time, codes)
1694 nuts = sorted(
1695 self.iter_nuts('station', *args), key=lambda nut: nut.dkey)
1696 self._check_duplicates(nuts)
1697 return [self.get_content(nut) for nut in nuts]
1698 else:
1699 raise ValueError('Invalid station model: %s' % model)
1701 @filldocs
1702 def get_channels(
1703 self, obj=None, tmin=None, tmax=None, time=None, codes=None):
1705 '''
1706 Get channels matching given constraints.
1708 %(query_args)s
1710 :returns:
1711 List of :py:class:`~pyrocko.squirrel.model.Channel` objects.
1713 See :py:meth:`iter_nuts` for details on time span matching.
1714 '''
1716 args = self._get_selection_args(obj, tmin, tmax, time, codes)
1717 nuts = sorted(
1718 self.iter_nuts('channel', *args), key=lambda nut: nut.dkey)
1719 self._check_duplicates(nuts)
1720 return [self.get_content(nut) for nut in nuts]
    @filldocs
    def get_sensors(
            self, obj=None, tmin=None, tmax=None, time=None, codes=None):

        '''
        Get sensors matching given constraints.

        %(query_args)s

        :returns:
            List of :py:class:`~pyrocko.squirrel.model.Sensor` objects.

        See :py:meth:`iter_nuts` for details on time span matching.
        '''

        tmin, tmax, codes = self._get_selection_args(
            obj, tmin, tmax, time, codes)

        if codes is not None:
            if isinstance(codes, str):
                codes = codes.split('.')
            # Expand to the full 6-element codes tuple.
            codes = tuple(codes_inflate(codes))
            if codes[4] != '*':
                # Widen the channel pattern: replace its last character
                # (presumably the component letter, e.g. 'Z' in 'BHZ') with
                # '?' so all components of a sensor match together -- TODO
                # confirm intent for single-character channel codes.
                codes = codes[:4] + (codes[4][:-1] + '?',) + codes[5:]

        # Sensors are grouped from the matching channel contents.
        nuts = sorted(
            self.iter_nuts(
                'channel', tmin, tmax, codes), key=lambda nut: nut.dkey)
        self._check_duplicates(nuts)
        return model.Sensor.from_channels(
            self.get_content(nut) for nut in nuts)
1754 @filldocs
1755 def get_responses(
1756 self, obj=None, tmin=None, tmax=None, time=None, codes=None):
1758 '''
1759 Get instrument responses matching given constraints.
1761 %(query_args)s
1763 :returns:
1764 List of :py:class:`~pyrocko.squirrel.model.Response` objects.
1766 See :py:meth:`iter_nuts` for details on time span matching.
1767 '''
1769 args = self._get_selection_args(obj, tmin, tmax, time, codes)
1770 nuts = sorted(
1771 self.iter_nuts('response', *args), key=lambda nut: nut.dkey)
1772 self._check_duplicates(nuts)
1773 return [self.get_content(nut) for nut in nuts]
1775 @filldocs
1776 def get_response(
1777 self, obj=None, tmin=None, tmax=None, time=None, codes=None):
1779 '''
1780 Get instrument response matching given constraints.
1782 %(query_args)s
1784 :returns:
1785 :py:class:`~pyrocko.squirrel.model.Response` object.
1787 Same as :py:meth:`get_responses` but returning exactly one response.
1788 Raises :py:exc:`~pyrocko.squirrel.error.NotAvailable` if zero or more
1789 than one is available.
1791 See :py:meth:`iter_nuts` for details on time span matching.
1792 '''
1794 responses = self.get_responses(obj, tmin, tmax, time, codes)
1795 if len(responses) == 0:
1796 raise error.NotAvailable(
1797 'No instrument response available.')
1798 elif len(responses) > 1:
1799 raise error.NotAvailable(
1800 'Multiple instrument responses matching given constraints.')
1802 return responses[0]
1804 @filldocs
1805 def get_events(
1806 self, obj=None, tmin=None, tmax=None, time=None, codes=None):
1808 '''
1809 Get events matching given constraints.
1811 %(query_args)s
1813 :returns:
1814 List of :py:class:`~pyrocko.model.event.Event` objects.
1816 See :py:meth:`iter_nuts` for details on time span matching.
1817 '''
1819 args = self._get_selection_args(obj, tmin, tmax, time, codes)
1820 nuts = sorted(
1821 self.iter_nuts('event', *args), key=lambda nut: nut.dkey)
1822 self._check_duplicates(nuts)
1823 return [self.get_content(nut) for nut in nuts]
1825 def _redeem_promises(self, *args):
1827 tmin, tmax, _ = args
1829 waveforms = list(self.iter_nuts('waveform', *args))
1830 promises = list(self.iter_nuts('waveform_promise', *args))
1832 codes_to_avail = defaultdict(list)
1833 for nut in waveforms:
1834 codes_to_avail[nut.codes].append((nut.tmin, nut.tmax+nut.deltat))
1836 def tts(x):
1837 if isinstance(x, tuple):
1838 return tuple(tts(e) for e in x)
1839 elif isinstance(x, list):
1840 return list(tts(e) for e in x)
1841 else:
1842 return util.time_to_str(x)
1844 orders = []
1845 for promise in promises:
1846 waveforms_avail = codes_to_avail[promise.codes]
1847 for block_tmin, block_tmax in blocks(
1848 max(tmin, promise.tmin),
1849 min(tmax, promise.tmax),
1850 promise.deltat):
1852 orders.append(
1853 WaveformOrder(
1854 source_id=promise.file_path,
1855 codes=tuple(promise.codes.split(separator)),
1856 tmin=block_tmin,
1857 tmax=block_tmax,
1858 deltat=promise.deltat,
1859 gaps=gaps(waveforms_avail, block_tmin, block_tmax)))
1861 orders_noop, orders = lpick(lambda order: order.gaps, orders)
1863 order_keys_noop = set(order_key(order) for order in orders_noop)
1864 if len(order_keys_noop) != 0 or len(orders_noop) != 0:
1865 logger.info(
1866 'Waveform orders already satisified with cached/local data: '
1867 '%i (%i)' % (len(order_keys_noop), len(orders_noop)))
1869 source_ids = []
1870 sources = {}
1871 for source in self._sources:
1872 if isinstance(source, fdsn.FDSNSource):
1873 source_ids.append(source._source_id)
1874 sources[source._source_id] = source
1876 source_priority = dict(
1877 (source_id, i) for (i, source_id) in enumerate(source_ids))
1879 order_groups = defaultdict(list)
1880 for order in orders:
1881 order_groups[order_key(order)].append(order)
1883 for k, order_group in order_groups.items():
1884 order_group.sort(
1885 key=lambda order: source_priority[order.source_id])
1887 n_order_groups = len(order_groups)
1889 if len(order_groups) != 0 or len(orders) != 0:
1890 logger.info(
1891 'Waveform orders standing for download: %i (%i)'
1892 % (len(order_groups), len(orders)))
1894 task = make_task('Waveform orders processed', n_order_groups)
1895 else:
1896 task = None
1898 def split_promise(order):
1899 self._split_nuts(
1900 'waveform_promise',
1901 order.tmin, order.tmax,
1902 codes=order.codes,
1903 path=order.source_id)
1905 def release_order_group(order):
1906 okey = order_key(order)
1907 for followup in order_groups[okey]:
1908 split_promise(followup)
1910 del order_groups[okey]
1912 if task:
1913 task.update(n_order_groups - len(order_groups))
1915 def noop(order):
1916 pass
1918 def success(order):
1919 release_order_group(order)
1920 split_promise(order)
1922 def batch_add(paths):
1923 self.add(paths)
1925 calls = queue.Queue()
1927 def enqueue(f):
1928 def wrapper(*args):
1929 calls.put((f, args))
1931 return wrapper
1933 for order in orders_noop:
1934 split_promise(order)
1936 while order_groups:
1938 orders_now = []
1939 empty = []
1940 for k, order_group in order_groups.items():
1941 try:
1942 orders_now.append(order_group.pop(0))
1943 except IndexError:
1944 empty.append(k)
1946 for k in empty:
1947 del order_groups[k]
1949 by_source_id = defaultdict(list)
1950 for order in orders_now:
1951 by_source_id[order.source_id].append(order)
1953 threads = []
1954 for source_id in by_source_id:
1955 def download():
1956 try:
1957 sources[source_id].download_waveforms(
1958 by_source_id[source_id],
1959 success=enqueue(success),
1960 error_permanent=enqueue(split_promise),
1961 error_temporary=noop,
1962 batch_add=enqueue(batch_add))
1964 finally:
1965 calls.put(None)
1967 thread = threading.Thread(target=download)
1968 thread.start()
1969 threads.append(thread)
1971 ndone = 0
1972 while ndone < len(threads):
1973 ret = calls.get()
1974 if ret is None:
1975 ndone += 1
1976 else:
1977 ret[0](*ret[1])
1979 for thread in threads:
1980 thread.join()
1982 if task:
1983 task.update(n_order_groups - len(order_groups))
1985 if task:
1986 task.done()
1988 @filldocs
1989 def get_waveform_nuts(
1990 self, obj=None, tmin=None, tmax=None, time=None, codes=None):
1992 '''
1993 Get waveform content entities matching given constraints.
1995 %(query_args)s
1997 Like :py:meth:`get_nuts` with ``kind='waveform'`` but additionally
1998 resolves matching waveform promises (downloads waveforms from remote
1999 sources).
2001 See :py:meth:`iter_nuts` for details on time span matching.
2002 '''
2004 args = self._get_selection_args(obj, tmin, tmax, time, codes)
2005 self._redeem_promises(*args)
2006 return sorted(
2007 self.iter_nuts('waveform', *args), key=lambda nut: nut.dkey)
    @filldocs
    def get_waveforms(
            self, obj=None, tmin=None, tmax=None, time=None, codes=None,
            uncut=False, want_incomplete=True, degap=True, maxgap=5,
            maxlap=None, snap=None, include_last=False, load_data=True,
            accessor_id='default', operator_params=None):

        '''
        Get waveforms matching given constraints.

        %(query_args)s

        :param uncut:
            Set to ``True``, to disable cutting traces to [``tmin``, ``tmax``]
            and to disable degapping/deoverlapping. Returns untouched traces as
            they are read from file segment. File segments are always read in
            their entirety.
        :type uncut:
            bool

        :param want_incomplete:
            If ``True``, gappy/incomplete traces are included in the result.
        :type want_incomplete:
            bool

        :param degap:
            If ``True``, connect traces and remove gaps and overlaps.
        :type degap:
            bool

        :param maxgap:
            Maximum gap size in samples which is filled with interpolated
            samples when ``degap`` is ``True``.
        :type maxgap:
            int

        :param maxlap:
            Maximum overlap size in samples which is removed when ``degap`` is
            ``True``
        :type maxlap:
            int

        :param snap:
            Rounding functions used when computing sample index from time
            instance, for trace start and trace end, respectively. By default,
            ``(round, round)`` is used.
        :type snap:
            tuple of 2 callables

        :param include_last:
            If ``True``, add one more sample to the returned traces (the sample
            which would be the first sample of a query with ``tmin`` set to the
            current value of ``tmax``).
        :type include_last:
            bool

        :param load_data:
            If ``True``, waveform data samples are read from files (or cache).
            If ``False``, meta-information-only traces are returned (dummy
            traces with no data samples).
        :type load_data:
            bool

        :param accessor_id:
            Name of consumer on who's behalf data is accessed. Used in cache
            management (see :py:mod:`~pyrocko.squirrel.cache`). Used as a key
            to distinguish different points of extraction for the decision of
            when to release cached waveform data. Should be used when data is
            alternately extracted from more than one region / selection.
        :type accessor_id:
            str

        See :py:meth:`iter_nuts` for details on time span matching.

        Loaded data is kept in memory (at least) until
        :py:meth:`clear_accessor` has been called or
        :py:meth:`advance_accessor` has been called two consecutive times
        without data being accessed between the two calls (by this accessor).
        Data may still be further kept in the memory cache if held alive by
        consumers with a different ``accessor_id``.
        '''

        tmin, tmax, codes = self._get_selection_args(
            obj, tmin, tmax, time, codes)

        self_tmin, self_tmax = self.get_time_span(
            ['waveform', 'waveform_promise'])

        if None in (self_tmin, self_tmax):
            logger.warning(
                'No waveforms available.')
            return []

        # Fall back to the selection's own span for open-ended queries.
        tmin = tmin if tmin is not None else self_tmin
        tmax = tmax if tmax is not None else self_tmax

        if codes is not None:
            # Operator-handled codes (derived/composed channels) are served
            # by the responsible operator instead of the plain nut lookup.
            operator = self.get_operator(codes)
            if operator is not None:
                return operator.get_waveforms(
                    self, codes,
                    tmin=tmin, tmax=tmax,
                    uncut=uncut, want_incomplete=want_incomplete, degap=degap,
                    maxgap=maxgap, maxlap=maxlap, snap=snap,
                    include_last=include_last, load_data=load_data,
                    accessor_id=accessor_id, params=operator_params)

        # Resolves waveform promises (may trigger downloads) and returns
        # the matching waveform nuts sorted by dkey.
        nuts = self.get_waveform_nuts(obj, tmin, tmax, time, codes)

        if load_data:
            # 'waveform' selects the waveform content cache (cache_id).
            traces = [
                self.get_content(nut, 'waveform', accessor_id) for nut in nuts]

        else:
            traces = [
                trace.Trace(**nut.trace_kwargs) for nut in nuts]

        if uncut:
            return traces

        if snap is None:
            snap = (round, round)

        chopped = []
        for tr in traces:
            if not load_data and tr.ydata is not None:
                # Cached trace may carry data although meta-only was asked
                # for; strip a copy, leaving the cached object untouched.
                tr = tr.copy(data=False)
                tr.ydata = None

            try:
                chopped.append(tr.chop(
                    tmin, tmax,
                    inplace=False,
                    snap=snap,
                    include_last=include_last))

            except trace.NoData:
                # Trace does not overlap the requested span after snapping.
                pass

        processed = self._process_chopped(
            chopped, degap, maxgap, maxlap, want_incomplete, tmin, tmax)

        return processed
2153 @filldocs
2154 def chopper_waveforms(
2155 self, obj=None, tmin=None, tmax=None, time=None, codes=None,
2156 tinc=None, tpad=0.,
2157 want_incomplete=True, snap_window=False,
2158 degap=True, maxgap=5, maxlap=None,
2159 snap=None, include_last=False, load_data=True,
2160 accessor_id=None, clear_accessor=True, operator_params=None):
2162 '''
2163 Iterate window-wise over waveform archive.
2165 %(query_args)s
2167 :param tinc:
2168 Time increment (window shift time) (default uses ``tmax-tmin``)
2169 :type tinc:
2170 timestamp
2172 :param tpad:
2173 Padding time appended on either side of the data window (window
2174 overlap is ``2*tpad``).
2175 :type tpad:
2176 timestamp
2178 :param want_incomplete:
2179 If ``True``, gappy/incomplete traces are included in the result.
2180 :type want_incomplete:
2181 bool
2183 :param snap_window:
2184 If ``True``, start time windows at multiples of tinc with respect
2185 to system time zero.
2187 :param degap:
2188 If ``True``, connect traces and remove gaps and overlaps.
2189 :type degap:
2190 bool
2192 :param maxgap:
2193 Maximum gap size in samples which is filled with interpolated
2194 samples when ``degap`` is ``True``.
2195 :type maxgap:
2196 int
2198 :param maxlap:
2199 Maximum overlap size in samples which is removed when ``degap`` is
2200 ``True``
2201 :type maxlap:
2202 int
2204 :param snap:
2205 Rounding functions used when computing sample index from time
2206 instance, for trace start and trace end, respectively. By default,
2207 ``(round, round)`` is used.
2208 :type snap:
2209 tuple of 2 callables
2211 :param include_last:
2212 If ``True``, add one more sample to the returned traces (the sample
2213 which would be the first sample of a query with ``tmin`` set to the
2214 current value of ``tmax``).
2215 :type include_last:
2216 bool
2218 :param load_data:
2219 If ``True``, waveform data samples are read from files (or cache).
2220 If ``False``, meta-information-only traces are returned (dummy
2221 traces with no data samples).
2222 :type load_data:
2223 bool
2225 :param accessor_id:
2226 Name of consumer on who's behalf data is accessed. Used in cache
2227 management (see :py:mod:`~pyrocko.squirrel.cache`). Used as a key
2228 to distinguish different points of extraction for the decision of
2229 when to release cached waveform data. Should be used when data is
2230 alternately extracted from more than one region / selection.
2231 :type accessor_id:
2232 str
2234 :param clear_accessor:
2235 If ``True`` (default), :py:meth:`clear_accessor` is called when the
2236 chopper finishes. Set to ``False`` to keep loaded waveforms in
2237 memory when the generator returns.
2239 :yields:
2240 A list of :py:class:`~pyrocko.trace.Trace` objects for every
2241 extracted time window.
2243 See :py:meth:`iter_nuts` for details on time span matching.
2244 '''
2246 tmin, tmax, codes = self._get_selection_args(
2247 obj, tmin, tmax, time, codes)
2249 self_tmin, self_tmax = self.get_time_span(
2250 ['waveform', 'waveform_promise'])
2252 if None in (self_tmin, self_tmax):
2253 logger.warning(
2254 'Content has undefined time span. No waveforms and no '
2255 'waveform promises?')
2256 return
2258 if snap_window and tinc is not None:
2259 tmin = tmin if tmin is not None else self_tmin
2260 tmax = tmax if tmax is not None else self_tmax
2261 tmin = math.floor(tmin / tinc) * tinc
2262 tmax = math.ceil(tmax / tinc) * tinc
2263 else:
2264 tmin = tmin if tmin is not None else self_tmin + tpad
2265 tmax = tmax if tmax is not None else self_tmax - tpad
2267 tinc = tinc if tinc is not None else tmax - tmin
2269 try:
2270 if accessor_id is None:
2271 accessor_id = 'chopper%i' % self._n_choppers_active
2273 self._n_choppers_active += 1
2275 eps = tinc * 1e-6
2276 if tinc != 0.0:
2277 nwin = int(((tmax - eps) - tmin) / tinc) + 1
2278 else:
2279 nwin = 1
2281 for iwin in range(nwin):
2282 wmin, wmax = tmin+iwin*tinc, min(tmin+(iwin+1)*tinc, tmax)
2283 chopped = []
2284 wmin, wmax = tmin+iwin*tinc, min(tmin+(iwin+1)*tinc, tmax)
2285 eps = tinc*1e-6
2286 if wmin >= tmax-eps:
2287 break
2289 chopped = self.get_waveforms(
2290 tmin=wmin-tpad,
2291 tmax=wmax+tpad,
2292 codes=codes,
2293 snap=snap,
2294 include_last=include_last,
2295 load_data=load_data,
2296 want_incomplete=want_incomplete,
2297 degap=degap,
2298 maxgap=maxgap,
2299 maxlap=maxlap,
2300 accessor_id=accessor_id,
2301 operator_params=operator_params)
2303 self.advance_accessor(accessor_id)
2305 yield Batch(
2306 tmin=wmin,
2307 tmax=wmax,
2308 i=iwin,
2309 n=nwin,
2310 traces=chopped)
2312 iwin += 1
2314 finally:
2315 self._n_choppers_active -= 1
2316 if clear_accessor:
2317 self.clear_accessor(accessor_id, 'waveform')
2319 def _process_chopped(
2320 self, chopped, degap, maxgap, maxlap, want_incomplete, tmin, tmax):
2322 chopped.sort(key=lambda a: a.full_id)
2323 if degap:
2324 chopped = trace.degapper(chopped, maxgap=maxgap, maxlap=maxlap)
2326 if not want_incomplete:
2327 chopped_weeded = []
2328 for tr in chopped:
2329 emin = tr.tmin - tmin
2330 emax = tr.tmax + tr.deltat - tmax
2331 if (abs(emin) <= 0.5*tr.deltat and abs(emax) <= 0.5*tr.deltat):
2332 chopped_weeded.append(tr)
2334 elif degap:
2335 if (0. < emin <= 5. * tr.deltat
2336 and -5. * tr.deltat <= emax < 0.):
2338 tr.extend(tmin, tmax-tr.deltat, fillmethod='repeat')
2339 chopped_weeded.append(tr)
2341 chopped = chopped_weeded
2343 return chopped
2345 def _get_pyrocko_stations(
2346 self, obj=None, tmin=None, tmax=None, time=None, codes=None):
2348 from pyrocko import model as pmodel
2350 by_nsl = defaultdict(lambda: (list(), list()))
2351 for station in self.get_stations(obj, tmin, tmax, time, codes):
2352 sargs = station._get_pyrocko_station_args()
2353 nsl = sargs[1:4]
2354 by_nsl[nsl][0].append(sargs)
2356 for channel in self.get_channels(obj, tmin, tmax, time, codes):
2357 sargs = channel._get_pyrocko_station_args()
2358 nsl = sargs[1:4]
2359 sargs_list, channels_list = by_nsl[nsl]
2360 sargs_list.append(sargs)
2361 channels_list.append(channel)
2363 pstations = []
2364 nsls = list(by_nsl.keys())
2365 nsls.sort()
2366 for nsl in nsls:
2367 sargs_list, channels_list = by_nsl[nsl]
2368 sargs = util.consistency_merge(sargs_list)
2370 by_c = defaultdict(list)
2371 for ch in channels_list:
2372 by_c[ch.channel].append(ch._get_pyrocko_channel_args())
2374 chas = list(by_c.keys())
2375 chas.sort()
2376 pchannels = []
2377 for cha in chas:
2378 list_of_cargs = by_c[cha]
2379 cargs = util.consistency_merge(list_of_cargs)
2380 pchannels.append(pmodel.Channel(
2381 name=cargs[0],
2382 azimuth=cargs[1],
2383 dip=cargs[2]))
2385 pstations.append(pmodel.Station(
2386 network=sargs[0],
2387 station=sargs[1],
2388 location=sargs[2],
2389 lat=sargs[3],
2390 lon=sargs[4],
2391 elevation=sargs[5],
2392 depth=sargs[6] or 0.0,
2393 channels=pchannels))
2395 return pstations
2397 @property
2398 def pile(self):
2400 '''
2401 Emulates the older :py:class:`pyrocko.pile.Pile` interface.
2403 This property exposes a :py:class:`pyrocko.squirrel.pile.Pile` object,
2404 which emulates most of the older :py:class:`pyrocko.pile.Pile` methods
2405 but uses the fluffy power of the Squirrel under the hood.
2407 This interface can be used as a drop-in replacement for piles which are
2408 used in existing scripts and programs for efficient waveform data
2409 access. The Squirrel-based pile scales better for large datasets. Newer
2410 scripts should use Squirrel's native methods to avoid the emulation
2411 overhead.
2412 '''
2413 from . import pile
2415 if self._pile is None:
2416 self._pile = pile.Pile(self)
2418 return self._pile
2420 def snuffle(self):
2421 '''
2422 Look at dataset in Snuffler.
2423 '''
2424 self.pile.snuffle()
2426 def _gather_codes_keys(self, kind, gather, selector):
2427 return set(
2428 gather(codes)
2429 for codes in self.iter_codes(kind)
2430 if selector is None or selector(codes))
2432 def __str__(self):
2433 return str(self.get_stats())
2435 def get_coverage(
2436 self, kind, tmin=None, tmax=None, codes_list=None, limit=None):
2438 '''
2439 Get coverage information.
2441 Get information about strips of gapless data coverage.
2443 :param kind:
2444 Content kind to be queried.
2445 :type kind:
2446 str
2448 :param tmin:
2449 Start time of query interval.
2450 :type tmin:
2451 timestamp
2453 :param tmax:
2454 End time of query interval.
2455 :type tmax:
2456 timestamp
2458 :param codes_list:
2459 List of code patterns to query. If not given or empty, an empty
2460 list is returned.
2461 :type codes_list:
2462 :py:class:`list` of :py:class:`tuple` of :py:class:`str`
2464 :param limit:
2465 Limit query to return only up to a given maximum number of entries
2466 per matching channel (without setting this option, very gappy data
2467 could cause the query to execute for a very long time).
2468 :type limit:
2469 int
2471 :returns:
2472 List of entries of the form ``(pattern, codes, deltat, tmin, tmax,
2473 data)`` where ``pattern`` is the request code pattern which
2474 yielded this entry, ``codes`` are the matching channel codes,
2475 ``tmin`` and ``tmax`` are the global min and max times for which
2476 data for this channel is available, regardless of any time
2477 restrictions in the query. ``data`` is a list with (up to
2478 ``limit``) change-points of the form ``(time, count)`` where a
2479 ``count`` of zero indicates a data gap, a value of 1 normal data
2480 coverage and higher values indicate duplicate/redundant data.
2481 '''
2483 tmin_seconds, tmin_offset = model.tsplit(tmin)
2484 tmax_seconds, tmax_offset = model.tsplit(tmax)
2486 kdata_all = []
2487 for pattern in codes_list:
2488 kdata = self.glob_codes(kind, [pattern])
2489 for row in kdata:
2490 row[0:0] = [pattern]
2492 kdata_all.extend(kdata)
2494 kind_codes_ids = [x[1] for x in kdata_all]
2496 counts_at_tmin = {}
2497 if tmin is not None:
2498 for nut in self.iter_nuts(
2499 kind, tmin, tmin, kind_codes_ids=kind_codes_ids):
2501 k = nut.codes, nut.deltat
2502 if k not in counts_at_tmin:
2503 counts_at_tmin[k] = 0
2505 counts_at_tmin[k] += 1
2507 coverage = []
2508 for pattern, kind_codes_id, codes, deltat in kdata_all:
2509 entry = [pattern, codes, deltat, None, None, []]
2510 for i, order in [(0, 'ASC'), (1, 'DESC')]:
2511 sql = self._sql('''
2512 SELECT
2513 time_seconds,
2514 time_offset
2515 FROM %(db)s.%(coverage)s
2516 WHERE
2517 kind_codes_id == ?
2518 ORDER BY
2519 kind_codes_id ''' + order + ''',
2520 time_seconds ''' + order + ''',
2521 time_offset ''' + order + '''
2522 LIMIT 1
2523 ''')
2525 for row in self._conn.execute(sql, [kind_codes_id]):
2526 entry[3+i] = model.tjoin(row[0], row[1])
2528 if None in entry[3:5]:
2529 continue
2531 args = [kind_codes_id]
2533 sql_time = ''
2534 if tmin is not None:
2535 # intentionally < because (== tmin) is queried from nuts
2536 sql_time += ' AND ( ? < time_seconds ' \
2537 'OR ( ? == time_seconds AND ? < time_offset ) ) '
2538 args.extend([tmin_seconds, tmin_seconds, tmin_offset])
2540 if tmax is not None:
2541 sql_time += ' AND ( time_seconds < ? ' \
2542 'OR ( ? == time_seconds AND time_offset <= ? ) ) '
2543 args.extend([tmax_seconds, tmax_seconds, tmax_offset])
2545 sql_limit = ''
2546 if limit is not None:
2547 sql_limit = ' LIMIT ?'
2548 args.append(limit)
2550 sql = self._sql('''
2551 SELECT
2552 time_seconds,
2553 time_offset,
2554 step
2555 FROM %(db)s.%(coverage)s
2556 WHERE
2557 kind_codes_id == ?
2558 ''' + sql_time + '''
2559 ORDER BY
2560 kind_codes_id,
2561 time_seconds,
2562 time_offset
2563 ''' + sql_limit)
2565 rows = list(self._conn.execute(sql, args))
2567 if limit is not None and len(rows) == limit:
2568 entry[-1] = None
2569 else:
2570 counts = counts_at_tmin.get((codes, deltat), 0)
2571 tlast = None
2572 if tmin is not None:
2573 entry[-1].append((tmin, counts))
2574 tlast = tmin
2576 for row in rows:
2577 t = model.tjoin(row[0], row[1])
2578 counts += row[2]
2579 entry[-1].append((t, counts))
2580 tlast = t
2582 if tmax is not None and (tlast is None or tlast != tmax):
2583 entry[-1].append((tmax, counts))
2585 coverage.append(entry)
2587 return coverage
    def add_operator(self, op):
        '''
        Attach an operator to this Squirrel instance.

        The operator becomes active after the next call to
        :py:meth:`update_operator_mappings`.
        '''
        self._operators.append(op)
2592 def update_operator_mappings(self):
2593 available = [
2594 separator.join(codes)
2595 for codes in self.get_codes(kind=('channel'))]
2597 for operator in self._operators:
2598 operator.update_mappings(available, self._operator_registry)
2600 def iter_operator_mappings(self):
2601 for operator in self._operators:
2602 for in_codes, out_codes in operator.iter_mappings():
2603 yield operator, in_codes, out_codes
    def get_operator_mappings(self):
        '''
        Get all operator mappings as a list.

        See :py:meth:`iter_operator_mappings` for the entry format.
        '''
        return list(self.iter_operator_mappings())
2608 def get_operator(self, codes):
2609 if isinstance(codes, tuple):
2610 codes = separator.join(codes)
2611 try:
2612 return self._operator_registry[codes][0]
2613 except KeyError:
2614 return None
2616 def get_operator_group(self, codes):
2617 if isinstance(codes, tuple):
2618 codes = separator.join(codes)
2619 try:
2620 return self._operator_registry[codes]
2621 except KeyError:
2622 return None, (None, None, None)
2624 def iter_operator_codes(self):
2625 for _, _, out_codes in self.iter_operator_mappings():
2626 for codes in out_codes:
2627 yield tuple(codes.split(separator))
    def get_operator_codes(self):
        '''
        Get all operator-produced channel code tuples as a list.

        See :py:meth:`iter_operator_codes`.
        '''
        return list(self.iter_operator_codes())
2632 def print_tables(self, table_names=None, stream=None):
2633 '''
2634 Dump raw database tables in textual form (for debugging purposes).
2636 :param table_names:
2637 Names of tables to be dumped or ``None`` to dump all.
2638 :type table_names:
2639 :py:class:`list` of :py:class:`str`
2641 :param stream:
2642 Open file or ``None`` to dump to standard output.
2643 '''
2645 if stream is None:
2646 stream = sys.stdout
2648 if isinstance(table_names, str):
2649 table_names = [table_names]
2651 if table_names is None:
2652 table_names = [
2653 'selection_file_states',
2654 'selection_nuts',
2655 'selection_kind_codes_count',
2656 'files', 'nuts', 'kind_codes', 'kind_codes_count']
2658 m = {
2659 'selection_file_states': '%(db)s.%(file_states)s',
2660 'selection_nuts': '%(db)s.%(nuts)s',
2661 'selection_kind_codes_count': '%(db)s.%(kind_codes_count)s',
2662 'files': 'files',
2663 'nuts': 'nuts',
2664 'kind_codes': 'kind_codes',
2665 'kind_codes_count': 'kind_codes_count'}
2667 for table_name in table_names:
2668 self._database.print_table(
2669 m[table_name] % self._names, stream=stream)
class SquirrelStats(Object):
    '''
    Container to hold statistics about contents available from a Squirrel.

    See also :py:meth:`Squirrel.get_stats`.
    '''

    nfiles = Int.T(
        help='Number of files in selection.')
    nnuts = Int.T(
        help='Number of index nuts in selection.')
    codes = List.T(
        Tuple.T(content_t=String.T()),
        help='Available code sequences in selection, e.g. '
             '(agency, network, station, location) for stations nuts.')
    kinds = List.T(
        String.T(),
        help='Available content types in selection.')
    total_size = Int.T(
        help='Aggregated file size of files in selection.')
    counts = Dict.T(
        String.T(), Dict.T(Tuple.T(content_t=String.T()), Int.T()),
        help='Breakdown of how many nuts of any content type and code '
             'sequence are available in selection, ``counts[kind][codes]``.')
    time_spans = Dict.T(
        String.T(), Tuple.T(content_t=Timestamp.T()),
        help='Time spans by content type.')
    sources = List.T(
        String.T(),
        help='Descriptions of attached sources.')
    operators = List.T(
        String.T(),
        help='Descriptions of attached operators.')

    def __str__(self):
        kind_counts = dict(
            (kind, sum(self.counts[kind].values())) for kind in self.kinds)

        scodes = model.codes_to_str_abbreviated(self.codes)

        ssources = '<none>' if not self.sources else '\n' + '\n'.join(
            ' ' + s for s in self.sources)

        soperators = '<none>' if not self.operators else '\n' + '\n'.join(
            ' ' + s for s in self.operators)

        def stime(t):
            # Sentinel min/max times are rendered as '<none>'.
            return util.tts(t) if t is not None and t not in (
                model.g_tmin, model.g_tmax) else '<none>'

        def stable(rows):
            # Simple column-aligned table formatting.
            ns = [max(len(w) for w in col) for col in zip(*rows)]
            return '\n'.join(
                ' '.join(w.ljust(n) for n, w in zip(ns, row))
                for row in rows)

        def indent(s):
            return '\n'.join(' '+line for line in s.splitlines())

        stspans = '<none>' if not self.kinds else '\n' + indent(stable([(
            kind + ':',
            str(kind_counts[kind]),
            stime(self.time_spans[kind][0]),
            '-',
            stime(self.time_spans[kind][1])) for kind in sorted(self.kinds)]))

        s = '''
Number of files: %i
Total size of known files: %s
Number of index nuts: %i
Available content kinds: %s
Available codes: %s
Sources: %s
Operators: %s''' % (
            self.nfiles,
            util.human_bytesize(self.total_size),
            self.nnuts,
            stspans, scodes, ssources, soperators)

        return s.lstrip()
# Public API of this module.
__all__ = [
    'Squirrel',
    'SquirrelStats',
]