Coverage for /usr/local/lib/python3.11/dist-packages/pyrocko/squirrel/dataset.py: 45%
55 statements
« prev ^ index » next coverage.py v6.5.0, created at 2024-03-07 11:54 +0000
« prev ^ index » next coverage.py v6.5.0, created at 2024-03-07 11:54 +0000
1# http://pyrocko.org - GPLv3
2#
3# The Pyrocko Developers, 21st Century
4# ---|P------/S----------~Lg----------
6'''
7Portable dataset description.
9The :py:class:`Dataset` class defines sets of local and remote data-sources to
10be used in combination in Squirrel-based programs. By convention,
11Squirrel-based programs accept the ``--dataset`` option to read such dataset
12descriptions from file. To add a dataset programmatically to a
13:py:class:`~pyrocko.squirrel.base.Squirrel` instance, use
14:py:meth:`~pyrocko.squirrel.base.Squirrel.add_dataset`.
15'''
17import os.path as op
18import logging
20from pyrocko.guts import List, load, StringPattern, String
22from ..has_paths import HasPaths
23from .client.base import Source
24from .client.catalog import CatalogSource
25from .client.fdsn import FDSNSource
26from .error import SquirrelError
27from .selection import re_persistent_name
28from .operators.base import Operator
# NOTE(review): presumably read by pyrocko.guts as the type-name prefix for
# the classes declared in this module - confirm against the guts docs.
guts_prefix = 'squirrel'

# Logger for this module, registered under the name 'psq.dataset'.
logger = logging.getLogger('psq.dataset')
class PersistentID(StringPattern):
    '''
    String type whose ``pattern`` is ``re_persistent_name``.
    '''

    pattern = re_persistent_name
def make_builtin_datasets():
    '''
    Build the table of built-in datasets.

    One event-catalog dataset is created for every combination of catalog
    site (``isc``, ``geofon``, ``gcmt``) and minimum magnitude (4, 5, 6, 7),
    stored under the name ``events-<site>-m<magnitude>``. In addition, one
    FDSN dataset is created for each ``(site, network, channel)`` preset,
    stored under the name ``fdsn-<site>-<network>-<channel>``.

    :returns:
        Dictionary mapping dataset name to :py:class:`Dataset` object.
    '''
    catalog_sites = ['isc', 'geofon', 'gcmt']
    magnitude_thresholds = [4.0, 5.0, 6.0, 7.0]

    # Sites vary slowest, magnitudes fastest, so entries are inserted in the
    # order site-by-site, then by increasing magnitude.
    builtin = {
        'events-%s-m%g' % (catalog, magmin): Dataset(
            sources=[
                CatalogSource(
                    catalog=catalog,
                    query_args=dict(magmin=magmin))],
            comment='Event catalog: %s, minimum magnitude: %g' % (
                catalog, magmin))
        for catalog in catalog_sites
        for magmin in magnitude_thresholds}

    fdsn_presets = [('bgr', 'gr', 'lh')]
    for fdsn_site, net_code, band in fdsn_presets:
        # The dataset name uses the lower-case codes as given; the query
        # uses the upper-cased network code and the upper-cased channel
        # code with a trailing '?'.
        key = 'fdsn-%s-%s-%s' % (fdsn_site, net_code, band)
        channel_pattern = band.upper() + '?'
        net_upper = net_code.upper()
        builtin[key] = Dataset(
            sources=[
                FDSNSource(
                    site=fdsn_site,
                    query_args=dict(
                        network=net_upper, channel=channel_pattern))],
            comment='FDSN: %s, network: %s, channels: %s' % (
                fdsn_site, net_upper, channel_pattern))

    return builtin
# Most recently built table of built-in datasets (name -> Dataset), or None
# while get_builtin_datasets() has not been called yet.
g_builtin_datasets = None


def get_builtin_datasets():
    '''
    Get the built-in datasets.

    The table is rebuilt with :py:func:`make_builtin_datasets` on every call
    and also stored in the module-level ``g_builtin_datasets``.

    :returns:
        Dictionary mapping dataset name to :py:class:`Dataset` object.
    '''
    global g_builtin_datasets

    # NOTE(review): the stored table is never reused, every call builds new
    # Dataset objects. Confirm whether this is intended (callers get
    # independent objects) before turning it into a cache.
    fresh = make_builtin_datasets()
    g_builtin_datasets = fresh
    return fresh
class Dataset(HasPaths):
    '''
    Dataset description.

    Holds a list of data sources, a list of operators and an optional
    comment.
    '''

    # Data sources; passed one by one to ``squirrel.add_source`` in setup().
    sources = List.T(Source.T())
    # Operators; passed one by one to ``squirrel.add_operator`` in setup().
    operators = List.T(Operator.T())
    # Optional comment string.
    comment = String.T(optional=True)

    def setup(self, squirrel, check=True):
        '''
        Add this dataset's sources and operators to ``squirrel``.

        All sources are added first, then all operators, and finally
        ``squirrel.update_operator_mappings()`` is called once.

        :param squirrel:
            Object providing ``add_source``, ``add_operator`` and
            ``update_operator_mappings``.
        :param check:
            Passed on unchanged as the ``check`` argument of
            ``squirrel.add_source``.
        '''
        for src in self.sources:
            squirrel.add_source(src, check=check)

        for oper in self.operators:
            squirrel.add_operator(oper)

        squirrel.update_operator_mappings()
def read_dataset(path):
    '''
    Read dataset description file.

    If ``path`` starts with ``':'``, the remainder is taken as the name of a
    built-in dataset and looked up in the table returned by
    :py:func:`get_builtin_datasets`; no file is read in that case. Otherwise
    ``path`` is loaded as a file and the base path of the resulting dataset
    is set to the directory containing the file (``'.'`` if ``path`` has no
    directory part).

    :param path:
        File path, or ``':<name>'`` to select a built-in dataset.

    :returns:
        :py:class:`Dataset` object.

    :raises SquirrelError:
        If the named built-in dataset does not exist, if loading the file
        raises :py:exc:`OSError`, or if the loaded object is not a
        :py:class:`Dataset`.
    '''
    if path.startswith(':'):
        name = path[1:]
        builtin = get_builtin_datasets()
        try:
            return builtin[name]
        except KeyError:
            # An empty name (path was just ':') gets its own wording.
            if name:
                problem = 'Named dataset not found: %s' % name
            else:
                problem = 'No dataset name given. '

            listing = '\n '.join(sorted(builtin.keys()))
            hint = (
                '\n Use `squirrel dataset` to get information about '
                'available datasets. Available:\n'
                ' %s' % listing)

            raise SquirrelError(problem + hint)

    try:
        loaded = load(filename=path)
    except OSError:
        raise SquirrelError('Cannot read dataset file: %s' % path)

    if isinstance(loaded, Dataset):
        # Base path is the directory of the description file.
        loaded.set_basepath(op.dirname(path) or '.')
        return loaded

    raise SquirrelError('Invalid dataset file "%s".' % path)
# Names exported by ``from ... import *``.
__all__ = ['PersistentID', 'Dataset', 'read_dataset']