Source code for pyrocko.squirrel.dataset
# http://pyrocko.org - GPLv3
#
# The Pyrocko Developers, 21st Century
# ---|P------/S----------~Lg----------
'''
Portable dataset description.

The :py:class:`Dataset` class defines sets of local and remote data-sources to
be used in combination in Squirrel-based programs. By convention,
Squirrel-based programs accept the ``--dataset`` option to read such dataset
descriptions from file. To add a dataset programmatically to a
:py:class:`~pyrocko.squirrel.base.Squirrel` instance, use
:py:meth:`~pyrocko.squirrel.base.Squirrel.add_dataset`.
'''
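# Typical usage (illustrative sketch; the dataset file name below is
# hypothetical):
#
#     from pyrocko.squirrel import Squirrel
#     from pyrocko.squirrel.dataset import read_dataset
#
#     sq = Squirrel()
#     sq.add_dataset(read_dataset('data.dataset.yaml'))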
import os.path as op
import logging

from pyrocko.guts import List, load, StringPattern, String

from ..has_paths import HasPaths
from .client.base import Source
from .client.catalog import CatalogSource
from .client.fdsn import FDSNSource
from .error import SquirrelError
from .selection import re_persistent_name
from .operators.base import Operator

guts_prefix = 'squirrel'

logger = logging.getLogger('psq.dataset')


class PersistentID(StringPattern):
    pattern = re_persistent_name
def make_builtin_datasets():
    datasets = {}

    for site in ['isc', 'geofon', 'gcmt']:
        for magnitude_min in [4.0, 5.0, 6.0, 7.0]:
            name = 'events-%s-m%g' % (site, magnitude_min)
            datasets[name] = Dataset(
                sources=[
                    CatalogSource(
                        catalog=site,
                        query_args=dict(magmin=magnitude_min))],
                comment='Event catalog: %s, minimum magnitude: %g' % (
                    site, magnitude_min))

    for site, network, cha in [
            ('bgr', 'gr', 'lh'),
            ('up', None, None)]:

        name = 'fdsn-' + '-'.join(
            x for x in (site, network, cha) if x is not None)

        query_args = {}
        comments = ['FDSN: %s' % site]
        if network is not None:
            query_args['network'] = network.upper()
            comments.append('network: %s' % query_args['network'])

        if cha is not None:
            query_args['channel'] = cha.upper() + '?'
            comments.append('channels: %s' % query_args['channel'])

        datasets[name] = Dataset(
            sources=[FDSNSource(site=site, query_args=query_args)],
            comment=', '.join(comments))

    from pyrocko import gato
    datasets['gato-named-arrays'] = gato.get_named_arrays_dataset()
    arrays = gato.get_named_arrays()
    for aname in sorted(arrays.keys()):
        datasets['gato-%s' % aname] = gato.get_named_arrays_dataset(aname)

    return datasets
g_builtin_datasets = None


def get_builtin_datasets():
    global g_builtin_datasets
    if g_builtin_datasets is None:
        g_builtin_datasets = make_builtin_datasets()

    return g_builtin_datasets
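# Example (sketch): built-in datasets are looked up by the names generated
# above, e.g. 'events-geofon-m6', 'fdsn-bgr-gr-lh' or 'fdsn-up':
#
#     ds = get_builtin_datasets()['events-geofon-m6']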
class Dataset(HasPaths):
    '''
    Dataset description.
    '''
    sources = List.T(Source.T())
    operators = List.T(Operator.T())
    comment = String.T(optional=True)

    def setup(self, squirrel, check=True):
        for source in self.sources:
            squirrel.add_source(
                source, check=check)

        for operator in self.operators:
            squirrel.add_operator(operator)

        squirrel.update_operator_mappings()
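# A dataset description file is a Guts YAML document of this class. Minimal
# sketch (assuming the ``squirrel`` guts_prefix declared above also applies to
# the source classes; site and comment are example values):
#
#     --- !squirrel.Dataset
#     comment: 'GEOFON FDSN waveforms'
#     sources:
#     - !squirrel.FDSNSource
#       site: geofon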
def read_dataset(path):
    '''
    Read dataset description file.
    '''
    if path.startswith(':'):
        name = path[1:]
        datasets = get_builtin_datasets()
        try:
            return datasets[name]
        except KeyError:
            raise SquirrelError(
                ('No dataset name given. '
                 if not name else 'Named dataset not found: %s' % name) +
                '\n Use `squirrel dataset` to get information about '
                'available datasets. Available:\n'
                ' %s' % '\n '.join(
                    sorted(datasets.keys())))

    try:
        dataset = load(filename=path)
    except OSError:
        raise SquirrelError(
            'Cannot read dataset file: %s' % path)

    if not isinstance(dataset, Dataset):
        raise SquirrelError('Invalid dataset file "%s".' % path)

    dataset.set_basepath(op.dirname(path) or '.')
    return dataset
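# Example (sketch): a built-in dataset is selected with a leading colon, a
# description file by its path (the path below is hypothetical):
#
#     ds = read_dataset(':fdsn-bgr-gr-lh')
#     ds = read_dataset('path/to/stations.dataset.yaml')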
__all__ = [
    'PersistentID',
    'Dataset',
    'read_dataset',
]