1# http://pyrocko.org - GPLv3
2#
3# The Pyrocko Developers, 21st Century
4# ---|P------/S----------~Lg----------
6from __future__ import absolute_import, print_function
8import os.path as op
9import logging
11from pyrocko.guts import List, load, StringPattern
13from ..has_paths import HasPaths
14from .client.base import Source
15from .error import SquirrelError
16from .selection import re_persistent_name
17from .operators.base import Operator
# Module-level ``guts_prefix``; presumably picked up by pyrocko.guts as the
# type-name prefix for the classes declared in this module -- TODO confirm.
guts_prefix = 'squirrel'

# Logger used by this module, registered under the name 'psq.dataset'.
logger = logging.getLogger('psq.dataset')
class PersistentID(StringPattern):
    '''
    String type for persistent selection names.

    Values are matched against ``re_persistent_name``.
    '''

    pattern = re_persistent_name
class Dataset(HasPaths):
    '''
    Dataset description.

    Holds a list of sources, a list of operators and an optional persistent
    selection name, and can apply them to a squirrel instance.
    '''

    # Sources handed to ``squirrel.add_source()`` in :py:meth:`setup`.
    sources = List.T(Source.T())
    # Optional name passed as ``persistent`` when creating the squirrel.
    persistent = PersistentID.T(optional=True)
    # Operators handed to ``squirrel.add_operator()`` in :py:meth:`setup`.
    operators = List.T(Operator.T())

    def setup(self, squirrel, check=True):
        '''
        Add this dataset's sources and operators to ``squirrel``.

        :param squirrel: Object receiving the sources and operators.
        :param check: Forwarded as ``check`` to every ``add_source()`` call.
        '''
        for src in self.sources:
            squirrel.add_source(src, check=check)

        for oper in self.operators:
            squirrel.add_operator(oper)

        # Refresh the mappings once, after all operators have been added.
        squirrel.update_operator_mappings()

    def get_squirrel(
            self,
            update=False,
            check=True,
            how_to_update='Avoiding dataset rescan. '
                          'Enable updating to force refresh or delete the '
                          'persistent selection for a clean start.'):
        '''
        Create a squirrel instance for this dataset.

        :param update: If true, the dataset is (re-)added even when an
            already existing persistent selection is found.
        :param check: Forwarded as ``check`` to ``squirrel.add_dataset()``.
        :param how_to_update: Hint logged at info level when an existing
            persistent selection is reused without updating.
        :returns: The squirrel instance.
        '''
        # Imported here rather than at module level.
        from pyrocko.squirrel import base

        sq = base.Squirrel(persistent=self.persistent)

        # ``is_new()`` is only consulted when a persistent name is set.
        have_existing = self.persistent and not sq.is_new()

        if have_existing and not update:
            logger.info(
                'Using existing persistent selection: %s' % self.persistent)
            logger.info(how_to_update)
            return sq

        if have_existing:
            logger.info(
                'Updating existing persistent selection: %s'
                % self.persistent)

        sq.add_dataset(self, check=check)
        return sq
def read_dataset(path):
    '''
    Read dataset description file.

    :param path: Path of the dataset description file.
    :returns: The loaded :py:class:`Dataset`. Its base path is set to the
        directory part of ``path``, or to ``'.'`` if that part is empty.
    :raises SquirrelError: If loading fails with :py:exc:`OSError`, or if the
        loaded object is not a :py:class:`Dataset`.
    '''
    try:
        loaded = load(filename=path)
    except OSError:
        raise SquirrelError('Cannot read dataset file: %s' % path)

    if isinstance(loaded, Dataset):
        # Fall back to the current directory for bare file names.
        basepath = op.dirname(path) or '.'
        loaded.set_basepath(basepath)
        return loaded

    raise SquirrelError('Invalid dataset file "%s".' % path)
def from_dataset(path, update=False, check=True):
    '''
    Read a dataset description file and get a squirrel instance from it.

    :param path: Path of the dataset description file, passed to
        :py:func:`read_dataset`.
    :param update: Forwarded to :py:meth:`Dataset.get_squirrel`.
    :param check: Forwarded to :py:meth:`Dataset.get_squirrel`.
    :returns: Whatever :py:meth:`Dataset.get_squirrel` returns.
    '''
    return read_dataset(path).get_squirrel(update=update, check=check)
# Names exported by ``from <module> import *``.
__all__ = [
    'PersistentID',
    'Dataset',
    'read_dataset',
    'from_dataset'
]