Coverage for /usr/local/lib/python3.11/dist-packages/pyrocko/squirrel/dataset.py: 45%

55 statements  

« prev     ^ index     » next       coverage.py v6.5.0, created at 2024-01-03 09:20 +0000

1# http://pyrocko.org - GPLv3 

2# 

3# The Pyrocko Developers, 21st Century 

4# ---|P------/S----------~Lg---------- 

5 

6''' 

7Portable dataset description. 

8 

9The :py:class:`Dataset` class defines sets of local and remote data-sources to 

10be used in combination in Squirrel-based programs. By convention, 

11Squirrel-based programs accept the ``--dataset`` option to read such dataset 

12descriptions from file. To add a dataset programmatically to a 

13:py:class:`~pyrocko.squirrel.base.Squirrel` instance, use 

14:py:meth:`~pyrocko.squirrel.base.Squirrel.add_dataset`. 

15''' 

16 

17import os.path as op 

18import logging 

19 

20from pyrocko.guts import List, load, StringPattern, String 

21 

22from ..has_paths import HasPaths 

23from .client.base import Source 

24from .client.catalog import CatalogSource 

25from .client.fdsn import FDSNSource 

26from .error import SquirrelError 

27from .selection import re_persistent_name 

28from .operators.base import Operator 

29 

# NOTE(review): presumably the namespace prefix pyrocko.guts uses for the
# types declared in this module - confirm against pyrocko.guts.
guts_prefix = 'squirrel'

# Module-level logger, named 'psq.dataset'.
logger = logging.getLogger('psq.dataset')

33 

34 

class PersistentID(StringPattern):
    '''
    String pattern type whose pattern is ``re_persistent_name``.
    '''

    pattern = re_persistent_name

37 

38 

def make_builtin_datasets():
    '''
    Build the table of built-in datasets.

    Creates one event catalog dataset per combination of catalog site and
    minimum magnitude, named ``'events-<site>-m<magnitude>'``, followed by
    one FDSN dataset per configured ``(site, network, channel prefix)``
    selection, named ``'fdsn-<site>-<network>-<channel prefix>'``.

    :returns:
        Dictionary mapping dataset names to :py:class:`Dataset` objects.
    '''
    catalog_sites = ('isc', 'geofon', 'gcmt')
    magnitude_thresholds = (4.0, 5.0, 6.0, 7.0)

    # Event catalogs: every site is offered at every magnitude threshold.
    builtin = {
        'events-%s-m%g' % (catalog, magmin): Dataset(
            sources=[
                CatalogSource(
                    catalog=catalog,
                    query_args={'magmin': magmin})],
            comment='Event catalog: %s, minimum magnitude: %g' % (
                catalog, magmin))
        for catalog in catalog_sites
        for magmin in magnitude_thresholds}

    # FDSN selections, given in lower case as used in the dataset name.
    fdsn_selections = [('bgr', 'gr', 'lh')]
    for fdsn_site, net_code, band in fdsn_selections:
        key = 'fdsn-%s-%s-%s' % (fdsn_site, net_code, band)

        # The query uses upper case codes; '?' is appended to the channel
        # prefix to form the channel pattern.
        channel_pattern = band.upper() + '?'
        net_upper = net_code.upper()

        builtin[key] = Dataset(
            sources=[
                FDSNSource(
                    site=fdsn_site,
                    query_args={
                        'network': net_upper,
                        'channel': channel_pattern})],
            comment='FDSN: %s, network: %s, channels: %s' % (
                fdsn_site, net_upper, channel_pattern))

    return builtin

66 

67 

# Cache for the built-in datasets; stays ``None`` until first requested.
g_builtin_datasets = None


def get_builtin_datasets():
    '''
    Get the built-in datasets, creating them on first use.

    The result of :py:func:`make_builtin_datasets` is stored in the
    module-level ``g_builtin_datasets``, so that repeated calls return the
    same dictionary instead of rebuilding all datasets every time.

    :returns:
        Dictionary mapping dataset names to :py:class:`Dataset` objects.
    '''
    global g_builtin_datasets

    # Only build the datasets when the cache has not been filled yet.
    if g_builtin_datasets is None:
        g_builtin_datasets = make_builtin_datasets()

    return g_builtin_datasets

75 

76 

class Dataset(HasPaths):
    '''
    Dataset description.

    Holds a list of data sources and a list of operators, which
    :py:meth:`setup` registers with a given Squirrel instance.
    '''

    # Data sources handed to ``squirrel.add_source`` in setup().
    sources = List.T(Source.T())
    # Operators handed to ``squirrel.add_operator`` in setup().
    operators = List.T(Operator.T())
    # Optional free-text comment.
    comment = String.T(optional=True)

    def setup(self, squirrel, check=True):
        '''
        Add this dataset's sources and operators to ``squirrel``.

        :param squirrel:
            Object to register with. It must provide ``add_source``,
            ``add_operator`` and ``update_operator_mappings``.
        :param check:
            Passed through unchanged to ``squirrel.add_source``.
        '''
        for src in self.sources:
            squirrel.add_source(src, check=check)

        for oper in self.operators:
            squirrel.add_operator(oper)

        # Refresh the operator mappings once everything has been added.
        squirrel.update_operator_mappings()

94 

95 

def read_dataset(path):
    '''
    Read dataset description file.

    If ``path`` starts with ``':'``, the remainder is taken as the name of a
    built-in dataset and that dataset is returned. Otherwise ``path`` is
    loaded as a dataset description file and the directory containing the
    file (or ``'.'`` if ``path`` has no directory part) is set as the
    dataset's base path.

    :param path:
        File path, or ``':<name>'`` to select a built-in dataset.

    :returns:
        The selected or loaded :py:class:`Dataset`.

    :raises SquirrelError:
        If no built-in dataset of the given name exists, if loading the file
        raises :py:exc:`OSError`, or if the loaded object is not a
        :py:class:`Dataset`.
    '''
    if path.startswith(':'):
        builtin_name = path[1:]
        available = get_builtin_datasets()
        try:
            return available[builtin_name]
        except KeyError:
            # An empty name (path was just ':') gets its own message.
            if builtin_name:
                problem = 'Named dataset not found: %s' % builtin_name
            else:
                problem = 'No dataset name given. '

            listing = '\n '.join(sorted(available.keys()))
            hint = (
                '\n Use `squirrel dataset` to get information about '
                'available datasets. Available:\n'
                ' %s' % listing)

            raise SquirrelError(problem + hint)

    try:
        loaded = load(filename=path)
    except OSError:
        raise SquirrelError(
            'Cannot read dataset file: %s' % path)

    # The file may contain a valid object which is not a dataset.
    if not isinstance(loaded, Dataset):
        raise SquirrelError('Invalid dataset file "%s".' % path)

    base_dir = op.dirname(path) or '.'
    loaded.set_basepath(base_dir)
    return loaded

126 

127 

# Names exported by ``from <module> import *``.
__all__ = ['PersistentID', 'Dataset', 'read_dataset']

132]