Coverage for /usr/local/lib/python3.11/dist-packages/pyrocko/squirrel/cache.py: 87%

75 statements  

« prev     ^ index     » next       coverage.py v6.5.0, created at 2024-03-07 11:54 +0000

1# http://pyrocko.org - GPLv3 

2# 

3# The Pyrocko Developers, 21st Century 

4# ---|P------/S----------~Lg---------- 

5 

6''' 

7Squirrel memory caching. 

8''' 

9 

10import logging 

11 

12from pyrocko.guts import Object, Int 

13 

14logger = logging.getLogger('psq.cache') 

15 

16 

class ContentCacheStats(Object):
    '''
    Summary of the state of a content cache, as reported by
    :py:meth:`ContentCache.get_stats`.
    '''

    nentries = Int.T(help='Number of items in the cache.')

    naccessors = Int.T(
        help='Number of accessors currently holding references '
             'to cache items.')

26 

27 

class ContentCache(object):

    '''
    Simple memory cache for file contents.

    Squirrel manages data in small entities: nuts. Only the meta-data for each
    nut is stored in the database, content data has to be read from file. This
    cache helps to speed up data access for typical seismological access
    patterns.

    Content data for stations, channels and instrument responses is small in
    size but slow to parse so it makes sense to cache these indefinitely once
    read. Also, it is usually inefficient to read a single station from a
    station file, so it is better to cache the contents of the complete file
    even if only one station is requested (it is likely that other stations
    from that file will be used anyway).

    Content data for waveforms is large in size and we usually want to free the
    memory allocated for them after processing. Typical processing schemes
    require batches of waveforms to be available together (e.g.
    cross-correlations between pairs of stations) and there may be overlap
    between successive batches (e.g. sliding window processing schemes).

    This cache implementation uses named accessors and batch window counting
    for flexible content caching. Loaded contents are held in memory as long as
    an accessor is holding a reference to it. For each accessor a batch counter
    is maintained, which starts at 0 and is incremented using calls to
    :py:meth:`advance_accessor`. Content accesses are tracked with calls to
    :py:meth:`get`, which sets a "last access" attribute on the cached item to
    the current value of the batch counter (each accessor has its own last
    access attribute on the items it uses). References to items which have
    not been accessed during the latest batch by the accessor in question are
    released during :py:meth:`advance_accessor`. :py:meth:`put` inserts new
    items into the cache. :py:meth:`has` checks if there already is content
    cached for a given item. To remove all references held by a given accessor,
    :py:meth:`clear_accessor` can be called.

    **Example usage**

    For meta-data content to be cached indefinitely, no calls to
    :py:meth:`advance_accessor` or :py:meth:`clear_accessor` should be made.
    For waveform content one would call :py:meth:`advance_accessor` after each
    move of a sliding window or :py:meth:`clear_accessor` after each processed
    event. For a process requiring data from two independent positions of
    extraction, e.g. for cross-correlations between all possible pairs of a set
    of events, two separate accessor names could be used.
    '''

    def __init__(self):
        # (path, segment) -> (file_mtime, {element: nut}, {accessor: tick})
        # where tick is the accessor's batch counter at its last access.
        self._entries = {}

        # accessor name -> current batch counter
        self._accessor_ticks = {}

    def _prune_outdated(self, path, segment, nut_mtime):
        '''
        Drop the cached entry of a file segment if its mtime is stale.

        Does nothing if nothing is cached for ``(path, segment)`` or if the
        cached modification time equals ``nut_mtime``.

        :param path:
            File path part of the entry key.
        :param segment:
            File segment part of the entry key.
        :param nut_mtime:
            Modification time to compare the cached entry against.
        '''
        key = (path, segment)
        try:
            cache_mtime = self._entries[key][0]
        except KeyError:
            return

        if cache_mtime != nut_mtime:
            logger.debug('Forgetting (outdated): %s %s', path, segment)
            # Entries are keyed by tuple; a list here would be unhashable.
            del self._entries[key]

    def put(self, nut):
        '''
        Insert a new/updated item into cache.

        Any entry cached for the same file segment with a different
        modification time is discarded first.

        :param nut:
            Content item with attached data object.
        :type nut:
            :py:class:`~pyrocko.squirrel.model.Nut`
        '''
        path, segment, element, _ = nut.key
        self._prune_outdated(path, segment, nut.file_mtime)

        key = (path, segment)
        if key not in self._entries:
            self._entries[key] = nut.file_mtime, {}, {}

        self._entries[key][1][element] = nut

    def get(self, nut, accessor='default', model='squirrel'):
        '''
        Get a content item and track its access.

        :param nut:
            Content item.
        :type nut:
            :py:class:`~pyrocko.squirrel.model.Nut`

        :param accessor:
            Name of accessing consumer. Giving a new name initializes a new
            accessor.
        :type accessor:
            str

        :param model:
            Selects what is returned. ``'squirrel'`` returns the cached
            item's ``content``. A name ending in ``'+'`` returns a tuple of
            ``content`` and the ``raw_content`` entry stored under the name
            without the trailing ``'+'``. Any other name returns the
            ``raw_content`` entry stored under that name.
        :type model:
            str

        :returns:
            Content data object

        :raises KeyError:
            If nothing is cached for the item, or if the requested
            ``raw_content`` entry does not exist.
        '''
        path, segment, element, _ = nut.key
        entry = self._entries[path, segment]

        if accessor not in self._accessor_ticks:
            self._accessor_ticks[accessor] = 0

        # Record the access under the accessor's current batch counter.
        entry[2][accessor] = self._accessor_ticks[accessor]
        el = entry[1][element]

        if model == 'squirrel':
            return el.content
        elif model.endswith('+'):
            return el.content, el.raw_content[model[:-1]]
        else:
            return el.raw_content[model]

    def has(self, nut):
        '''
        Check if item's content is currently in cache.

        :param nut:
            Content item.
        :type nut:
            :py:class:`~pyrocko.squirrel.model.Nut`

        :returns:
            ``True`` if the element is cached and the cached modification
            time equals the one in the item's key, ``False`` otherwise.
        :rtype:
            :py:class:`bool`
        '''
        path, segment, element, nut_mtime = nut.key

        try:
            entry = self._entries[path, segment]
            cache_mtime = entry[0]
            entry[1][element]  # only probing for existence
        except KeyError:
            return False

        return cache_mtime == nut_mtime

    def advance_accessor(self, accessor='default'):
        '''
        Increment batch counter of an accessor.

        References held by the accessor on entries it has not accessed
        during the batch now ending are released. Entries left without any
        reference are removed from the cache.

        :param accessor:
            Name of accessing consumer. Giving a new name initializes a new
            accessor.
        :type accessor:
            str
        '''
        if accessor not in self._accessor_ticks:
            self._accessor_ticks[accessor] = 0

        ta = self._accessor_ticks[accessor]

        # Iterate over a snapshot because entries are deleted in the loop.
        for path_segment, entry in list(self._entries.items()):
            # Entries never touched by this accessor default to ``ta`` and
            # are therefore left alone.
            t = entry[2].get(accessor, ta)
            if t < ta:
                del entry[2][accessor]
                if not entry[2]:
                    logger.debug('Forgetting (advance): %s %s', *path_segment)
                    del self._entries[path_segment]

        self._accessor_ticks[accessor] += 1

    def clear_accessor(self, accessor='default'):
        '''
        Clear all references held by an accessor.

        Entries which are not referenced by any accessor afterwards are
        removed from the cache, and the accessor's batch counter is
        forgotten.

        :param accessor:
            Name of accessing consumer.
        :type accessor:
            str
        '''
        # Iterate over a snapshot because entries are deleted in the loop.
        for path_segment, entry in list(self._entries.items()):
            entry[2].pop(accessor, None)
            if not entry[2]:
                logger.debug('Forgetting (clear): %s %s', *path_segment)
                del self._entries[path_segment]

        self._accessor_ticks.pop(accessor, None)

    def clear(self):
        '''
        Empty the cache.
        '''
        for accessor in list(self._accessor_ticks.keys()):
            self.clear_accessor(accessor)

        self._entries = {}
        self._accessor_ticks = {}

    def get_stats(self):
        '''
        Get information about cache state.

        :returns: :py:class:`ContentCacheStats` object.
        '''
        return ContentCacheStats(
            nentries=len(self._entries),
            naccessors=len(self._accessor_ticks))