Coverage for /usr/local/lib/python3.11/dist-packages/pyrocko/squirrel/cache.py: 88%

81 statements  

« prev     ^ index     » next       coverage.py v6.5.0, created at 2023-10-06 15:01 +0000

1# http://pyrocko.org - GPLv3 

2# 

3# The Pyrocko Developers, 21st Century 

4# ---|P------/S----------~Lg---------- 

5 

6''' 

7Squirrel memory caching.

8''' 

9 

10import logging 

11 

12from pyrocko.guts import Object, Int 

13 

14logger = logging.getLogger('psq.cache') 

15 

16 

class ContentCacheStats(Object):
    '''
    Summary of the state of a content cache.

    Carries two counters: the number of cached items and the number of
    accessors which currently hold references to cached items.
    '''

    nentries = Int.T(help='Number of items in the cache.')

    naccessors = Int.T(
        help='Number of accessors currently holding references to '
             'cache items.')

26 

27 

class ContentCache:

    '''
    Simple memory cache for file contents.

    Squirrel manages data in small entities: nuts. Only the meta-data for each
    nut is stored in the database, content data has to be read from file. This
    cache helps to speed up data access for typical seismological access
    patterns.

    Content data for stations, channels and instrument responses is small in
    size but slow to parse so it makes sense to cache these indefinitely once
    read. Also, it is usually inefficient to read a single station from a
    station file, so it is better to cache the contents of the complete file
    even if only one station is requested (it is likely that other stations
    from that file will be used anyway).

    Content data for waveforms is large in size and we usually want to free the
    memory allocated for them after processing. Typical processing schemes
    require batches of waveforms to be available together (e.g.
    cross-correlations between pairs of stations) and there may be overlap
    between successive batches (e.g. sliding window processing schemes).

    This cache implementation uses named accessors and batch window counting
    for flexible content caching. Loaded contents are held in memory as long as
    an accessor is holding a reference to it. For each accessor a batch counter
    is maintained, which starts at 0 and is incremented using calls to
    :py:meth:`advance_accessor`. Content accesses are tracked with calls to
    :py:meth:`get`, which sets a "last access" attribute on the cached item to
    the current value of the batch counter (each accessor has its own last
    access attribute on the items it uses). References to items which have
    not been accessed during the latest batch by the accessor in question are
    released during :py:meth:`advance_accessor`. :py:meth:`put` inserts new
    items into the cache. :py:meth:`has` checks if there already is content
    cached for a given item. To remove all references held by a given accessor,
    :py:meth:`clear_accessor` can be called.

    **Example usage**

    For meta-data content to be cached indefinitely, no calls to
    :py:meth:`advance_accessor` or :py:meth:`clear_accessor` should be made.
    For waveform content one would call :py:meth:`advance_accessor` after each
    move of a sliding window or :py:meth:`clear_accessor` after each processed
    event. For a process requiring data from two independent positions of
    extraction, e.g. for cross-correlations between all possible pairs of a set
    of events, two separate accessor names could be used.
    '''

    def __init__(self):
        # (path, segment) -> (file mtime, {element: nut}, {accessor: tick}),
        # where tick is the accessor's batch counter at its last access.
        self._entries = {}

        # accessor name -> current value of its batch counter
        self._accessor_ticks = {}

    def _prune_outdated(self, path, segment, nut_mtime):
        '''
        Drop the entry for ``(path, segment)`` if its mtime is stale.

        Nothing happens when there is no entry for the given key or when the
        cached modification time equals ``nut_mtime``.
        '''
        entry = self._entries.get((path, segment))
        if entry is None:
            return

        if entry[0] != nut_mtime:
            logger.debug('Forgetting (outdated): %s %s', path, segment)
            del self._entries[path, segment]

    def put(self, nut):
        '''
        Insert a new/updated item into cache.

        An existing entry for the same file segment is discarded first if its
        modification time differs from the one of the given item.

        :param nut:
            Content item with attached data object.
        :type nut:
            :py:class:`~pyrocko.squirrel.model.Nut`
        '''
        path, segment, element, _ = nut.key
        self._prune_outdated(path, segment, nut.file_mtime)

        entry = self._entries.setdefault(
            (path, segment), (nut.file_mtime, {}, {}))

        entry[1][element] = nut

    def get(self, nut, accessor='default', model='squirrel'):
        '''
        Get a content item and track its access.

        :param nut:
            Content item.
        :type nut:
            :py:class:`~pyrocko.squirrel.model.Nut`

        :param accessor:
            Name of accessing consumer. Giving a new name initializes a new
            accessor.
        :type accessor:
            str

        :param model:
            Selects what is returned. ``'squirrel'`` yields the ``content``
            attribute of the cached item. A name ending in ``'+'`` yields a
            tuple of ``content`` and the ``raw_content`` stored under that
            name without the trailing ``'+'``. Any other name yields the
            ``raw_content`` stored under that name.
        :type model:
            str

        :returns:
            Content data object

        :raises KeyError:
            If nothing is cached for the item, or if the requested raw
            content is not available.
        '''
        path, segment, element, _ = nut.key
        entry = self._entries[path, segment]

        # Resolve the element before touching the bookkeeping, so that a
        # failed lookup neither creates an accessor nor leaves a reference
        # which would keep the entry alive.
        el = entry[1][element]

        entry[2][accessor] = self._accessor_ticks.setdefault(accessor, 0)

        if model == 'squirrel':
            return el.content
        elif model.endswith('+'):
            return el.content, el.raw_content[model[:-1]]
        else:
            return el.raw_content[model]

    def has(self, nut):
        '''
        Check if item's content is currently in cache.

        The item counts as cached only if its element is present and the
        modification time in its key equals the one stored with the entry.

        :param nut:
            Content item.
        :type nut:
            :py:class:`~pyrocko.squirrel.model.Nut`

        :returns:
            :py:class:`bool`

        '''
        path, segment, element, nut_mtime = nut.key

        try:
            cache_mtime, elements, _ = self._entries[path, segment]
        except KeyError:
            return False

        return element in elements and cache_mtime == nut_mtime

    def advance_accessor(self, accessor='default'):
        '''
        Increment batch counter of an accessor.

        References of the accessor to entries which it has not accessed
        since the previous call are released. Entries left without any
        reference by this are removed from the cache.

        :param accessor:
            Name of accessing consumer. Giving a new name initializes a new
            accessor.
        :type accessor:
            str
        '''
        ta = self._accessor_ticks.setdefault(accessor, 0)

        # Collect first, delete afterwards: the dict must not change size
        # while it is being iterated.
        delete = []
        for path_segment, entry in self._entries.items():
            last_access = entry[2]
            if last_access.get(accessor, ta) < ta:
                del last_access[accessor]
                if not last_access:
                    delete.append(path_segment)

        for path_segment in delete:
            logger.debug('Forgetting (advance): %s %s', *path_segment)
            del self._entries[path_segment]

        self._accessor_ticks[accessor] = ta + 1

    def clear_accessor(self, accessor='default'):
        '''
        Clear all references held by an accessor.

        Every entry which is not referenced by any accessor afterwards is
        removed from the cache. This includes entries which have been
        inserted but never accessed. The accessor's batch counter is
        discarded.

        :param accessor:
            Name of accessing consumer.
        :type accessor:
            str
        '''
        delete = []
        for path_segment, entry in self._entries.items():
            entry[2].pop(accessor, None)
            if not entry[2]:
                delete.append(path_segment)

        for path_segment in delete:
            logger.debug('Forgetting (clear): %s %s', *path_segment)
            del self._entries[path_segment]

        self._accessor_ticks.pop(accessor, None)

    def clear(self):
        '''
        Empty the cache.
        '''
        # Iterate over a copy: clear_accessor() removes the accessor from
        # the dict being walked.
        for accessor in list(self._accessor_ticks):
            self.clear_accessor(accessor)

        self._entries = {}
        self._accessor_ticks = {}

    def get_stats(self):
        '''
        Get information about cache state.

        :returns: :py:class:`ContentCacheStats` object.
        '''
        return ContentCacheStats(
            nentries=len(self._entries),
            naccessors=len(self._accessor_ticks))