1# http://pyrocko.org - GPLv3 

2# 

3# The Pyrocko Developers, 21st Century 

4# ---|P------/S----------~Lg---------- 

5 

6import logging 

7 

8from pyrocko.guts import Object, Int 

9 

10logger = logging.getLogger('psq.cache') 

11 

12 

class ContentCacheStats(Object):
    '''
    Summary of the current state of a content cache.

    Holds two counters: the number of cached items and the number of
    accessors which currently hold references to cached items.
    '''

    nentries = Int.T(help='Number of items in the cache.')
    naccessors = Int.T(
        help='Number of accessors currently holding references to '
             'cache items.')

22 

23 

class ContentCache(object):

    '''
    Simple memory cache for file contents.

    Squirrel manages data in small entities: nuts. Only the meta-data for each
    nut is stored in the database, content data has to be read from file. This
    cache helps to speed up data access for typical seismological access
    patterns.

    Content data for stations, channels and instrument responses is small in
    size but slow to parse so it makes sense to cache these indefinitely once
    read. Also, it is usually inefficient to read a single station from a
    station file, so it is better to cache the contents of the complete file
    even if only one station is requested (it is likely that other stations
    from that file will be used anyway).

    Content data for waveforms is large in size and we usually want to free the
    memory allocated for them after processing. Typical processing schemes
    require batches of waveforms to be available together (e.g.
    cross-correlations between pairs of stations) and there may be overlap
    between successive batches (e.g. sliding window processing schemes).

    This cache implementation uses named accessors and batch window counting
    for flexible content caching. Loaded contents are held in memory as long as
    an accessor is holding a reference to it. For each accessor a batch counter
    is maintained, which starts at 0 and is incremented using calls to
    :py:meth:`advance_accessor`. Content accesses are tracked with calls to
    :py:meth:`get`, which sets a "last access" attribute on the cached item to
    the current value of the batch counter (each accessor has its own last
    access attribute on the items it uses). References to items which have
    not been accessed during the latest batch by the accessor in question are
    released during :py:meth:`advance_accessor`. :py:meth:`put` inserts new
    items into the cache. :py:meth:`has` checks if there already is content
    cached for a given item. To remove all references held by a given accessor,
    :py:meth:`clear_accessor` can be called.

    **Example usage**

    For meta-data content to be cached indefinitely, no calls to
    :py:meth:`advance_accessor` or :py:meth:`clear_accessor` should be made.
    For waveform content one would call :py:meth:`advance_accessor` after each
    move of a sliding window or :py:meth:`clear_accessor` after each processed
    event. For a process requiring data from two independent positions of
    extraction, e.g. for cross-correlations between all possible pairs of a set
    of events, two separate accessor names could be used.
    '''

    def __init__(self):
        # Maps (path, segment) to a 3-tuple:
        #   [0] file modification time the cached content belongs to,
        #   [1] dict mapping element -> nut,
        #   [2] dict mapping accessor name -> batch counter at last access.
        self._entries = {}

        # Maps accessor name -> current batch counter of that accessor.
        self._accessor_ticks = {}

    def _prune_outdated(self, path, segment, nut_mtime):
        '''
        Drop the entry for ``(path, segment)`` if its mtime differs.

        Does nothing when there is no entry for ``(path, segment)``.
        '''
        try:
            cache_mtime = self._entries[path, segment][0]
        except KeyError:
            return

        if cache_mtime != nut_mtime:
            logger.debug('Forgetting (outdated): %s %s', path, segment)
            del self._entries[path, segment]

    def put(self, nut):
        '''
        Insert a new/updated item into cache.

        An existing entry for the same file and segment is discarded first if
        it was stored with a different file modification time.

        :param nut:
            Content item with attached data object.
        :type nut:
            :py:class:`~pyrocko.squirrel.model.Nut`
        '''
        path, segment, element, _ = nut.key
        self._prune_outdated(path, segment, nut.file_mtime)

        if (path, segment) not in self._entries:
            self._entries[path, segment] = nut.file_mtime, {}, {}

        self._entries[path, segment][1][element] = nut

    def get(self, nut, accessor='default', model='squirrel'):
        '''
        Get a content item and track its access.

        :param nut:
            Content item.
        :type nut:
            :py:class:`~pyrocko.squirrel.model.Nut`

        :param accessor:
            Name of accessing consumer. Giving a new name initializes a new
            accessor.
        :type accessor:
            str

        :param model:
            Selects what is returned. With ``'squirrel'``, the cached item's
            ``content`` is returned. With any other name, the entry of that
            name in the cached item's ``raw_content`` is returned. If the name
            ends with ``'+'``, a tuple ``(content, raw_content[name])`` is
            returned, where ``name`` is ``model`` without the trailing
            ``'+'``.
        :type model:
            str

        :returns:
            Content data object

        :raises KeyError:
            If no content is cached for the given item. In this case, no
            access is recorded and no accessor is initialized.
        '''
        path, segment, element, _ = nut.key
        entry = self._entries[path, segment]

        # Look up the element before tracking the access, so that a failed
        # lookup does not leave a reference held by the accessor.
        el = entry[1][element]

        entry[2][accessor] = self._accessor_ticks.setdefault(accessor, 0)

        if model == 'squirrel':
            return el.content
        elif model.endswith('+'):
            return el.content, el.raw_content[model[:-1]]
        else:
            return el.raw_content[model]

    def has(self, nut):
        '''
        Check if item's content is currently in cache.

        The item counts as cached only if the modification time stored with
        the cache entry equals the one in the item's key.

        :param nut:
            Content item.
        :type nut:
            :py:class:`~pyrocko.squirrel.model.Nut`

        :returns:
            :py:class:`bool`

        '''
        path, segment, element, nut_mtime = nut.key

        try:
            entry = self._entries[path, segment]
            cache_mtime = entry[0]
            entry[1][element]
        except KeyError:
            return False

        return cache_mtime == nut_mtime

    def advance_accessor(self, accessor='default'):
        '''
        Increment batch counter of an accessor.

        References held by the accessor to entries which it has not accessed
        since the previous call are released. Entries which are left without
        any reference by this are removed from the cache.

        :param accessor:
            Name of accessing consumer. Giving a new name initializes a new
            accessor.
        :type accessor:
            str
        '''
        ta = self._accessor_ticks.setdefault(accessor, 0)

        delete = []
        for path_segment, entry in self._entries.items():
            last_access = entry[2]
            # Entries never touched by this accessor default to the current
            # tick and are therefore left alone.
            if last_access.get(accessor, ta) < ta:
                del last_access[accessor]
                if not last_access:
                    delete.append(path_segment)

        # Deletion is deferred: the dict must not change size during the
        # iteration above.
        for path_segment in delete:
            logger.debug('Forgetting (advance): %s %s', *path_segment)
            del self._entries[path_segment]

        self._accessor_ticks[accessor] += 1

    def clear_accessor(self, accessor='default'):
        '''
        Clear all references held by an accessor.

        Entries which are not referenced by any accessor afterwards are
        removed from the cache. The accessor's batch counter is discarded.
        Clearing an unknown accessor is allowed.

        :param accessor:
            Name of accessing consumer.
        :type accessor:
            str
        '''
        delete = []
        for path_segment, entry in self._entries.items():
            entry[2].pop(accessor, None)
            if not entry[2]:
                delete.append(path_segment)

        for path_segment in delete:
            logger.debug('Forgetting (clear): %s %s', *path_segment)
            del self._entries[path_segment]

        # pop() instead of del: the accessor may never have been initialized
        # or may have been cleared already.
        self._accessor_ticks.pop(accessor, None)

    def clear(self):
        '''
        Empty the cache.

        All accessors are cleared and all entries are dropped.
        '''
        # Iterate over a copy, clear_accessor() modifies the dict.
        for accessor in list(self._accessor_ticks):
            self.clear_accessor(accessor)

        self._entries = {}
        self._accessor_ticks = {}

    def get_stats(self):
        '''
        Get information about cache state.

        :returns: :py:class:`ContentCacheStats` object.
        '''
        return ContentCacheStats(
            nentries=len(self._entries),
            naccessors=len(self._accessor_ticks))