# http://pyrocko.org - GPLv3
#
# The Pyrocko Developers, 21st Century
# ---|P------/S----------~Lg----------

5 

import logging

# Module-level logger used by the cache to report forgotten entries.
logger = logging.getLogger('psq.cache')

9 

10 

class ContentCache(object):

    '''
    Simple memory cache for file contents.

    Squirrel manages data in small entities: nuts. Only the meta-data for each
    nut is stored in the database, content data has to be read from file. This
    cache helps to speed up data access for typical seismological access
    patterns.

    Content data for stations, channels and instrument responses is small in
    size but slow to parse so it makes sense to cache these indefinitely once
    read. Also, it is usually inefficient to read a single station from a
    station file, so it is better to cache the contents of the complete file
    even if only one station is requested (it is likely that other stations
    from that file will be used anyway).

    Content data for waveforms is large in size and we usually want to free the
    memory allocated for them after processing. Typical processing schemes
    require batches of waveforms to be available together (e.g.
    cross-correlations between pairs of stations) and there may be overlap
    between successive batches (e.g. sliding window processing schemes).

    This cache implementation uses named accessors and batch window counting
    for flexible content caching. Loaded contents are held in memory as long as
    an accessor is holding a reference to it. For each accessor a batch counter
    is maintained, which starts at 0 and is incremented using calls to
    :py:meth:`advance_accessor`. Content accesses are tracked with calls to
    :py:meth:`get`, which sets a "last access" attribute on the cached item to
    the current value of the batch counter (each accessor has its own last
    access attribute on the items it uses). References to items which have
    not been accessed during the latest batch by the accessor in question are
    released during :py:meth:`advance_accessor`. :py:meth:`put` inserts new
    items into the cache. :py:meth:`has` checks if there already is content
    cached for a given item. To remove all references held by a given accessor,
    :py:meth:`clear_accessor` can be called.

    **Example usage**

    For meta-data content to be cached indefinitely, no calls to
    :py:meth:`advance_accessor` or :py:meth:`clear_accessor` should be made.
    For waveform content one would call :py:meth:`advance_accessor` after each
    move of a sliding window or :py:meth:`clear_accessor` after each processed
    event. For a process requiring data from two independent positions of
    extraction, e.g. for cross-correlations between all possible pairs of a set
    of events, two separate accessor names could be used.
    '''

    def __init__(self):
        # Maps (path, segment) to a 3-tuple:
        #   [0] file modification time the contents were cached for,
        #   [1] dict: element -> content object,
        #   [2] dict: accessor name -> batch counter value at last access.
        self._entries = {}

        # Maps accessor name to its current batch counter.
        self._accessor_ticks = {}

    def _prune_outdated(self, path, segment, nut_mtime):
        '''
        Forget the cached entry of a file segment if its mtime differs.

        :param path:
            File path part of the cache key.
        :param segment:
            Segment part of the cache key.
        :param nut_mtime:
            Modification time to compare the cached entry against. Nothing
            happens if there is no entry for ``(path, segment)``.
        '''
        try:
            cache_mtime = self._entries[path, segment][0]
        except KeyError:
            return

        if cache_mtime != nut_mtime:
            logger.debug('Forgetting (outdated): %s %s', path, segment)
            del self._entries[path, segment]

    def put(self, nut):
        '''
        Insert a new/updated item into cache.

        Any entry cached for the same file segment under a different
        modification time is discarded first.

        :param nut:
            Content item with attached data object.
        :type nut:
            :py:class:`~pyrocko.squirrel.model.Nut`
        '''
        path, segment, element, _ = nut.key
        self._prune_outdated(path, segment, nut.file_mtime)

        if (path, segment) not in self._entries:
            self._entries[path, segment] = nut.file_mtime, {}, {}

        self._entries[path, segment][1][element] = nut.content

    def get(self, nut, accessor='default'):
        '''
        Get a content item and track its access.

        :param nut:
            Content item.
        :type nut:
            :py:class:`~pyrocko.squirrel.model.Nut`

        :param accessor:
            Name of accessing consumer. Giving a new name initializes a new
            accessor.
        :type accessor:
            str

        :returns:
            Content data object

        :raises:
            :py:exc:`KeyError` if no content is cached for the item. In that
            case no access is recorded.
        '''
        path, segment, element, _ = nut.key
        entry = self._entries[path, segment]

        # Resolve the content before touching any bookkeeping, so that a
        # failed lookup neither registers the accessor nor leaves a
        # reference pinning the entry in memory.
        content = entry[1][element]

        entry[2][accessor] = self._accessor_ticks.setdefault(accessor, 0)

        return content

    def has(self, nut):
        '''
        Check if item's content is currently in cache.

        The content counts as cached only if the modification time stored
        with the cache entry equals the one in the item's key.

        :param nut:
            Content item.
        :type nut:
            :py:class:`~pyrocko.squirrel.model.Nut`

        :returns:
            :py:class:`bool`

        '''
        path, segment, element, nut_mtime = nut.key

        try:
            entry = self._entries[path, segment]
            cache_mtime = entry[0]
            entry[1][element]
        except KeyError:
            return False

        return cache_mtime == nut_mtime

    def advance_accessor(self, accessor='default'):
        '''
        Increment batch counter of an accessor.

        References of the accessor to entries which it has not accessed
        during the batch now ending are released. Entries left without any
        reference are removed from the cache.

        :param accessor:
            Name of accessing consumer. Giving a new name initializes a new
            accessor.
        :type accessor:
            str
        '''
        ta = self._accessor_ticks.setdefault(accessor, 0)

        # Collect first, delete afterwards: the dict must not change size
        # while it is being iterated.
        delete = []
        for path_segment, entry in self._entries.items():
            references = entry[2]
            # Entries not referenced by this accessor default to ``ta`` and
            # are therefore left untouched.
            if references.get(accessor, ta) < ta:
                del references[accessor]
                if not references:
                    delete.append(path_segment)

        for path_segment in delete:
            logger.debug('Forgetting (advance): %s %s', *path_segment)
            del self._entries[path_segment]

        self._accessor_ticks[accessor] = ta + 1

    def clear_accessor(self, accessor='default'):
        '''
        Clear all references held by an accessor.

        Entries left without any reference are removed from the cache and
        the accessor's batch counter is forgotten. Clearing an accessor which
        has never been used is not an error.

        :param accessor:
            Name of accessing consumer.
        :type accessor:
            str
        '''
        delete = []
        for path_segment, entry in self._entries.items():
            entry[2].pop(accessor, None)
            if not entry[2]:
                delete.append(path_segment)

        for path_segment in delete:
            logger.debug('Forgetting (clear): %s %s', *path_segment)
            del self._entries[path_segment]

        # ``pop`` rather than ``del``: the accessor may never have been
        # initialized by :py:meth:`get` or :py:meth:`advance_accessor`.
        self._accessor_ticks.pop(accessor, None)

    def clear(self):
        '''
        Empty the cache.
        '''
        # Iterate over a snapshot, clear_accessor() modifies the dict.
        for accessor in list(self._accessor_ticks):
            self.clear_accessor(accessor)

        self._entries = {}
        self._accessor_ticks = {}