1# http://pyrocko.org - GPLv3
2#
3# The Pyrocko Developers, 21st Century
4# ---|P------/S----------~Lg----------
6import logging
8from pyrocko.guts import Object, Int
# Module-level logger; the cache reports dropped entries through it at debug
# level.
logger = logging.getLogger('psq.cache')
class ContentCacheStats(Object):
    '''
    Summary of the state of a content cache.
    '''

    nentries = Int.T(help='Number of items in the cache.')
    naccessors = Int.T(
        help=(
            'Number of accessors currently holding references to '
            'cache items.'))
class ContentCache(object):

    '''
    Simple memory cache for file contents.

    Squirrel manages data in small entities: nuts. Only the meta-data for each
    nut is stored in the database, content data has to be read from file. This
    cache helps to speed up data access for typical seismological access
    patterns.

    Content data for stations, channels and instrument responses is small in
    size but slow to parse so it makes sense to cache these indefinitely once
    read. Also, it is usually inefficient to read a single station from a
    station file, so it is better to cache the contents of the complete file
    even if only one station is requested (it is likely that other stations
    from that file will be used anyway).

    Content data for waveforms is large in size and we usually want to free the
    memory allocated for them after processing. Typical processing schemes
    require batches of waveforms to be available together (e.g.
    cross-correlations between pairs of stations) and there may be overlap
    between successive batches (e.g. sliding window processing schemes).

    This cache implementation uses named accessors and batch window counting
    for flexible content caching. Loaded contents are held in memory as long as
    an accessor is holding a reference to it. For each accessor a batch counter
    is maintained, which starts at 0 and is incremented using calls to
    :py:meth:`advance_accessor`. Content accesses are tracked with calls to
    :py:meth:`get`, which sets a "last access" attribute on the cached item to
    the current value of the batch counter (each accessor has its own last
    access attribute on the items it uses). References to items which have
    not been accessed during the latest batch by the accessor in question are
    released during :py:meth:`advance_accessor`. :py:meth:`put` inserts new
    items into the cache. :py:meth:`has` checks if there already is content
    cached for a given item. To remove all references held by a given accessor,
    :py:meth:`clear_accessor` can be called.

    **Example usage**

    For meta-data content to be cached indefinitely, no calls to
    :py:meth:`advance_accessor` or :py:meth:`clear_accessor` should be made.
    For waveform content one would call :py:meth:`advance_accessor` after each
    move of a sliding window or :py:meth:`clear_accessor` after each processed
    event. For a process requiring data from two independent positions of
    extraction, e.g. for cross-correlations between all possible pairs of a set
    of events, two separate accessor names could be used.
    '''

    def __init__(self):
        # Maps (path, segment) to a 3-tuple:
        #   [0] file mtime the entry was cached with,
        #   [1] dict element -> nut,
        #   [2] dict accessor name -> batch counter value at last access.
        self._entries = {}

        # Maps accessor name -> current batch counter value.
        self._accessor_ticks = {}

    def _prune_outdated(self, path, segment, nut_mtime):
        '''
        Drop the entry of a file segment if it was cached with another mtime.

        :param path:
            File path part of the cache key.
        :param segment:
            File segment part of the cache key.
        :param nut_mtime:
            Modification time to compare the cached entry against.
        '''
        try:
            cache_mtime = self._entries[path, segment][0]
        except KeyError:
            # Nothing cached for this file segment, nothing to prune.
            return

        if cache_mtime != nut_mtime:
            logger.debug('Forgetting (outdated): %s %s', path, segment)
            del self._entries[path, segment]

    def put(self, nut):
        '''
        Insert a new/updated item into cache.

        An existing entry for the same file segment which was cached with a
        different modification time is discarded first.

        :param nut:
            Content item with attached data object.
        :type nut:
            :py:class:`~pyrocko.squirrel.model.Nut`
        '''
        path, segment, element, _ = nut.key
        self._prune_outdated(path, segment, nut.file_mtime)

        if (path, segment) not in self._entries:
            self._entries[path, segment] = nut.file_mtime, {}, {}

        self._entries[path, segment][1][element] = nut

    def get(self, nut, accessor='default', model='squirrel'):
        '''
        Get a content item and track its access.

        :param nut:
            Content item.
        :type nut:
            :py:class:`~pyrocko.squirrel.model.Nut`

        :param accessor:
            Name of accessing consumer. Giving a new name initializes a new
            accessor.
        :type accessor:
            str

        :param model:
            Selects which content is returned: with ``'squirrel'`` (default)
            the ``content`` attribute of the cached item, otherwise
            ``raw_content[model]`` of the cached item.
        :type model:
            str

        :returns:
            Content data object

        :raises:
            :py:exc:`KeyError` if the item is not in the cache (check with
            :py:meth:`has` first).
        '''
        path, segment, element, _ = nut.key
        entry = self._entries[path, segment]

        if accessor not in self._accessor_ticks:
            self._accessor_ticks[accessor] = 0

        # Remember in which batch this accessor last touched the entry.
        entry[2][accessor] = self._accessor_ticks[accessor]

        cached = entry[1][element]
        if model == 'squirrel':
            return cached.content

        return cached.raw_content[model]

    def has(self, nut):
        '''
        Check if item's content is currently in cache.

        :param nut:
            Content item.
        :type nut:
            :py:class:`~pyrocko.squirrel.model.Nut`

        :returns:
            :py:class:`bool`, ``True`` only if the element is cached and the
            cached modification time equals the one in the item's key.
        '''
        path, segment, element, nut_mtime = nut.key

        try:
            entry = self._entries[path, segment]
        except KeyError:
            return False

        return element in entry[1] and entry[0] == nut_mtime

    def advance_accessor(self, accessor='default'):
        '''
        Increment batch counter of an accessor.

        References of the accessor to entries which it has not accessed during
        the batch just finished are released. Entries left without any
        reference are removed from the cache.

        :param accessor:
            Name of accessing consumer. Giving a new name initializes a new
            accessor.
        :type accessor:
            str
        '''
        if accessor not in self._accessor_ticks:
            self._accessor_ticks[accessor] = 0

        ta = self._accessor_ticks[accessor]

        # Collect first, delete afterwards: the dict must not change size
        # while it is being iterated.
        delete = []
        for path_segment, entry in self._entries.items():
            t = entry[2].get(accessor, ta)
            if t < ta:
                del entry[2][accessor]
                if not entry[2]:
                    delete.append(path_segment)

        for path_segment in delete:
            logger.debug('Forgetting (advance): %s %s', *path_segment)
            del self._entries[path_segment]

        self._accessor_ticks[accessor] += 1

    def clear_accessor(self, accessor='default'):
        '''
        Clear all references held by an accessor.

        Entries which are not referenced by any accessor afterwards are
        removed from the cache. Calling this for an accessor name which has
        not been used before is allowed.

        :param accessor:
            Name of accessing consumer.
        :type accessor:
            str
        '''
        delete = []
        for path_segment, entry in self._entries.items():
            entry[2].pop(accessor, None)
            if not entry[2]:
                delete.append(path_segment)

        for path_segment in delete:
            logger.debug('Forgetting (clear): %s %s', *path_segment)
            del self._entries[path_segment]

        # The accessor may never have been registered (no get() or
        # advance_accessor() call yet), so do not insist on its presence.
        self._accessor_ticks.pop(accessor, None)

    def clear(self):
        '''
        Empty the cache.
        '''
        for accessor in list(self._accessor_ticks.keys()):
            self.clear_accessor(accessor)

        self._entries = {}
        self._accessor_ticks = {}

    def get_stats(self):
        '''
        Get information about cache state.

        :returns: :py:class:`ContentCacheStats` object.
        '''
        return ContentCacheStats(
            nentries=len(self._entries),
            naccessors=len(self._accessor_ticks))