# lru_cache.py -- Simple LRU cache for dulwich
# Copyright (C) 2006, 2008 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

"""A simple least-recently-used (LRU) cache."""
22 class _LRUNode(object):
23 """This maintains the linked-list which is the lru internals."""
25 __slots__ = ('prev', 'next_key', 'key', 'value', 'cleanup', 'size')
27 def __init__(self, key, value, cleanup=None):
29 self.next_key = _null_key
32 self.cleanup = cleanup
33 # TODO: We could compute this 'on-the-fly' like we used to, and remove
34 # one pointer from this object, we just need to decide if it
35 # actually costs us much of anything in normal usage
42 prev_key = self.prev.key
43 return '%s(%r n:%r p:%r)' % (self.__class__.__name__, self.key,
44 self.next_key, prev_key)
46 def run_cleanup(self):
47 if self.cleanup is not None:
48 self.cleanup(self.key, self.value)
50 # Just make sure to break any refcycles, etc
class LRUCache(object):
    """A class which manages a cache of entries, removing unused ones."""

    def __init__(self, max_cache=100, after_cleanup_count=None):
        """Create a new LRUCache.

        :param max_cache: The maximum number of entries to hold.
        :param after_cleanup_count: After a cleanup, shrink down to this many
            entries (defaults to 80% of max_cache).
        """
        # key -> _LRUNode; the nodes additionally form a doubly linked list
        # in most-recently-used order.
        self._cache = {}
        # The "HEAD" of the lru linked list
        self._most_recently_used = None
        # The "TAIL" of the lru linked list
        self._least_recently_used = None
        self._update_max_cache(max_cache, after_cleanup_count)

    def __contains__(self, key):
        return key in self._cache

    def __getitem__(self, key):
        cache = self._cache
        node = cache[key]
        # Inlined from _record_access to decrease the overhead of __getitem__
        # We also have more knowledge about structure if __getitem__ is
        # succeeding, then we know that self._most_recently_used must not be
        # None, etc.
        mru = self._most_recently_used
        if node is mru:
            # Nothing to do, this node is already at the head of the queue
            return node.value
        # Remove this node from the old location
        node_prev = node.prev
        next_key = node.next_key
        # benchmarking shows that the lookup of _null_key in globals is faster
        # than the attribute lookup for (node is self._least_recently_used)
        if next_key is _null_key:
            # 'node' is the _least_recently_used, because it doesn't have a
            # 'next' item. So move the current lru to the previous node.
            self._least_recently_used = node_prev
        else:
            node_next = cache[next_key]
            node_next.prev = node_prev
        node_prev.next_key = next_key
        # Insert this node at the front of the list
        node.next_key = mru.key
        mru.prev = node
        self._most_recently_used = node
        node.prev = None
        return node.value

    def __len__(self):
        return len(self._cache)

    def _walk_lru(self):
        """Walk the LRU list, only meant to be used in tests."""
        node = self._most_recently_used
        if node is not None:
            if node.prev is not None:
                raise AssertionError('the _most_recently_used entry is not'
                                     ' supposed to have a previous entry'
                                     ' %s' % (node,))
        while node is not None:
            if node.next_key is _null_key:
                if node is not self._least_recently_used:
                    raise AssertionError('only the last node should have'
                                         ' no next value: %s' % (node,))
                node_next = None
            else:
                node_next = self._cache[node.next_key]
                if node_next.prev is not node:
                    raise AssertionError('inconsistency found, node.next.prev'
                                         ' != node: %s' % (node,))
            if node.prev is None:
                if node is not self._most_recently_used:
                    raise AssertionError('only the _most_recently_used should'
                                         ' not have a previous node: %s'
                                         % (node,))
            else:
                if node.prev.next_key != node.key:
                    raise AssertionError('inconsistency found, node.prev.next'
                                         ' != node: %s' % (node,))
            yield node
            node = node_next

    def add(self, key, value, cleanup=None):
        """Add a new value to the cache.

        Also, if the entry is ever removed from the cache, call
        cleanup(key, value).

        :param key: The key to store it under
        :param value: The object to store
        :param cleanup: None or a function taking (key, value) to indicate
                        'value' should be cleaned up.
        """
        if key is _null_key:
            raise ValueError('cannot use _null_key as a key')
        if key in self._cache:
            node = self._cache[key]
            # Replacing an existing entry: release the old value first.
            node.run_cleanup()
            node.value = value
            node.cleanup = cleanup
        else:
            node = _LRUNode(key, value, cleanup=cleanup)
            self._cache[key] = node
        self._record_access(node)

        if len(self._cache) > self._max_cache:
            # Trigger the cleanup
            self.cleanup()

    def cache_size(self):
        """Get the number of entries we will cache."""
        return self._max_cache

    def get(self, key, default=None):
        """Return the value for key (recording the access), or default."""
        node = self._cache.get(key, None)
        if node is None:
            return default
        self._record_access(node)
        return node.value

    def keys(self):
        """Get the list of keys currently cached.

        Note that values returned here may not be available by the time you
        request them later. This is simply meant as a peek into the current
        state.

        :return: An unordered list of keys that are currently cached.
        """
        # list() keeps the documented list return type under Python 3,
        # where dict.keys() is a view.
        return list(self._cache.keys())

    def items(self):
        """Get the key:value pairs as a dict."""
        # dict.items() replaces the Python-2-only dict.iteritems().
        return dict((k, n.value) for k, n in self._cache.items())

    def cleanup(self):
        """Clear the cache until it shrinks to the requested size.

        This does not completely wipe the cache, just makes sure it is under
        the after_cleanup_count.
        """
        # Make sure the cache is shrunk to the correct size
        while len(self._cache) > self._after_cleanup_count:
            self._remove_lru()

    def __setitem__(self, key, value):
        """Add a value to the cache, there will be no cleanup function."""
        self.add(key, value, cleanup=None)

    def _record_access(self, node):
        """Record that key was accessed."""
        # Move 'node' to the front of the queue
        if self._most_recently_used is None:
            self._most_recently_used = node
            self._least_recently_used = node
        elif node is self._most_recently_used:
            # Nothing to do, this node is already at the head of the queue
            return
        else:
            # We've taken care of the tail pointer, remove the node, and
            # insert it at the front
            # REMOVE
            if node is self._least_recently_used:
                self._least_recently_used = node.prev
            if node.prev is not None:
                node.prev.next_key = node.next_key
            if node.next_key is not _null_key:
                node_next = self._cache[node.next_key]
                node_next.prev = node.prev
            # INSERT
            node.next_key = self._most_recently_used.key
            self._most_recently_used.prev = node
            self._most_recently_used = node
            node.prev = None

    def _remove_node(self, node):
        """Unlink node from the list, drop it from the dict, run its cleanup."""
        if node is self._least_recently_used:
            self._least_recently_used = node.prev
        self._cache.pop(node.key)
        # If we have removed all entries, remove the head pointer as well
        if self._least_recently_used is None:
            self._most_recently_used = None
        node.run_cleanup()
        # Now remove this node from the linked list
        if node.prev is not None:
            node.prev.next_key = node.next_key
        if node.next_key is not _null_key:
            node_next = self._cache[node.next_key]
            node_next.prev = node.prev
        # And remove this node's pointers
        node.prev = None
        node.next_key = _null_key

    def _remove_lru(self):
        """Remove one entry from the lru, and handle consequences.

        If there are no more references to the lru, then this entry should be
        removed from the cache.
        """
        self._remove_node(self._least_recently_used)

    def clear(self):
        """Clear out all of the cache."""
        # Clean up in LRU order
        while self._cache:
            self._remove_lru()

    def resize(self, max_cache, after_cleanup_count=None):
        """Change the number of entries that will be cached."""
        self._update_max_cache(max_cache,
                               after_cleanup_count=after_cleanup_count)

    def _update_max_cache(self, max_cache, after_cleanup_count=None):
        self._max_cache = max_cache
        if after_cleanup_count is None:
            # Default to 80% of max_cache; floor division keeps the
            # threshold an int under Python 3's true division.
            self._after_cleanup_count = self._max_cache * 8 // 10
        else:
            self._after_cleanup_count = min(after_cleanup_count,
                                            self._max_cache)
        # Shrink immediately in case the limits were lowered.
        self.cleanup()
class LRUSizeCache(LRUCache):
    """An LRUCache that removes things based on the size of the values.

    This differs in that it doesn't care how many actual items there are,
    it just restricts the cache to be cleaned up after so much data is stored.

    The size of items added will be computed using compute_size(value), which
    defaults to len() if not supplied.
    """

    def __init__(self, max_size=1024*1024, after_cleanup_size=None,
                 compute_size=None):
        """Create a new LRUSizeCache.

        :param max_size: The max number of bytes to store before we start
            clearing out entries.
        :param after_cleanup_size: After cleaning up, shrink everything to this
            size (defaults to 80% of max_size).
        :param compute_size: A function to compute the size of the values. We
            use a function here, so that you can pass 'len' if you are just
            using simple strings, or a more complex function if you are using
            something like a list of strings, or even a custom object.
            The function should take the form "compute_size(value) => integer".
            If not supplied, it defaults to 'len()'
        """
        # Running total of the computed size of everything cached.
        self._value_size = 0
        self._compute_size = compute_size
        if compute_size is None:
            self._compute_size = len
        self._update_max_size(max_size, after_cleanup_size=after_cleanup_size)
        # The entry-count bound approximates entries averaging ~512 bytes;
        # floor division keeps it an int, and we always allow >= 1 entry.
        LRUCache.__init__(self, max_cache=max(int(max_size // 512), 1))

    def add(self, key, value, cleanup=None):
        """Add a new value to the cache.

        Also, if the entry is ever removed from the cache, call
        cleanup(key, value).

        :param key: The key to store it under
        :param value: The object to store
        :param cleanup: None or a function taking (key, value) to indicate
                        'value' should be cleaned up.
        """
        if key is _null_key:
            raise ValueError('cannot use _null_key as a key')
        node = self._cache.get(key, None)
        value_len = self._compute_size(value)
        if value_len >= self._after_cleanup_size:
            # The new value is 'too big to fit', as it would fill up/overflow
            # the cache all by itself
            if node is not None:
                # We won't be replacing the old node, so just remove it
                self._remove_node(node)
            if cleanup is not None:
                cleanup(key, value)
            return
        if node is None:
            node = _LRUNode(key, value, cleanup=cleanup)
            self._cache[key] = node
        else:
            # Replacing an existing entry: account for the size change, run
            # the old cleanup, and store the new value/cleanup.  (The
            # original code forgot to store the new value, leaving a stale
            # entry in the cache.)
            self._value_size -= node.size
            node.run_cleanup()
            node.value = value
            node.cleanup = cleanup
        node.size = value_len
        self._value_size += value_len
        self._record_access(node)

        if self._value_size > self._max_size:
            # Time to cleanup
            self.cleanup()

    def cleanup(self):
        """Clear the cache until it shrinks to the requested size.

        This does not completely wipe the cache, just makes sure it is under
        the after_cleanup_size.
        """
        # Make sure the cache is shrunk to the correct size
        while self._value_size > self._after_cleanup_size:
            self._remove_lru()

    def _remove_node(self, node):
        # Keep the running size total in sync with the removal.
        self._value_size -= node.size
        LRUCache._remove_node(self, node)

    def resize(self, max_size, after_cleanup_size=None):
        """Change the number of bytes that will be cached."""
        self._update_max_size(max_size, after_cleanup_size=after_cleanup_size)
        max_cache = max(int(max_size // 512), 1)
        self._update_max_cache(max_cache)

    def _update_max_size(self, max_size, after_cleanup_size=None):
        self._max_size = max_size
        if after_cleanup_size is None:
            # Default to 80% of max_size; floor division keeps the
            # threshold an int under Python 3's true division.
            self._after_cleanup_size = self._max_size * 8 // 10
        else:
            self._after_cleanup_size = min(after_cleanup_size, self._max_size)
        # Shrink immediately in case the limits were lowered.
        self.cleanup()