Python Least-Recently-Used (LRU) Cache Algorithm: LRUCache
# lrucache.py -- a simple LRU (Least-Recently-Used) cache class

# Copyright 2004 Evan Prodromou <evan@bad.dynu.ca>
# Licensed under the Academic Free License 2.1

# Licensed for ftputil under the revised BSD license
# with permission by the author, Evan Prodromou. Many
# thanks, Evan! :-)
#
# The original file is available at
# http://pypi.python.org/pypi/lrucache/0.2 .

# arch-tag: LRU cache main module

"""a simple LRU (Least-Recently-Used) cache module |
016 |
|
017 |
This module provides very simple LRU (Least-Recently-Used) cache |
018 |
functionality. |
019 |
|
020 |
An *in-memory cache* is useful for storing the results of an |
021 |
'expe\nsive' process (one that takes a lot of time or resources) for |
022 |
later re-use. Typical examples are accessing data from the filesystem, |
023 |
a database, or a network location. If you know you'll need to re-read |
024 |
the data again, it can help to keep it in a cache. |
025 |
|
026 |
You *can* use a Python dictionary as a cache for some purposes. |
027 |
However, if the results you're caching are large, or you have a lot of |
028 |
possible results, this can be impractical memory-wise. |
029 |
|
030 |
An *LRU cache*, on the other hand, only keeps _some_ of the results in |
031 |
memory, which keeps you from overusing resources. The cache is bounded |
032 |
by a maximum size; if you try to add more values to the cache, it will |
033 |
automatically discard the values that you haven't read or written to |
034 |
in the longest time. In other words, the least-recently-used items are |
035 |
discarded. [1]_ |
036 |
|
037 |
.. [1]: 'Discarded' here means 'removed from the cache'. |
038 |
|
039 |
""" |
040 |
|
041 |
from __future__ import generators |
042 |
import time |
043 |
from heapq import heappush, heappop, heapify |
044 |
|
045 |
# the suffix after the hyphen denotes modifications by the |
046 |
# ftputil project with respect to the original version |
047 |
__version__ = "0.2-1" |
048 |
__all__ = [ 'CacheKeyError' , 'LRUCache' , 'DEFAULT_SIZE' ] |
049 |
__docformat__ = 'reStructuredText en' |
050 |
|
051 |
DEFAULT_SIZE = 16 |
052 |
"""Default size of a new LRUCache object, if no 'size' argument is given.""" |
053 |
|
054 |
class CacheKeyError(KeyError):
    """Error raised when cache requests fail

    When a cache record is accessed which no longer exists (or never did),
    this error is raised. To avoid it, you may want to check for the existence
    of a cache record before reading or deleting it."""
    pass

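# Implementation note: each LRUCache keeps two parallel structures -- a dict
# mapping keys to __Node records for constant-time lookup, and a heap of the
# same nodes ordered by an ever-increasing sort key, so the least-recently-used
# node is always the smallest heap element.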
class LRUCache(object):
    """Least-Recently-Used (LRU) cache.

    Instances of this class provide a least-recently-used (LRU) cache. They
    emulate a Python mapping type. You can use an LRU cache more or less like
    a Python dictionary, with the exception that objects you put into the
    cache may be discarded before you take them out.

    Some example usage::

        cache = LRUCache(32) # new cache
        cache['foo'] = get_file_contents('foo') # or whatever

        if 'foo' in cache: # if it's still in cache...
            # use cached version
            contents = cache['foo']
        else:
            # recalculate
            contents = get_file_contents('foo')
            # store in cache for next time
            cache['foo'] = contents

        print cache.size # Maximum size

        print len(cache) # 0 <= len(cache) <= cache.size

        cache.size = 10 # Auto-shrink on size assignment

        for i in range(50): # note: larger than cache size
            cache[i] = i

        if 0 not in cache: print 'Zero was discarded.'

        if 42 in cache:
            del cache[42] # Manual deletion

        for j in cache: # iterate (in LRU order)
            print j, cache[j] # iterator produces keys, not values
    """

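    # A __Node bundles everything the cache needs to know about one entry:
    # the key, the cached object, access and modification times, and the
    # integer sort key that fixes its position in the LRU heap.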
    class __Node(object):
        """Record of a cached value. Not for public consumption."""

        def __init__(self, key, obj, timestamp, sort_key):
            object.__init__(self)
            self.key = key
            self.obj = obj
            self.atime = timestamp
            self.mtime = self.atime
            self._sort_key = sort_key

        def __cmp__(self, other):
            return cmp(self._sort_key, other._sort_key)

        def __repr__(self):
            return "<%s %s => %s (%s)>" % \
                   (self.__class__, self.key, self.obj, \
                    time.asctime(time.localtime(self.atime)))

    def __init__(self, size=DEFAULT_SIZE):
        # Check arguments
        if size <= 0:
            raise ValueError, size
        elif type(size) is not type(0):
            raise TypeError, size
        object.__init__(self)
        self.__heap = []
        self.__dict = {}
        """Maximum size of the cache.
        If more than 'size' elements are added to the cache,
        the least-recently-used ones will be discarded."""
        self.size = size
        self.__counter = 0

    def _sort_key(self):
        """Return a new integer value upon every call.

        Cache nodes need a monotonically increasing time indicator.
        time.time() and time.clock() don't guarantee this in a
        platform-independent way.
        """
        self.__counter += 1
        return self.__counter

    def __len__(self):
        return len(self.__heap)

    def __contains__(self, key):
        return self.__dict.has_key(key)

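    # Storing a value either updates an existing node in place and
    # re-heapifies (its sort key has changed), or evicts nodes from the
    # top of the heap until there is room and then pushes a new node.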
    def __setitem__(self, key, obj):
        if self.__dict.has_key(key):
            node = self.__dict[key]
            # update node object in-place
            node.obj = obj
            node.atime = time.time()
            node.mtime = node.atime
            node._sort_key = self._sort_key()
            heapify(self.__heap)
        else:
            # size may have been reset, so we loop
            while len(self.__heap) >= self.size:
                lru = heappop(self.__heap)
                del self.__dict[lru.key]
            node = self.__Node(key, obj, time.time(), self._sort_key())
            self.__dict[key] = node
            heappush(self.__heap, node)

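    # Reading a value refreshes the node's access time and sort key,
    # which marks the entry as most recently used.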
    def __getitem__(self, key):
        if not self.__dict.has_key(key):
            raise CacheKeyError(key)
        else:
            node = self.__dict[key]
            # update node object in-place
            node.atime = time.time()
            node._sort_key = self._sort_key()
            heapify(self.__heap)
            return node.obj

    def __delitem__(self, key):
        if not self.__dict.has_key(key):
            raise CacheKeyError(key)
        else:
            node = self.__dict[key]
            del self.__dict[key]
            self.__heap.remove(node)
            heapify(self.__heap)
            return node.obj

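    # Iteration pops nodes from a copy of the heap, so keys are yielded
    # from least to most recently used without disturbing the cache itself.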
    def __iter__(self):
        copy = self.__heap[:]
        while len(copy) > 0:
            node = heappop(copy)
            yield node.key
        raise StopIteration

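    # Every attribute assignment passes through here; when 'size' is made
    # smaller, surplus least-recently-used entries are evicted right away.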
    def __setattr__(self, name, value):
        object.__setattr__(self, name, value)
        # automagically shrink heap on resize
        if name == 'size':
            while len(self.__heap) > value:
                lru = heappop(self.__heap)
                del self.__dict[lru.key]

    def __repr__(self):
        return "<%s (%d elements)>" % (str(self.__class__), len(self.__heap))

    def mtime(self, key):
        """Return the last modification time for the cache record with key.
        May be useful for cache instances where the stored values can get
        'stale', such as caching file or network resource contents."""
        if not self.__dict.has_key(key):
            raise CacheKeyError(key)
        else:
            node = self.__dict[key]
            return node.mtime

if __name__ = = "__main__" : |
220 |
cache = LRUCache( 25 ) |
221 |
print cache |
222 |
for i in range ( 50 ): |
223 |
cache[i] = str (i) |
224 |
print cache |
225 |
if 46 in cache: |
226 |
print "46 in cache" |
227 |
del cache[ 46 ] |
228 |
print cache |
229 |
cache.size = 10 |
230 |
print cache |
231 |
cache[ 46 ] = '46' |
232 |
print cache |
233 |
print len (cache) |
234 |
for c in cache: |
235 |
print c |
236 |
print cache |
237 |
print cache.mtime( 46 ) |
238 |
for c in cache: |
239 |
print c |
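
To round off the listing, here is a minimal sketch of how the class might be wired into real code: a slow function is memoized behind the cache. The names `load_page` and `get_page` are hypothetical stand-ins and are not part of the module above.

from lrucache import LRUCache

cache = LRUCache(size=100)

def load_page(url):
    # hypothetical stand-in for an expensive operation, e.g. a network fetch
    return "contents of %s" % url

def get_page(url):
    if url in cache:
        return cache[url]    # cache hit; also marks the entry as recently used
    data = load_page(url)    # cache miss: do the expensive work once
    cache[url] = data        # may evict the least-recently-used entry
    return data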