Python Least-Recently-Used (LRU) Algorithm: LRUCache

# lrucache.py -- a simple LRU (Least-Recently-Used) cache class 

# Copyright 2004 Evan Prodromou <evan@bad.dynu.ca>
# Licensed under the Academic Free License 2.1

# Licensed for ftputil under the revised BSD license
# with permission by the author, Evan Prodromou. Many
# thanks, Evan! :-)

# The original file is available at
# http://pypi.python.org/pypi/lrucache/0.2 .

# arch-tag: LRU cache main module

015 """a simple LRU (Least-Recently-Used) cache module 
016   
017 This module provides very simple LRU (Least-Recently-Used) cache 
018 functionality. 
019   
020 An *in-memory cache* is useful for storing the results of an 
021 'expe\nsive' process (one that takes a lot of time or resources) for 
022 later re-use. Typical examples are accessing data from the filesystem, 
023 a database, or a network location. If you know you'll need to re-read 
024 the data again, it can help to keep it in a cache. 
025   
026 You *can* use a Python dictionary as a cache for some purposes. 
027 However, if the results you're caching are large, or you have a lot of 
028 possible results, this can be impractical memory-wise. 
029   
030 An *LRU cache*, on the other hand, only keeps _some_ of the results in 
031 memory, which keeps you from overusing resources. The cache is bounded 
032 by a maximum size; if you try to add more values to the cache, it will 
033 automatically discard the values that you haven't read or written to 
034 in the longest time. In other words, the least-recently-used items are 
035 discarded. [1]_ 
036   
037 .. [1]: 'Discarded' here means 'removed from the cache'. 
038   
039 """ 

from __future__ import generators
import time
from heapq import heappush, heappop, heapify

# the suffix after the hyphen denotes modifications by the
#  ftputil project with respect to the original version
__version__ = "0.2-1"
__all__ = ['CacheKeyError', 'LRUCache', 'DEFAULT_SIZE']
__docformat__ = 'reStructuredText en'

DEFAULT_SIZE = 16
"""Default size of a new LRUCache object, if no 'size' argument is given."""

class CacheKeyError(KeyError):
    """Error raised when cache requests fail

    When a cache record is accessed which no longer exists (or never did),
    this error is raised. To avoid it, you may want to check for the existence
    of a cache record before reading or deleting it."""
    pass

class LRUCache(object):
    """Least-Recently-Used (LRU) cache.

    Instances of this class provide a least-recently-used (LRU) cache. They
    emulate a Python mapping type. You can use an LRU cache more or less like
    a Python dictionary, with the exception that objects you put into the
    cache may be discarded before you take them out.

    Some example usage::

    cache = LRUCache(32) # new cache
    cache['foo'] = get_file_contents('foo') # or whatever

    if 'foo' in cache: # if it's still in cache...
        # use cached version
        contents = cache['foo']
    else:
        # recalculate
        contents = get_file_contents('foo')
        # store in cache for next time
        cache['foo'] = contents

    print cache.size # Maximum size

    print len(cache) # 0 <= len(cache) <= cache.size

    cache.size = 10 # Auto-shrink on size assignment

    for i in range(50): # note: larger than cache size
        cache[i] = i

    if 0 not in cache: print 'Zero was discarded.'

    if 42 in cache:
        del cache[42] # Manual deletion

    for j in cache:   # iterate (in LRU order)
        print j, cache[j] # iterator produces keys, not values
    """

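    # Implementation note: values are kept in a dict for O(1) key lookup,
    # while a heap of __Node records (ordered by a monotonically increasing
    # access counter) tracks recency, so the least-recently-used node is
    # always at the heap root.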
    class __Node(object):
        """Record of a cached value. Not for public consumption."""

        def __init__(self, key, obj, timestamp, sort_key):
            object.__init__(self)
            self.key = key
            self.obj = obj
            self.atime = timestamp
            self.mtime = self.atime
            self._sort_key = sort_key

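        # heapq compares nodes via __cmp__; ordering by the increasing
        # sort key keeps the least-recently-used node first in the heap.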
        def __cmp__(self, other):
            return cmp(self._sort_key, other._sort_key)

        def __repr__(self):
            return "<%s %s => %s (%s)>" % \
                   (self.__class__, self.key, self.obj, \
                    time.asctime(time.localtime(self.atime)))

    def __init__(self, size=DEFAULT_SIZE):
        # Check arguments
        if size <= 0:
            raise ValueError, size
        elif type(size) is not type(0):
            raise TypeError, size
        object.__init__(self)
        self.__heap = []
        self.__dict = {}
        """Maximum size of the cache.
        If more than 'size' elements are added to the cache,
        the least-recently-used ones will be discarded."""
        self.size = size
        self.__counter = 0

    def _sort_key(self):
        """Return a new integer value upon every call.

        Cache nodes need a monotonically increasing time indicator.
        time.time() and time.clock() don't guarantee this in a
        platform-independent way.
        """
        self.__counter += 1
        return self.__counter

    def __len__(self):
        return len(self.__heap)

    def __contains__(self, key):
        return self.__dict.has_key(key)

    def __setitem__(self, key, obj):
        if self.__dict.has_key(key):
            node = self.__dict[key]
            # update node object in-place
            node.obj = obj
            node.atime = time.time()
            node.mtime = node.atime
            node._sort_key = self._sort_key()
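            # restore the heap invariant after the sort key change (O(n))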
            heapify(self.__heap)
        else:
            # size may have been reset, so we loop
            while len(self.__heap) >= self.size:
                lru = heappop(self.__heap)
                del self.__dict[lru.key]
            node = self.__Node(key, obj, time.time(), self._sort_key())
            self.__dict[key] = node
            heappush(self.__heap, node)

    def __getitem__(self, key):
        if not self.__dict.has_key(key):
            raise CacheKeyError(key)
        else:
            node = self.__dict[key]
            # update node object in-place
            node.atime = time.time()
            node._sort_key = self._sort_key()
            heapify(self.__heap)
            return node.obj

    def __delitem__(self, key):
        if not self.__dict.has_key(key):
            raise CacheKeyError(key)
        else:
            node = self.__dict[key]
            del self.__dict[key]
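            # removing an arbitrary node breaks the heap invariant;
            # list.remove is O(n), and heapify repairs the heap afterwards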
            self.__heap.remove(node)
            heapify(self.__heap)
            return node.obj

    def __iter__(self):
        copy = self.__heap[:]
        while len(copy) > 0:
            node = heappop(copy)
            yield node.key
        raise StopIteration

    def __setattr__(self, name, value):
        object.__setattr__(self, name, value)
        # automagically shrink heap on resize
        if name == 'size':
            while len(self.__heap) > value:
                lru = heappop(self.__heap)
                del self.__dict[lru.key]

    def __repr__(self):
        return "<%s (%d elements)>" % (str(self.__class__), len(self.__heap))

    def mtime(self, key):
        """Return the last modification time for the cache record with key.
        May be useful for cache instances where the stored values can get
        'stale', such as caching file or network resource contents."""
        if not self.__dict.has_key(key):
            raise CacheKeyError(key)
        else:
            node = self.__dict[key]
            return node.mtime

if __name__ == "__main__":
    cache = LRUCache(25)
    print cache
    for i in range(50):
        cache[i] = str(i)
    print cache
    if 46 in cache:
        print "46 in cache"
        del cache[46]
    print cache
    cache.size = 10
    print cache
    cache[46] = '46'
    print cache
    print len(cache)
    for c in cache:
        print c
    print cache
    print cache.mtime(46)
    for c in cache:
        print c
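
To see the eviction behavior in action, here is a minimal usage sketch. It assumes the listing above is saved as lrucache.py somewhere on the import path and, like the module itself, runs under Python 2; the keys and values are made up for the demo.

# demo: the least-recently-used entry is evicted once the cache is full
from lrucache import LRUCache, CacheKeyError

cache = LRUCache(size=3)      # keep at most 3 entries
for key in ('a', 'b', 'c'):
    cache[key] = key.upper()

contents = cache['a']         # touching 'a' makes it the most recent entry
cache['d'] = 'D'              # cache is full: 'b' is now the LRU entry and is discarded

print 'b' in cache            # False -- evicted
print cache['a'], cache['d']  # A D -- both still cached
try:
    cache['b']
except CacheKeyError:
    print "reading a discarded key raises CacheKeyError"

Lowering cache.size afterwards works the same way: the __setattr__ hook above immediately pops least-recently-used nodes until the cache fits the new size.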
