Time-based Cache

Solves the :proxy issue for good. More convenient, more flexible
master
pictuga 2014-05-24 19:01:21 +02:00
parent cf1f5d79fa
commit 26c91070f5
1 changed file with 27 additions and 26 deletions

View File

@@ -97,9 +97,10 @@ class Options:
 class Cache:
     """ Light, error-prone caching system. """
-    def __init__(self, folder, key):
+    def __init__(self, folder, key, lifespan=10*24*3600):
         self._key = key
         self._dir = folder
+        self._lifespan = lifespan

         maxsize = os.statvfs('./').f_namemax - len(self._dir) - 1 - 4 # ".tmp"
         self._hash = urllib.quote_plus(self._key)[:maxsize]
@@ -107,31 +108,28 @@ class Cache:
         self._file = self._dir + '/' + self._hash
         self._file_tmp = self._file + '.tmp'

-        self._cached = {} # what *was* cached
-        self._cache = {} # new things to put in cache
+        self._cache = {}

         if os.path.isfile(self._file):
             data = open(self._file).read()
             if data:
-                self._cached = json.loads(data)
+                self._cache = json.loads(data)

     def __del__(self):
         self.save()

     def __contains__(self, key):
-        return key in self._cache or key in self._cached
+        return key in self._cache

     def get(self, key):
         if key in self._cache:
-            return self._cache[key]
-        elif key in self._cached:
-            self._cache[key] = self._cached[key]
-            return self._cached[key]
+            self._cache[key]['last'] = time.time()
+            return self._cache[key]['value']
         else:
             return None

     def set(self, key, content):
-        self._cache[key] = content
+        self._cache[key] = {'last': time.time(), 'value': content}

     __getitem__ = get
     __setitem__ = set
@@ -140,13 +138,13 @@ class Cache:
         if len(self._cache) == 0:
             return

-        # useful to circumvent issue caused by :proxy
-        if len(self._cache) < 5:
-            self._cache.update(self._cached)

         if not os.path.exists(self._dir):
             os.makedirs(self._dir)

+        for i in self._cache:
+            if time.time() - self._cache[i]['last'] > self._lifespan > -1:
+                del self._cache[i]

         out = json.dumps(self._cache, indent=4)

         try:
@@ -157,22 +155,25 @@ class Cache:
         except OSError:
             log('failed to move cache to file')

-    def isYoungerThan(self, sec):
-        if not os.path.exists(self._file):
-            return False

-        return time.time() - os.path.getmtime(self._file) < sec
+    def last(self, key):
+        if key not in self._cache:
+            return -1
+        return self._cache[key]['last']

+    def age(self, key):
+        if key not in self._cache:
+            return -1
+        return time.time() - self.last(key)

-    def new(self, key):
+    def new(self, *arg, **karg):
         """ Returns a Cache object in the same directory """
-        if key != self._key:
-            return Cache(self._dir, key)
+        if arg[0] != self._key:
+            return Cache(self._dir, *arg, **karg)
         else:
             return self

+    def redirect(self, key):
+        return self.__init__(self._dir, key)

 class SimpleDownload(urllib2.HTTPCookieProcessor):
     """
     Custom urllib2 handler to download a page, using etag/last-modified headers,
class SimpleDownload(urllib2.HTTPCookieProcessor): class SimpleDownload(urllib2.HTTPCookieProcessor):
""" """
Custom urllib2 handler to download a page, using etag/last-modified headers, Custom urllib2 handler to download a page, using etag/last-modified headers,
@@ -421,7 +422,7 @@ def Fill(item, cache, feedurl='/', fast=False):
     content = cache.get(link)
     match = re.search(r'^error-([a-z]{2,10})$', content)
     if match:
-        if cache.isYoungerThan(DELAY):
+        if cache.age(link) > DELAY:
             log('cached error: %s' % match.groups()[0])
             return True
         else:
@@ -496,7 +497,7 @@ def Fetch(url, cache, options):
         log('cache redirect')

     # fetch feed
-    if cache.isYoungerThan(DELAY) and not options.theforce and 'xml' in cache and 'style' in cache:
+    if not options.theforce and 'xml' in cache and cache.age('xml') < DELAY and 'style' in cache:
         log('xml cached')
         xml = cache.get('xml')
         style = cache.get('style')