Save Cache when it's new.

This avoids crashes on the first fetch.
master
pictuga 2013-04-23 00:24:41 +02:00
parent ca90d082c3
commit fa7cd957df
1 changed file with 10 additions and 5 deletions

View File

@ -64,10 +64,11 @@ class Cache:
self._key = key self._key = key
self._dir = folder self._dir = folder
self._file = self._dir + "/" + str(hash(self._key)) self._file = self._dir + "/" + str(hash(self._key))
self._new = not os.path.exists(self._file)
self._cached = {} # what *was* cached self._cached = {} # what *was* cached
self._cache = {} # new things to put in cache self._cache = {} # new things to put in cache
if os.path.exists(self._file): if not self._new:
data = open(self._file).read().strip().split("\n") data = open(self._file).read().strip().split("\n")
for line in data: for line in data:
key, bdata = line.split("\t") key, bdata = line.split("\t")
@ -91,7 +92,7 @@ class Cache:
def set(self, key, content): def set(self, key, content):
self._cache[key] = b64encode(content) self._cache[key] = b64encode(content)
if not os.path.exists(self._file): if self._new:
self.save() self.save()
def save(self): def save(self):
@ -274,13 +275,17 @@ def Fill(rss, cache):
item = XMLMap(rss, ITEM_MAP, True) item = XMLMap(rss, ITEM_MAP, True)
log(item.link) log(item.link)
if 'link' not in item:
log('no link')
return
# content already provided? # content already provided?
if 'content' in item: if 'content' in item and 'desc' in item:
content_len = len(lxml.html.fromstring(item.content).text_content()) content_len = len(lxml.html.fromstring(item.content).text_content())
log('content: %s vs %s' % (content_len, len(item.desc))) log('content: %s vs %s' % (content_len, len(item.desc)))
if content_len > 5*len(item.desc): if content_len > 5*len(item.desc):
log('provided') log('provided')
return item return
match = re.search('/([0-9a-zA-Z]{20,})/story01.htm$', item.link) match = re.search('/([0-9a-zA-Z]{20,})/story01.htm$', item.link)
if match: if match:
@ -296,7 +301,7 @@ def Fill(rss, cache):
if item.link in cache: if item.link in cache:
log('cached') log('cached')
item.content = cache.get(item.link) item.content = cache.get(item.link)
return item return
# download # download
ddl = EncDownload(item.link) ddl = EncDownload(item.link)