From 131ba092078d1afb47e02e29fa125e6bc842147d Mon Sep 17 00:00:00 2001 From: pictuga Date: Tue, 7 Apr 2015 09:38:22 +0800 Subject: [PATCH] Change :cache mode behavior Makes underlying code way cleaner --- morss/crawler.py | 17 ++++++----------- morss/morss.py | 8 ++------ 2 files changed, 8 insertions(+), 17 deletions(-) diff --git a/morss/crawler.py b/morss/crawler.py index fa394a4..330dddb 100644 --- a/morss/crawler.py +++ b/morss/crawler.py @@ -203,7 +203,7 @@ class BaseCacheHandler(BaseHandler): handler_order = 499 def __init__(self, force_min=None): - self.force_min = force_min # force_min (seconds) to bypass http headers, -1 forever, 0 never, -2 do nothing if not in cache, -3 is like -2 but raises an error + self.force_min = force_min # force_min (seconds) to bypass http headers, -1 forever, 0 never, -2 do nothing if not in cache def _load(self, url): out = list(self.load(url)) @@ -254,21 +254,16 @@ class BaseCacheHandler(BaseHandler): cache_age = time.time() - timestamp # list in a simple way what to do when - if self.force_min in (-2, -3): + if self.force_min == -2: if code is not None: # already in cache, perfect, use cache pass else: - # ok then... - if self.force_min == -2: - headers['morss'] = 'from_cache' - resp = addinfourl(BytesIO(), headers, req.get_full_url(), 409) - resp.msg = 'Conflict' - return resp - - elif self.force_min == -3: - raise NotInCache() + headers['morss'] = 'from_cache' + resp = addinfourl(BytesIO(), headers, req.get_full_url(), 409) + resp.msg = 'Conflict' + return resp elif code is None: # cache empty, refresh diff --git a/morss/morss.py b/morss/morss.py index 4181cfd..a7baa9b 100644 --- a/morss/morss.py +++ b/morss/morss.py @@ -272,19 +272,15 @@ def Fill(item, options, feedurl='/', fast=False): if fast: # super-fast mode - delay = -3 + delay = -2 try: con = custom_handler(('html', 'text/*'), delay).open(link, timeout=TIMEOUT) data = con.read() - except crawler.NotInCache: - log('skipped') - return False - except (IOError, HTTPException) as e: log('http error') - return True + return False # let's just delete errors stuff when in cache mode contenttype = con.info().get('Content-Type', '').split(';')[0] if contenttype not in MIMETYPE['html'] and contenttype != 'text/plain':