From 9ee6ff60e193d3960a75677b768355080be809e7 Mon Sep 17 00:00:00 2001 From: pictuga Date: Sat, 18 Mar 2017 22:18:10 -1000 Subject: [PATCH] crawler: 301 http code doesn't respect headers More or less according to the specs --- morss/crawler.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/morss/crawler.py b/morss/crawler.py index 5827a69..205a408 100644 --- a/morss/crawler.py +++ b/morss/crawler.py @@ -294,6 +294,12 @@ class BaseCacheHandler(BaseHandler): # force refresh return None + elif code == 301 and cache_age < 7*24*3600: + print('301 so cached') + # "301 Moved Permanently" has to be cached...as long as we want (awesome HTTP specs), let's say a week (why not?) + # use force_min=0 if you want to bypass this (needed for a proper refresh) + pass + elif self.force_min is None and ('no-cache' in cc_list or 'no-store' in cc_list or ('private' in cc_list and not self.private)): @@ -308,11 +314,6 @@ class BaseCacheHandler(BaseHandler): # still recent enough for us, use cache pass - elif code == 301 and cache_age < 7*24*3600: - # "301 Moved Permanently" has to be cached...as long as we want (awesome HTTP specs), let's say a week (why not?) - # use force_min=0 if you want to bypass this (needed for a proper refresh) - pass - else: # according to the www, we have to refresh when nothing is said return None