diff --git a/morss/crawler.py b/morss/crawler.py index 5a6c859..47893f9 100644 --- a/morss/crawler.py +++ b/morss/crawler.py @@ -148,7 +148,8 @@ def detect_encoding(data, con=None): class EncodingFixHandler(BaseHandler): def http_response(self, req, resp): - if 200 <= resp.code < 300 and resp.info().maintype == 'text': + maintype = resp.info()['Content-Type'].split('/')[0] + if 200 <= resp.code < 300 and maintype == 'text': data = resp.read() enc = detect_encoding(data, resp) @@ -226,8 +227,9 @@ class ContentNegociationHandler(BaseHandler): #FIXME class MetaRedirectHandler(BaseHandler): def http_response(self, req, resp): - if 200 <= resp.code < 300 and resp.info().maintype == 'text': - if resp.info().type in MIMETYPE['html']: + contenttype = resp.info()['Content-Type'].split(';')[0] + if 200 <= resp.code < 300 and contenttype.startswith('text/'): + if contenttype in MIMETYPE['html']: data = resp.read() match = re.search(r'(?i)]*?url=(http.*?)["\']', data) if match: diff --git a/morss/morss.py b/morss/morss.py index 70a05e9..d08b355 100644 --- a/morss/morss.py +++ b/morss/morss.py @@ -391,7 +391,8 @@ def Fill(item, cache, options, feedurl='/', fast=False): cache.set(link, 'error-http') return True - if con.info().type not in MIMETYPE['html'] and con.info().type != 'text/plain': + contenttype = con.info()['Content-Type'].split(';')[0] + if contenttype not in MIMETYPE['html'] and contenttype != 'text/plain': log('non-text page') cache.set(link, 'error-type') return True @@ -459,17 +460,19 @@ def Fetch(url, cache, options): cache.set('etag', con.headers.getheader('etag')) cache.set('lastmodified', con.headers.getheader('last-modified')) + contenttype = con.info()['Content-Type'].split(';')[0] + if url.startswith('https://itunes.apple.com/lookup?id='): style = 'itunes' - elif xml.startswith('