Compare commits

...

2 Commits

Author SHA1 Message Date
pictuga 325a373e3e feeds: add SyntaxError catch 2020-04-20 16:15:15 +02:00
pictuga 2719bd6776 crawler: fix chinese encoding 2020-04-20 16:14:55 +02:00
2 changed files with 10 additions and 1 deletion

View File

@ -149,6 +149,15 @@ class GZIPHandler(BaseHandler):
def detect_encoding(data, resp=None):
    """Return the encoding name to use for decoding `data`.

    Detection itself is delegated to `detect_raw_encoding(data, resp)`;
    this wrapper only post-processes the result by replacing a reported
    'gb2312' with 'gbk'.

    Args:
        data: the raw payload, passed through to `detect_raw_encoding`.
        resp: optional response object, passed through to
            `detect_raw_encoding`.

    Returns:
        'gbk' when the detected encoding is 'gb2312' (in any letter case),
        otherwise whatever `detect_raw_encoding` returned, unchanged.
    """
    enc = detect_raw_encoding(data, resp)

    # Charset labels are case-insensitive and frequently arrive upper-cased
    # (e.g. 'GB2312'), so compare on the lower-cased label. The None guard
    # keeps a "nothing detected" result passing through untouched.
    # NOTE(review): the swap to 'gbk' is presumably because content labelled
    # gb2312 often uses characters only the GBK superset can decode.
    if enc is not None and enc.lower() == 'gb2312':
        enc = 'gbk'

    return enc
def detect_raw_encoding(data, resp=None):
if resp is not None:
enc = resp.headers.get('charset')
if enc is not None:

View File

@ -100,7 +100,7 @@ def parse(data, url=None, mimetype=None, encoding=None):
try:
feed = parser(data, encoding=encoding)
except (ValueError):
except (ValueError, SyntaxError):
# parsing did not work
pass