From 2719bd6776797e5cd6dca877dbd4f393186394f0 Mon Sep 17 00:00:00 2001 From: pictuga Date: Mon, 20 Apr 2020 16:14:55 +0200 Subject: [PATCH] crawler: fix Chinese encoding --- morss/crawler.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/morss/crawler.py b/morss/crawler.py index 18795f1..94669d6 100644 --- a/morss/crawler.py +++ b/morss/crawler.py @@ -149,6 +149,15 @@ class GZIPHandler(BaseHandler): def detect_encoding(data, resp=None): + enc = detect_raw_encoding(data, resp) + + if enc == 'gb2312': + enc = 'gbk' + + return enc + + +def detect_raw_encoding(data, resp=None): if resp is not None: enc = resp.headers.get('charset') if enc is not None: