From eeac6308553f1c0b03c19fa809bdab722388fff3 Mon Sep 17 00:00:00 2001
From: pictuga
Date: Sun, 5 Apr 2020 21:11:57 +0200
Subject: [PATCH] crawler: add more "realistic" headers

---
 morss/crawler.py | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/morss/crawler.py b/morss/crawler.py
index 7607762..8b1254f 100644
--- a/morss/crawler.py
+++ b/morss/crawler.py
@@ -52,7 +52,7 @@ def custom_handler(follow=None, delay=None, encoding=None):
     handlers.append(HTTPEquivHandler())
     handlers.append(HTTPRefreshHandler())
     handlers.append(UAHandler(DEFAULT_UA))
-    handlers.append(AutoRefererHandler())
+    handlers.append(BrowserlyHeaderHandler())
 
     handlers.append(EncodingFixHandler(encoding))
 
@@ -195,9 +195,13 @@ class UAHandler(BaseHandler):
     https_request = http_request
 
 
-class AutoRefererHandler(BaseHandler):
+class BrowserlyHeaderHandler(BaseHandler):
+    """ Add more headers to look less suspicious """
+
     def http_request(self, req):
         req.add_unredirected_header('Referer', '%s://%s' % (req.type, req.host))
+        req.add_unredirected_header('Accept', 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8')
+        req.add_unredirected_header('Accept-Language', 'en-US,en;q=0.5')
         return req
 
     https_request = http_request