crawler: add more "realistic" headers

master
pictuga 2020-04-05 21:11:57 +02:00
parent e136b0feb2
commit eeac630855
1 changed file with 6 additions and 2 deletions

View File

@@ -52,7 +52,7 @@ def custom_handler(follow=None, delay=None, encoding=None):
handlers.append(HTTPEquivHandler()) handlers.append(HTTPEquivHandler())
handlers.append(HTTPRefreshHandler()) handlers.append(HTTPRefreshHandler())
handlers.append(UAHandler(DEFAULT_UA)) handlers.append(UAHandler(DEFAULT_UA))
handlers.append(AutoRefererHandler()) handlers.append(BrowserlyHeaderHandler())
handlers.append(EncodingFixHandler(encoding)) handlers.append(EncodingFixHandler(encoding))
@@ -195,9 +195,13 @@ class UAHandler(BaseHandler):
https_request = http_request https_request = http_request
class AutoRefererHandler(BaseHandler): class BrowserlyHeaderHandler(BaseHandler):
""" Add more headers to look less suspicious """
def http_request(self, req): def http_request(self, req):
req.add_unredirected_header('Referer', '%s://%s' % (req.type, req.host)) req.add_unredirected_header('Referer', '%s://%s' % (req.type, req.host))
req.add_unredirected_header('Accept', 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8')
req.add_unredirected_header('Accept-Language', 'en-US,en;q=0.5')
return req return req
https_request = http_request https_request = http_request