crawler: add more "realistic" headers
parent
e136b0feb2
commit
eeac630855
|
@ -52,7 +52,7 @@ def custom_handler(follow=None, delay=None, encoding=None):
|
||||||
handlers.append(HTTPEquivHandler())
|
handlers.append(HTTPEquivHandler())
|
||||||
handlers.append(HTTPRefreshHandler())
|
handlers.append(HTTPRefreshHandler())
|
||||||
handlers.append(UAHandler(DEFAULT_UA))
|
handlers.append(UAHandler(DEFAULT_UA))
|
||||||
handlers.append(AutoRefererHandler())
|
handlers.append(BrowserlyHeaderHandler())
|
||||||
|
|
||||||
handlers.append(EncodingFixHandler(encoding))
|
handlers.append(EncodingFixHandler(encoding))
|
||||||
|
|
||||||
|
@ -195,9 +195,13 @@ class UAHandler(BaseHandler):
|
||||||
https_request = http_request
|
https_request = http_request
|
||||||
|
|
||||||
|
|
||||||
class AutoRefererHandler(BaseHandler):
|
class BrowserlyHeaderHandler(BaseHandler):
|
||||||
|
""" Add more headers to look less suspicious """
|
||||||
|
|
||||||
def http_request(self, req):
|
def http_request(self, req):
|
||||||
req.add_unredirected_header('Referer', '%s://%s' % (req.type, req.host))
|
req.add_unredirected_header('Referer', '%s://%s' % (req.type, req.host))
|
||||||
|
req.add_unredirected_header('Accept', 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8')
|
||||||
|
req.add_unredirected_header('Accept-Language', 'en-US,en;q=0.5')
|
||||||
return req
|
return req
|
||||||
|
|
||||||
https_request = http_request
|
https_request = http_request
|
||||||
|
|
Loading…
Reference in New Issue