From fbe811384a5f07f3681bdbca3f5e8549fdc89f8f Mon Sep 17 00:00:00 2001
From: pictuga
Date: Fri, 27 Oct 2017 23:10:03 +0200
Subject: [PATCH] crawler: add (unused) DebugHandler to output headers sent/received

Saves a lot of time when debugging
---
 morss/crawler.py | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/morss/crawler.py b/morss/crawler.py
index 2cafd4e..d5416eb 100644
--- a/morss/crawler.py
+++ b/morss/crawler.py
@@ -44,6 +44,7 @@ def custom_handler(accept=None, strict=False, delay=None, encoding=None, basic=F
     # FTPHandler, FileHandler, HTTPErrorProcessor]
     # & HTTPSHandler
 
+    #handlers.append(DebugHandler())
     handlers.append(HTTPCookieProcessor())
     handlers.append(GZIPHandler())
     handlers.append(HTTPEquivHandler())
@@ -63,6 +64,21 @@ def custom_handler(accept=None, strict=False, delay=None, encoding=None, basic=F
     return build_opener(*handlers)
 
 
+class DebugHandler(BaseHandler):
+    handler_order = 2000
+
+    def http_request(self, req):
+        print(repr(req.header_items()))
+        return req
+
+    def http_response(self, req, resp):
+        print(resp.headers.__dict__)
+        return resp
+
+    https_request = http_request
+    https_response = http_response
+
+
 class GZIPHandler(BaseHandler):
     def http_request(self, req):
         req.add_unredirected_header('Accept-Encoding', 'gzip')