crawler: fix regression introduced by 44a6b2591
continuous-integration/drone/push Build is passing
Details
continuous-integration/drone/push Build is passing
Details
parent
cb21871c35
commit
e42df98f83
|
@ -19,6 +19,7 @@ import os
|
||||||
import pickle
|
import pickle
|
||||||
import random
|
import random
|
||||||
import re
|
import re
|
||||||
|
import sys
|
||||||
import time
|
import time
|
||||||
import zlib
|
import zlib
|
||||||
from cgi import parse_header
|
from cgi import parse_header
|
||||||
|
@ -33,7 +34,7 @@ try:
|
||||||
# python 2
|
# python 2
|
||||||
from urllib import quote
|
from urllib import quote
|
||||||
|
|
||||||
from mimetools import Message as message_from_string
|
from httplib import HTTPMessage
|
||||||
from urllib2 import (BaseHandler, HTTPCookieProcessor, HTTPRedirectHandler,
|
from urllib2 import (BaseHandler, HTTPCookieProcessor, HTTPRedirectHandler,
|
||||||
Request, addinfourl, build_opener, parse_http_list,
|
Request, addinfourl, build_opener, parse_http_list,
|
||||||
parse_keqv_list)
|
parse_keqv_list)
|
||||||
|
@ -41,6 +42,7 @@ try:
|
||||||
except ImportError:
|
except ImportError:
|
||||||
# python 3
|
# python 3
|
||||||
from email import message_from_string
|
from email import message_from_string
|
||||||
|
from http.client import HTTPMessage
|
||||||
from urllib.parse import quote, urlparse, urlunparse
|
from urllib.parse import quote, urlparse, urlunparse
|
||||||
from urllib.request import (BaseHandler, HTTPCookieProcessor,
|
from urllib.request import (BaseHandler, HTTPCookieProcessor,
|
||||||
HTTPRedirectHandler, Request, addinfourl,
|
HTTPRedirectHandler, Request, addinfourl,
|
||||||
|
@ -426,9 +428,19 @@ class HTTPRefreshHandler(BaseHandler):
|
||||||
https_response = http_response
|
https_response = http_response
|
||||||
|
|
||||||
|
|
||||||
|
def parse_headers(text=u'\n\n'):
    """Parse a raw header block into an ``HTTPMessage`` on Python 2 and 3.

    :param text: header lines terminated by a blank line. The default is
        just the blank-line terminator, i.e. a message with no headers.
    :return: an ``HTTPMessage`` instance built from ``text``.
    """
    if sys.version_info[0] >= 3:
        # python 3
        # Ask the email parser to build an HTTPMessage (a Message subclass)
        # so both interpreter branches hand back the same type.
        return message_from_string(text, _class=HTTPMessage)

    else:
        # python 2
        # httplib's HTTPMessage parses directly from a file-like object.
        return HTTPMessage(StringIO(text))
|
||||||
|
|
||||||
|
|
||||||
def error_response(code, msg, url=''):
|
def error_response(code, msg, url=''):
|
||||||
# return an error as a response
|
# return an error as a response
|
||||||
resp = addinfourl(BytesIO(), message_from_string('\n\n'), url, code)
|
resp = addinfourl(BytesIO(), parse_headers(), url, code)
|
||||||
resp.msg = msg
|
resp.msg = msg
|
||||||
return resp
|
return resp
|
||||||
|
|
||||||
|
@ -479,7 +491,7 @@ class CacheHandler(BaseHandler):
|
||||||
data = None
|
data = None
|
||||||
|
|
||||||
else:
|
else:
|
||||||
data['headers'] = message_from_string(data['headers'] or unicode()) # headers
|
data['headers'] = parse_headers(data['headers'] or unicode())
|
||||||
|
|
||||||
return data
|
return data
|
||||||
|
|
||||||
|
@ -505,7 +517,7 @@ class CacheHandler(BaseHandler):
|
||||||
self.save(req.get_full_url(), {
|
self.save(req.get_full_url(), {
|
||||||
'code': resp.code,
|
'code': resp.code,
|
||||||
'msg': resp.msg,
|
'msg': resp.msg,
|
||||||
'headers': str(resp.headers),
|
'headers': resp.headers,
|
||||||
'data': data,
|
'data': data,
|
||||||
'timestamp': time.time()
|
'timestamp': time.time()
|
||||||
})
|
})
|
||||||
|
@ -646,8 +658,6 @@ if 'IGNORE_SSL' in os.environ:
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
import sys
|
|
||||||
|
|
||||||
req = adv_get(sys.argv[1] if len(sys.argv) > 1 else 'https://morss.it')
|
req = adv_get(sys.argv[1] if len(sys.argv) > 1 else 'https://morss.it')
|
||||||
|
|
||||||
if sys.flags.interactive:
|
if sys.flags.interactive:
|
||||||
|
|
Loading…
Reference in New Issue