Support for non-textual gzipped http content
parent
ecc18dc4ab
commit
7156dd5522
7
morss.py
7
morss.py
|
@ -233,15 +233,15 @@ class SimpleDownload(urllib2.HTTPCookieProcessor):
|
||||||
|
|
||||||
def http_response(self, req, resp):
|
def http_response(self, req, resp):
|
||||||
urllib2.HTTPCookieProcessor.http_response(self, req, resp)
|
urllib2.HTTPCookieProcessor.http_response(self, req, resp)
|
||||||
|
odata = data = resp.read()
|
||||||
|
|
||||||
if 200 <= resp.code < 300 and resp.info().maintype == 'text':
|
if 200 <= resp.code < 300:
|
||||||
data = resp.read()
|
|
||||||
|
|
||||||
# gzip
|
# gzip
|
||||||
if resp.headers.get('Content-Encoding') == 'gzip':
|
if resp.headers.get('Content-Encoding') == 'gzip':
|
||||||
log('un-gzip')
|
log('un-gzip')
|
||||||
data = GzipFile(fileobj=StringIO(data), mode='r').read()
|
data = GzipFile(fileobj=StringIO(data), mode='r').read()
|
||||||
|
|
||||||
|
if 200 <= resp.code < 300 and resp.info().maintype == 'text':
|
||||||
# <meta> redirect
|
# <meta> redirect
|
||||||
if resp.info().type in MIMETYPE['html']:
|
if resp.info().type in MIMETYPE['html']:
|
||||||
match = re.search(r'(?i)<meta http-equiv=.refresh[^>]*?url=(http.*?)["\']', data)
|
match = re.search(r'(?i)<meta http-equiv=.refresh[^>]*?url=(http.*?)["\']', data)
|
||||||
|
@ -262,6 +262,7 @@ class SimpleDownload(urllib2.HTTPCookieProcessor):
|
||||||
if self.decode:
|
if self.decode:
|
||||||
data = decodeHTML(data, resp)
|
data = decodeHTML(data, resp)
|
||||||
|
|
||||||
|
if odata != data:
|
||||||
fp = StringIO(data)
|
fp = StringIO(data)
|
||||||
old_resp = resp
|
old_resp = resp
|
||||||
resp = urllib2.addinfourl(fp, old_resp.headers, old_resp.url, old_resp.code)
|
resp = urllib2.addinfourl(fp, old_resp.headers, old_resp.url, old_resp.code)
|
||||||
|
|
Loading…
Reference in New Issue