Support for non-textual gzipped http content

master
pictuga 2013-11-24 21:55:07 +01:00
parent ecc18dc4ab
commit 7156dd5522
1 changed file with 4 additions and 3 deletions

View File

@@ -233,15 +233,15 @@ class SimpleDownload(urllib2.HTTPCookieProcessor):
def http_response(self, req, resp): def http_response(self, req, resp):
urllib2.HTTPCookieProcessor.http_response(self, req, resp) urllib2.HTTPCookieProcessor.http_response(self, req, resp)
odata = data = resp.read()
if 200 <= resp.code < 300 and resp.info().maintype == 'text': if 200 <= resp.code < 300:
data = resp.read()
# gzip # gzip
if resp.headers.get('Content-Encoding') == 'gzip': if resp.headers.get('Content-Encoding') == 'gzip':
log('un-gzip') log('un-gzip')
data = GzipFile(fileobj=StringIO(data), mode='r').read() data = GzipFile(fileobj=StringIO(data), mode='r').read()
if 200 <= resp.code < 300 and resp.info().maintype == 'text':
# <meta> redirect # <meta> redirect
if resp.info().type in MIMETYPE['html']: if resp.info().type in MIMETYPE['html']:
match = re.search(r'(?i)<meta http-equiv=.refresh[^>]*?url=(http.*?)["\']', data) match = re.search(r'(?i)<meta http-equiv=.refresh[^>]*?url=(http.*?)["\']', data)
@@ -262,6 +262,7 @@ class SimpleDownload(urllib2.HTTPCookieProcessor):
if self.decode: if self.decode:
data = decodeHTML(data, resp) data = decodeHTML(data, resp)
if odata != data:
fp = StringIO(data) fp = StringIO(data)
old_resp = resp old_resp = resp
resp = urllib2.addinfourl(fp, old_resp.headers, old_resp.url, old_resp.code) resp = urllib2.addinfourl(fp, old_resp.headers, old_resp.url, old_resp.code)