Only perform <meta> redirects on HTML pages
parent
3176c2a8e8
commit
c25aec7107
23
morss.py
23
morss.py
|
@ -196,19 +196,20 @@ class HTMLDownloader(urllib2.HTTPCookieProcessor):
|
||||||
data = GzipFile(fileobj=StringIO(data), mode='r').read()
|
data = GzipFile(fileobj=StringIO(data), mode='r').read()
|
||||||
|
|
||||||
# <meta> redirect
|
# <meta> redirect
|
||||||
match = re.search(r'(?i)<meta http-equiv=.refresh[^>]*?url=(http.*?)["\']', data)
|
if resp.info().type in ['text/html', 'application/xhtml+xml']:
|
||||||
if match:
|
match = re.search(r'(?i)<meta http-equiv=.refresh[^>]*?url=(http.*?)["\']', data)
|
||||||
newurl = match.groups()[0]
|
if match:
|
||||||
log('redirect: %s' % newurl)
|
newurl = match.groups()[0]
|
||||||
|
log('redirect: %s' % newurl)
|
||||||
|
|
||||||
newheaders = dict((k,v) for k,v in req.headers.items()
|
newheaders = dict((k,v) for k,v in req.headers.items()
|
||||||
if k.lower() not in ('content-length', 'content-type'))
|
if k.lower() not in ('content-length', 'content-type'))
|
||||||
new = urllib2.Request(newurl,
|
new = urllib2.Request(newurl,
|
||||||
headers=newheaders,
|
headers=newheaders,
|
||||||
origin_req_host=req.get_origin_req_host(),
|
origin_req_host=req.get_origin_req_host(),
|
||||||
unverifiable=True)
|
unverifiable=True)
|
||||||
|
|
||||||
return self.parent.open(new, timeout=req.timeout)
|
return self.parent.open(new, timeout=req.timeout)
|
||||||
|
|
||||||
# decode
|
# decode
|
||||||
data = decodeHTML(resp, data)
|
data = decodeHTML(resp, data)
|
||||||
|
|
Loading…
Reference in New Issue