Only perform <meta> redirects on HTML pages

master
pictuga 2013-09-15 15:33:14 +02:00
parent 3176c2a8e8
commit c25aec7107
1 changed file with 12 additions and 11 deletions

View File

@@ -196,6 +196,7 @@ class HTMLDownloader(urllib2.HTTPCookieProcessor):
data = GzipFile(fileobj=StringIO(data), mode='r').read() data = GzipFile(fileobj=StringIO(data), mode='r').read()
# <meta> redirect # <meta> redirect
if resp.info().type in ['text/html', 'application/xhtml+xml']:
match = re.search(r'(?i)<meta http-equiv=.refresh[^>]*?url=(http.*?)["\']', data) match = re.search(r'(?i)<meta http-equiv=.refresh[^>]*?url=(http.*?)["\']', data)
if match: if match:
newurl = match.groups()[0] newurl = match.groups()[0]