Only perform <meta> redirects on HTML pages

2013-09-15 15:33:14 +02:00
parent 3176c2a8e8
commit c25aec7107
1 changed file with 12 additions and 11 deletions
--- a/morss.py
+++ b/morss.py
@@ -196,19 +196,20 @@ class HTMLDownloader(urllib2.HTTPCookieProcessor):
 				data = GzipFile(fileobj=StringIO(data), mode='r').read()
 			# <meta> redirect
-			match = re.search(r'(?i)<meta http-equiv=.refresh[^>]*?url=(http.*?)["\']', data)
+			if resp.info().type in ['text/html', 'application/xhtml+xml']:
-			if match:
+				match = re.search(r'(?i)<meta http-equiv=.refresh[^>]*?url=(http.*?)["\']', data)
-				newurl = match.groups()[0]
+				if match:
-				log('redirect: %s' % newurl)
+					newurl = match.groups()[0]
 					log('redirect: %s' % newurl)
-				newheaders = dict((k,v) for k,v in req.headers.items()
+					newheaders = dict((k,v) for k,v in req.headers.items()
-					if k.lower() not in ('content-length', 'content-type'))
+						if k.lower() not in ('content-length', 'content-type'))
-				new = urllib2.Request(newurl,
+					new = urllib2.Request(newurl,
-					headers=newheaders,
+						headers=newheaders,
-					origin_req_host=req.get_origin_req_host(),
+						origin_req_host=req.get_origin_req_host(),
-					unverifiable=True)
+						unverifiable=True)
-				return self.parent.open(new, timeout=req.timeout)
+					return self.parent.open(new, timeout=req.timeout)
 			# decode
 			data = decodeHTML(resp, data)