@@ -3,6 +3,7 @@ import sys
 import os
 from os.path import expanduser
 from lxml import etree
 import re
 import string
 import urllib2
+from cookielib import CookieJar
@@ -37,11 +38,20 @@ class Info:
 		self.opener = False
 		self.enc = False
 
-		self.link = self.item.findtext('link')
-		self.desc = self.item.findtext('description')
+		self.link = self.item.xpath('link')[0]
+		self.desc = self.item.xpath('description')[0]
+
+	def checkURL(self):
+		if self.link.text.startswith("http://rss.feedsportal.com"):
+			log('feedsportal')
+			url = re.search('/([0-9a-zA-Z]+)/[a-zA-Z0-9\.]+$', self.link.text).groups()[0].split('0')
+			t = {'A':'0', 'B':'.', 'C':'/', 'D':'?', 'E':'-', 'L':'ww', 'S':'w.'}
+			self.link.text = 'http://' + "".join([(t[s[0]] if s[0] in t else "=") + s[1:] for s in url[1:]])
+			log(self.link.text)
 
 	def fetch(self):
-		log(self.link)
+		log(self.link.text)
+		self.checkURL()
 		if not self.findCache():
 			self.download()
 			self.chardet()
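Since self.link is now the lxml <link> element rather than its string value (xpath('link')[0] instead of findtext('link')), checkURL() can rewrite the URL in place through self.link.text. The feedsportal rewrite takes the last path segment before the trailing filename, splits it on '0', and maps each chunk's leading letter back to the character(s) it encodes. A minimal standalone sketch of that decoding, using a made-up encoded link rather than one captured from a real feed:

# Standalone version of the decoding in checkURL(); the input URL is made up.
import re

def decode_feedsportal(link):
    # The encoded target is the last path segment before the final filename.
    enc = re.search('/([0-9a-zA-Z]+)/[a-zA-Z0-9\.]+$', link).groups()[0].split('0')
    # Escape table: each chunk starts with a letter naming the character(s)
    # it stands for; letters outside the table fall back to '='.
    t = {'A':'0', 'B':'.', 'C':'/', 'D':'?', 'E':'-', 'L':'ww', 'S':'w.'}
    return 'http://' + "".join([(t[s[0]] if s[0] in t else "=") + s[1:] for s in enc[1:]])

print decode_feedsportal('http://rss.feedsportal.com/c/123/f/456/0L0Sexample0Bcom0Cnews/story01.htm')
# prints: http://www.example.com/news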
@@ -64,7 +74,7 @@ class Info:
 
 	def findCache(self):
 		if self.feed.cache is not False:
-			xpath = "//link[text()='" + self.link + "']/../description/text()"
+			xpath = "//link[text()='" + self.link.text + "']/../description/text()"
 			match = self.feed.cache.xpath(xpath)
 			if len(match):
 				log('cached')
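findCache() asks the cached copy of the feed for an item whose link text matches the current one and, on a hit, reuses its description. A small sketch of what that XPath returns, against a made-up cache tree (self.feed.cache is assumed to be a parsed lxml document from an earlier run):

from lxml import etree

# Hypothetical cache document; only the shape matters here.
cache = etree.fromstring(
    '<rss><channel><item>'
    '<link>http://www.example.com/news</link>'
    '<description>cached body</description>'
    '</item></channel></rss>')

link = 'http://www.example.com/news'
print cache.xpath("//link[text()='" + link + "']/../description/text()")
# prints: ['cached body']

Note that the link text is spliced into the query unescaped, so a link containing a single quote would break the XPath expression.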
@@ -87,7 +97,7 @@ class Info:
 		try:
 			cj = CookieJar()
 			self.opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
-			self.con = self.opener.open(self.link.encode('utf-8'))
+			self.con = self.opener.open(self.link.text.encode('utf-8'))
 			self.data = self.con.read()
 		except (urllib2.HTTPError, urllib2.URLError) as error:
 			log(error)
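download() opens the URL through an opener built around the newly imported CookieJar, so cookies the server sets (for instance during redirects) are kept and resent for the rest of the session, which the default urllib2.urlopen does not do. A minimal Python 2 sketch of the same pattern, with a placeholder URL:

import urllib2
from cookielib import CookieJar

# Cookie-aware opener mirroring download(); the URL below is a placeholder.
cj = CookieJar()
opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
try:
    con = opener.open('http://www.example.com/feed')
    data = con.read()
except (urllib2.HTTPError, urllib2.URLError) as error:
    print error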