Bypass feedsportal.

master
pictuga 2013-04-04 19:29:22 +02:00
parent ad25516e34
commit d6e6d61199
1 changed file with 14 additions and 4 deletions

View File

@@ -3,6 +3,7 @@ import sys
import os import os
from os.path import expanduser from os.path import expanduser
from lxml import etree from lxml import etree
import re
import string import string
import urllib2 import urllib2
from cookielib import CookieJar from cookielib import CookieJar
@@ -37,11 +38,20 @@ class Info:
self.opener = False self.opener = False
self.enc = False self.enc = False
self.link = self.item.findtext('link') self.link = self.item.xpath('link')[0]
self.desc = self.item.xpath('description')[0] self.desc = self.item.xpath('description')[0]
def checkURL(self):
    """Rewrite a feedsportal.com redirect link into its real target URL.

    feedsportal.com hides the destination URL inside the last path
    segment of its redirect link, as '0'-separated chunks whose first
    character is a substitution code (see table below).  Decoding it
    locally bypasses the redirect and saves one HTTP round-trip per item.

    Mutates self.link.text in place; no-op for non-feedsportal links.
    """
    if not self.link.text.startswith("http://rss.feedsportal.com"):
        return
    log('feedsportal')
    # The last path component carries the encoded URL.  Guard against
    # unexpected URL shapes: re.search returns None on no match, and
    # the original code would crash with AttributeError on .groups().
    match = re.search(r'/([0-9a-zA-Z]+)/[a-zA-Z0-9.]+$', self.link.text)
    if match is None:
        return
    url = match.groups()[0].split('0')
    # Substitution table for each chunk's leading character; codes not
    # listed here decode to '=' (observed feedsportal encoding scheme
    # -- TODO confirm table is complete against live feeds).
    t = {'A': '0', 'B': '.', 'C': '/', 'D': '?', 'E': '-', 'L': 'ww', 'S': 'w.'}
    # Skip the first chunk (scheme filler), decode the rest and rejoin.
    self.link.text = 'http://' + "".join([(t[s[0]] if s[0] in t else "=") + s[1:] for s in url[1:]])
    log(self.link.text)
def fetch(self): def fetch(self):
log(self.link) log(self.link.text)
self.checkURL()
if not self.findCache(): if not self.findCache():
self.download() self.download()
self.chardet() self.chardet()
@@ -64,7 +74,7 @@ class Info:
def findCache(self): def findCache(self):
if self.feed.cache is not False: if self.feed.cache is not False:
xpath = "//link[text()='" + self.link + "']/../description/text()" xpath = "//link[text()='" + self.link.text + "']/../description/text()"
match = self.feed.cache.xpath(xpath) match = self.feed.cache.xpath(xpath)
if len(match): if len(match):
log('cached') log('cached')
@@ -87,7 +97,7 @@ class Info:
try: try:
cj = CookieJar() cj = CookieJar()
self.opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj)) self.opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
self.con = self.opener.open(self.link.encode('utf-8')) self.con = self.opener.open(self.link.text.encode('utf-8'))
self.data = self.con.read() self.data = self.con.read()
except (urllib2.HTTPError, urllib2.URLError) as error: except (urllib2.HTTPError, urllib2.URLError) as error:
log(error) log(error)