Bypass feedsportal.

master
pictuga 2013-04-04 19:29:22 +02:00
parent ad25516e34
commit d6e6d61199
1 changed file with 14 additions and 4 deletions

@@ -3,6 +3,7 @@ import sys
 import os
 from os.path import expanduser
 from lxml import etree
+import re
 import string
 import urllib2
 from cookielib import CookieJar
@@ -37,11 +38,20 @@ class Info:
         self.opener = False
         self.enc = False
 
-        self.link = self.item.findtext('link')
+        self.link = self.item.xpath('link')[0]
         self.desc = self.item.xpath('description')[0]
 
+    def checkURL(self):
+        if self.link.text.startswith("http://rss.feedsportal.com"):
+            log('feedsportal')
+            url = re.search('/([0-9a-zA-Z]+)/[a-zA-Z0-9\.]+$', self.link.text).groups()[0].split('0')
+            t = {'A':'0', 'B':'.', 'C':'/', 'D':'?', 'E':'-', 'L':'ww', 'S':'w.'}
+            self.link.text = 'http://' + "".join([(t[s[0]] if s[0] in t else "=") + s[1:] for s in url[1:]])
+            log(self.link.text)
+
     def fetch(self):
-        log(self.link)
+        log(self.link.text)
+        self.checkURL()
         if not self.findCache():
             self.download()
             self.chardet()
@@ -64,7 +74,7 @@ class Info:
 
     def findCache(self):
         if self.feed.cache is not False:
-            xpath = "//link[text()='" + self.link + "']/../description/text()"
+            xpath = "//link[text()='" + self.link.text + "']/../description/text()"
             match = self.feed.cache.xpath(xpath)
             if len(match):
                 log('cached')
@@ -87,7 +97,7 @@ class Info:
         try:
             cj = CookieJar()
             self.opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
-            self.con = self.opener.open(self.link.encode('utf-8'))
+            self.con = self.opener.open(self.link.text.encode('utf-8'))
             self.data = self.con.read()
         except (urllib2.HTTPError, urllib2.URLError) as error:
             log(error)
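
For illustration, here is a standalone sketch of the decoding that the new checkURL method performs, run against a made-up feedsportal-style link; the function name decode_feedsportal and the sample URL are hypothetical and not part of the commit:

    import re

    # Same substitution table as in checkURL above.
    T = {'A': '0', 'B': '.', 'C': '/', 'D': '?', 'E': '-', 'L': 'ww', 'S': 'w.'}

    def decode_feedsportal(link):
        # Grab the next-to-last path segment, which carries the encoded target URL,
        # then split it on '0': each chunk starts with a one-letter code from T.
        encoded = re.search(r'/([0-9a-zA-Z]+)/[a-zA-Z0-9.]+$', link).groups()[0]
        parts = encoded.split('0')
        # Map the leading letter of each chunk back through T ('=' if unknown)
        # and keep the rest of the chunk as-is.
        return 'http://' + ''.join(T.get(p[0], '=') + p[1:] for p in parts[1:])

    # Hypothetical feedsportal-style link, made up for illustration:
    sample = 'http://rss.feedsportal.com/c/1/f/2/s/3/l/0L0Sexample0Bcom0Cstory0Bhtml/story01.htm'
    print(decode_feedsportal(sample))
    # -> http://www.example.com/story.html

In other words, the encoded segment uses '0' as a separator and a single letter per chunk to stand in for the character(s) it replaced ('B' for '.', 'C' for '/', 'L' for 'ww', 'S' for 'w.', and so on), so the original article URL can be rebuilt without following the feedsportal redirect.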