Bypass feedsportal.
parent
ad25516e34
commit
d6e6d61199
18
morss.py
18
morss.py
|
@ -3,6 +3,7 @@ import sys
|
|||
import os
|
||||
from os.path import expanduser
|
||||
from lxml import etree
|
||||
import re
|
||||
import string
|
||||
import urllib2
|
||||
from cookielib import CookieJar
|
||||
|
@ -37,11 +38,20 @@ class Info:
|
|||
self.opener = False
|
||||
self.enc = False
|
||||
|
||||
self.link = self.item.findtext('link')
|
||||
self.link = self.item.xpath('link')[0]
|
||||
self.desc = self.item.xpath('description')[0]
|
||||
|
||||
def checkURL(self):
    """Rewrite a feedsportal.com redirect link to the URL encoded in it.

    Reads ``self.link.text``. When it starts with
    ``http://rss.feedsportal.com``, the second-to-last path segment is
    decoded and the result is written back to ``self.link.text``. Any
    other link (or a feedsportal link that does not have the expected
    shape) is left untouched.
    """
    link = self.link.text
    # An empty <link/> element has no text at all; nothing to rewrite.
    if not link or not link.startswith("http://rss.feedsportal.com"):
        return

    log('feedsportal')

    # Capture the alphanumeric path segment that precedes the final
    # file-like segment of the URL.
    match = re.search(r'/([0-9a-zA-Z]+)/[a-zA-Z0-9\.]+$', link)
    if match is None:
        # Unexpected URL layout: keep the original link instead of crashing.
        return

    # '0' acts as the escape marker: each piece after a '0' starts with a
    # code letter, which is replaced through this table; the rest of the
    # piece is copied verbatim.
    t = {'A':'0', 'B':'.', 'C':'/', 'D':'?', 'E':'-', 'L':'ww', 'S':'w.'}
    pieces = match.group(1).split('0')

    decoded = []
    # The piece before the first '0' is dropped, as in the original code.
    for s in pieces[1:]:
        if not s:
            # "00" or a trailing "0" yields an empty piece with no code
            # letter to translate; skip it rather than raise IndexError.
            continue
        # Unknown code letters are replaced by "=" (original behavior).
        decoded.append((t[s[0]] if s[0] in t else "=") + s[1:])

    self.link.text = 'http://' + "".join(decoded)
    log(self.link.text)
|
||||
|
||||
def fetch(self):
|
||||
log(self.link)
|
||||
log(self.link.text)
|
||||
self.checkURL()
|
||||
if not self.findCache():
|
||||
self.download()
|
||||
self.chardet()
|
||||
|
@ -64,7 +74,7 @@ class Info:
|
|||
|
||||
def findCache(self):
|
||||
if self.feed.cache is not False:
|
||||
xpath = "//link[text()='" + self.link + "']/../description/text()"
|
||||
xpath = "//link[text()='" + self.link.text + "']/../description/text()"
|
||||
match = self.feed.cache.xpath(xpath)
|
||||
if len(match):
|
||||
log('cached')
|
||||
|
@ -87,7 +97,7 @@ class Info:
|
|||
try:
|
||||
cj = CookieJar()
|
||||
self.opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
|
||||
self.con = self.opener.open(self.link.encode('utf-8'))
|
||||
self.con = self.opener.open(self.link.text.encode('utf-8'))
|
||||
self.data = self.con.read()
|
||||
except (urllib2.HTTPError, urllib2.URLError) as error:
|
||||
log(error)
|
||||
|
|
Loading…
Reference in New Issue