feeds: various small cleanup/fixes
parent
24c8a0ecd0
commit
e606c5eefb
|
@ -44,18 +44,6 @@ except NameError:
|
||||||
basestring = unicode = str
|
basestring = unicode = str
|
||||||
|
|
||||||
|
|
||||||
Element = etree.Element
|
|
||||||
|
|
||||||
NSMAP = {'atom': 'http://www.w3.org/2005/Atom',
|
|
||||||
'atom03': 'http://purl.org/atom/ns#',
|
|
||||||
'media': 'http://search.yahoo.com/mrss/',
|
|
||||||
'rdf': 'http://www.w3.org/1999/02/22-rdf-syntax-ns#',
|
|
||||||
'slash': 'http://purl.org/rss/1.0/modules/slash/',
|
|
||||||
'dc': 'http://purl.org/dc/elements/1.1/',
|
|
||||||
'content': 'http://purl.org/rss/1.0/modules/content/',
|
|
||||||
'rssfake': 'http://purl.org/rss/1.0/'}
|
|
||||||
|
|
||||||
|
|
||||||
def parse_rules(filename=None):
|
def parse_rules(filename=None):
|
||||||
if not filename:
|
if not filename:
|
||||||
filename = os.path.join(os.path.dirname(__file__), 'feedify.ini')
|
filename = os.path.join(os.path.dirname(__file__), 'feedify.ini')
|
||||||
|
@ -224,6 +212,15 @@ class ParserBase(object):
|
||||||
|
|
||||||
|
|
||||||
class ParserXML(ParserBase):
|
class ParserXML(ParserBase):
|
||||||
|
NSMAP = {'atom': 'http://www.w3.org/2005/Atom',
|
||||||
|
'atom03': 'http://purl.org/atom/ns#',
|
||||||
|
'media': 'http://search.yahoo.com/mrss/',
|
||||||
|
'rdf': 'http://www.w3.org/1999/02/22-rdf-syntax-ns#',
|
||||||
|
'slash': 'http://purl.org/rss/1.0/modules/slash/',
|
||||||
|
'dc': 'http://purl.org/dc/elements/1.1/',
|
||||||
|
'content': 'http://purl.org/rss/1.0/modules/content/',
|
||||||
|
'rssfake': 'http://purl.org/rss/1.0/'}
|
||||||
|
|
||||||
def parse(self, raw):
|
def parse(self, raw):
|
||||||
parser = etree.XMLParser(recover=True)
|
parser = etree.XMLParser(recover=True)
|
||||||
return etree.fromstring(raw, parser)
|
return etree.fromstring(raw, parser)
|
||||||
|
@ -242,8 +239,8 @@ class ParserXML(ParserBase):
|
||||||
match = re.search(r'^([^:]+):([^:]+)$', rule) # to match fakerss:content
|
match = re.search(r'^([^:]+):([^:]+)$', rule) # to match fakerss:content
|
||||||
if match:
|
if match:
|
||||||
match = match.groups()
|
match = match.groups()
|
||||||
if match[0] in NSMAP:
|
if match[0] in self.NSMAP:
|
||||||
return "{%s}%s" % (NSMAP[match[0]], match[1].lower())
|
return "{%s}%s" % (self.NSMAP[match[0]], match[1].lower())
|
||||||
|
|
||||||
return rule
|
return rule
|
||||||
|
|
||||||
|
@ -257,7 +254,7 @@ class ParserXML(ParserBase):
|
||||||
|
|
||||||
def rule_search_all(self, rule):
|
def rule_search_all(self, rule):
|
||||||
try:
|
try:
|
||||||
return self.root.xpath(rule, namespaces=NSMAP)
|
return self.root.xpath(rule, namespaces=self.NSMAP)
|
||||||
|
|
||||||
except etree.XPathEvalError:
|
except etree.XPathEvalError:
|
||||||
return []
|
return []
|
||||||
|
@ -405,17 +402,18 @@ class Uniq(object):
|
||||||
_id = None
|
_id = None
|
||||||
|
|
||||||
def __new__(cls, *args, **kwargs):
|
def __new__(cls, *args, **kwargs):
|
||||||
# check if an item was already created for it
|
# check if a wrapper was already created for it
|
||||||
# if so, reuse it
|
# if so, reuse it
|
||||||
# if not, create a new one
|
# if not, create a new one
|
||||||
|
# note that the item itself (the tree node) is created beforehand
|
||||||
|
|
||||||
tmp_id = cls._gen_id(*args, **kwargs)
|
tmp_id = cls._gen_id(*args, **kwargs)
|
||||||
if tmp_id is not None and tmp_id in cls._map:
|
if tmp_id in cls._map:
|
||||||
return cls._map[tmp_id]
|
return cls._map[tmp_id]
|
||||||
|
|
||||||
else:
|
else:
|
||||||
obj = object.__new__(cls, *args, **kwargs)
|
obj = object.__new__(cls, *args, **kwargs)
|
||||||
cls._map[obj._id] = obj
|
cls._map[tmp_id] = obj
|
||||||
return obj
|
return obj
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -164,8 +164,8 @@ def ItemFix(item, feedurl='/'):
|
||||||
item.link = parse_qs(urlparse(item.link).query)['u'][0]
|
item.link = parse_qs(urlparse(item.link).query)['u'][0]
|
||||||
log(item.link)
|
log(item.link)
|
||||||
|
|
||||||
# feedburner
|
# feedburner FIXME only works if RSS...
|
||||||
feeds.NSMAP['feedburner'] = 'http://rssnamespace.org/feedburner/ext/1.0'
|
item.NSMAP['feedburner'] = 'http://rssnamespace.org/feedburner/ext/1.0'
|
||||||
match = item.rule_str('feedburner:origLink')
|
match = item.rule_str('feedburner:origLink')
|
||||||
if match:
|
if match:
|
||||||
item.link = match
|
item.link = match
|
||||||
|
|
Loading…
Reference in New Issue