feeds: add XML support for merger
parent
5a4a86d622
commit
6d28323e3a
148
morss/feeds.py
148
morss/feeds.py
|
@ -239,6 +239,143 @@ class ParserBase(object):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
class ParserXML(ParserBase):
|
||||||
|
def parse(self, raw):
|
||||||
|
parser = etree.XMLParser(recover=True)
|
||||||
|
return etree.fromstring(raw, parser)
|
||||||
|
|
||||||
|
def tostring(self, **k):
|
||||||
|
return etree.tostring(self.root, **k)
|
||||||
|
|
||||||
|
def _rule_parse(self, rule):
|
||||||
|
test = re.search(r'^(.*)/@([a-z]+)$', rule) # to match //div/a/@href
|
||||||
|
return test.groups() if test else (rule, None)
|
||||||
|
|
||||||
|
def _resolve_ns(self, rule):
|
||||||
|
match = re.search(r'^([^:]+):([^:]+)$', rule)
|
||||||
|
if match:
|
||||||
|
match = match.groups()
|
||||||
|
if match[0] in NSMAP:
|
||||||
|
return "{%s}%s" % (NSMAP[match[0]], match[1].lower())
|
||||||
|
|
||||||
|
return rule
|
||||||
|
|
||||||
|
def rule_search_all(self, rule):
|
||||||
|
try:
|
||||||
|
return self.root.xpath(rule, namespaces=NSMAP)
|
||||||
|
|
||||||
|
except etree.XPathEvalError:
|
||||||
|
return []
|
||||||
|
|
||||||
|
def rule_create(self, rule):
|
||||||
|
# duplicate, copy from template or create from scratch
|
||||||
|
rule, key = self._rule_parse(rule)
|
||||||
|
|
||||||
|
# try recreating based on the rule (for really basic rules, ie. plain RSS)
|
||||||
|
if re.search(r'^[a-zA-Z0-9/:]+$', rule):
|
||||||
|
chain = rule.strip('/').split('/')
|
||||||
|
current = self.root
|
||||||
|
|
||||||
|
if rule[0] == '/':
|
||||||
|
chain = chain[1:]
|
||||||
|
|
||||||
|
for (i, node) in enumerate(chain):
|
||||||
|
test = current.find(self._resolve_ns(node))
|
||||||
|
|
||||||
|
if test and i < len(chain) - 1:
|
||||||
|
# yay, go on
|
||||||
|
current = test
|
||||||
|
|
||||||
|
else:
|
||||||
|
# opps need to create
|
||||||
|
element = etree.Element(self._resolve_ns(node))
|
||||||
|
current.append(element)
|
||||||
|
current = element
|
||||||
|
|
||||||
|
return current
|
||||||
|
|
||||||
|
# try duplicating from existing (works well with fucked up structures)
|
||||||
|
match = self.rule_search_last(rule)
|
||||||
|
if match:
|
||||||
|
element = deepcopy(match)
|
||||||
|
match.getparen().append(element)
|
||||||
|
return element
|
||||||
|
|
||||||
|
# try duplicating from template
|
||||||
|
# FIXME
|
||||||
|
# >>> self.xml.getroottree().getpath(ff.find('a'))
|
||||||
|
|
||||||
|
return None
|
||||||
|
|
||||||
|
def rule_remove(self, rule):
|
||||||
|
rule, key = self._rule_parse(rule)
|
||||||
|
|
||||||
|
match = self.rule_search(rule)
|
||||||
|
|
||||||
|
if key is not None:
|
||||||
|
del x.attrib[key]
|
||||||
|
|
||||||
|
else:
|
||||||
|
match.getparent().remove(match)
|
||||||
|
|
||||||
|
def rule_set(self, rule, value):
|
||||||
|
rule, key = self._rule_parse(rule)
|
||||||
|
|
||||||
|
match = self.rule_search(rule)
|
||||||
|
|
||||||
|
if key is not None:
|
||||||
|
match.attrib[key] = value
|
||||||
|
|
||||||
|
else:
|
||||||
|
match.text = value
|
||||||
|
|
||||||
|
def rule_str(self, rule):
|
||||||
|
match = self.rule_search(rule)
|
||||||
|
|
||||||
|
if isinstance(match, etree._Element):
|
||||||
|
return match.text or ""
|
||||||
|
|
||||||
|
else:
|
||||||
|
return match or ""
|
||||||
|
|
||||||
|
def bool_prs(self, x):
|
||||||
|
return (x or '').lower() != 'false'
|
||||||
|
|
||||||
|
def bool_fmt(self, x):
|
||||||
|
return 'true' if x else 'false'
|
||||||
|
|
||||||
|
def time_prs(self, x):
|
||||||
|
try:
|
||||||
|
return parse_time(x)
|
||||||
|
except ValueError:
|
||||||
|
return None
|
||||||
|
|
||||||
|
def time_fmt(self, x):
|
||||||
|
try:
|
||||||
|
time = parse_time(x)
|
||||||
|
return time.strftime(self.rules['timeformat'])
|
||||||
|
except ValueError:
|
||||||
|
pass
|
||||||
|
|
||||||
|
def get_raw(self, rule_name):
|
||||||
|
return self.rule_search_all(self.rules[rule_name])
|
||||||
|
|
||||||
|
def get_str(self, rule_name):
|
||||||
|
return self.rule_str(self.rules[rule_name])
|
||||||
|
|
||||||
|
def set_str(self, rule_name, value):
|
||||||
|
try:
|
||||||
|
return self.rule_set(self.rules[rule_name], value)
|
||||||
|
|
||||||
|
except AttributeError:
|
||||||
|
# does not exist, have to create it
|
||||||
|
self.rule_create(self.rules[rule_name])
|
||||||
|
return self.rule_set(self.rules[rule_name], value)
|
||||||
|
|
||||||
|
def remove(self, rule_name):
|
||||||
|
self.rule_remove(self.rules[rule_name])
|
||||||
|
|
||||||
|
|
||||||
class FeedBase(object):
|
class FeedBase(object):
|
||||||
"""
|
"""
|
||||||
Base for xml-related classes, which provides simple wrappers around xpath
|
Base for xml-related classes, which provides simple wrappers around xpath
|
||||||
|
@ -396,6 +533,13 @@ class Feed(object):
|
||||||
return len(self.get_raw('items'))
|
return len(self.get_raw('items'))
|
||||||
|
|
||||||
|
|
||||||
|
class FeedXML(Feed, ParserXML):
|
||||||
|
itemsClass = 'ItemXML'
|
||||||
|
|
||||||
|
def tostring(self, **k):
|
||||||
|
return etree.tostring(self.root.getroottree(), **k)
|
||||||
|
|
||||||
|
|
||||||
class FeedParser(FeedBase):
|
class FeedParser(FeedBase):
|
||||||
itemsClass = 'FeedItem'
|
itemsClass = 'FeedItem'
|
||||||
mimetype = 'application/xml'
|
mimetype = 'application/xml'
|
||||||
|
@ -621,6 +765,10 @@ class Item(Uniq):
|
||||||
lambda f: f.remove('item_updated') )
|
lambda f: f.remove('item_updated') )
|
||||||
|
|
||||||
|
|
||||||
|
class ItemXML(Item, ParserXML):
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
class FeedItem(FeedBase, Uniq):
|
class FeedItem(FeedBase, Uniq):
|
||||||
timeFormat = ''
|
timeFormat = ''
|
||||||
dic = ('title', 'link', 'desc', 'content', 'id', 'is_permalink', 'time', 'updated')
|
dic = ('title', 'link', 'desc', 'content', 'id', 'is_permalink', 'time', 'updated')
|
||||||
|
|
Loading…
Reference in New Issue