feeds: fix handling of html code

master
pictuga 2020-03-19 09:48:53 +01:00
parent 13ea52ef80
commit 449bc3c695
1 changed file with 15 additions and 5 deletions

View File

@@ -387,29 +387,39 @@ class ParserXML(ParserBase):
match = self.rule_search(rrule) match = self.rule_search(rrule)
html_rich = ('atom' in rule or self.rules['mode'] == 'html') \
and rule in [self.rules.get('item_desc'), self.rules.get('item_content')]
if key is not None: if key is not None:
match.attrib[key] = value match.attrib[key] = value
else: else:
if match is not None and len(match): if html_rich:
# atom stuff # atom stuff
self._clean_node(match) self._clean_node(match)
match.attrib['type'] = 'xhtml'
match.append(lxml.html.fragment_fromstring(value, create_parent='div'))
if match.attrib.get('type', '') == 'xhtml': else:
if match is not None and len(match):
self._clean_node(match)
match.attrib['type'] = 'html' match.attrib['type'] = 'html'
match.text = value match.text = value
def rule_str(self, rule): def rule_str(self, rule):
match = self.rule_search(rule) match = self.rule_search(rule)
html_rich = ('atom' in rule or self.rules['mode'] == 'html') \
and rule in [self.rules.get('item_desc'), self.rules.get('item_content')]
if isinstance(match, etree._Element): if isinstance(match, etree._Element):
if len(match): if html_rich:
# atom stuff # atom stuff
return self._inner_html(match) return self._inner_html(match)
else: else:
return match.text or "" return etree.tostring(match, method='text', encoding='unicode').strip()
else: else:
return match or "" return match or ""