From 449bc3c695dd051ddda9bef87703e0111de97685 Mon Sep 17 00:00:00 2001 From: pictuga Date: Thu, 19 Mar 2020 09:48:53 +0100 Subject: [PATCH] feeds: fix handling of html code --- morss/feeds.py | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/morss/feeds.py b/morss/feeds.py index cdd4f1c..b7f85cb 100644 --- a/morss/feeds.py +++ b/morss/feeds.py @@ -387,29 +387,39 @@ class ParserXML(ParserBase): match = self.rule_search(rrule) + html_rich = ('atom' in rule or self.rules['mode'] == 'html') \ + and rule in [self.rules.get('item_desc'), self.rules.get('item_content')] + if key is not None: match.attrib[key] = value else: - if match is not None and len(match): + if html_rich: # atom stuff self._clean_node(match) + match.attrib['type'] = 'xhtml' + match.append(lxml.html.fragment_fromstring(value, create_parent='div')) - if match.attrib.get('type', '') == 'xhtml': + else: + if match is not None and len(match): + self._clean_node(match) match.attrib['type'] = 'html' - match.text = value + match.text = value def rule_str(self, rule): match = self.rule_search(rule) + html_rich = ('atom' in rule or self.rules['mode'] == 'html') \ + and rule in [self.rules.get('item_desc'), self.rules.get('item_content')] + if isinstance(match, etree._Element): - if len(match): + if html_rich: # atom stuff return self._inner_html(match) else: - return match.text or "" + return etree.tostring(match, method='text', encoding='unicode').strip() else: return match or ""