feeds: fix handling of html code

2020-03-19 09:48:53 +01:00 · 2020-03-19 09:48:53 +01:00 · 449bc3c695
parent 13ea52ef80
commit 449bc3c695
1 changed file with 15 additions and 5 deletions
--- a/morss/feeds.py
+++ b/morss/feeds.py
@@ -387,15 +387,22 @@ class ParserXML(ParserBase):
        match = self.rule_search(rrule)
        html_rich = ('atom' in rule or self.rules['mode'] == 'html') \
            and rule in [self.rules.get('item_desc'), self.rules.get('item_content')]
        if key is not None:
            match.attrib[key] = value
        else:
-            if match is not None and len(match):
+            if html_rich:
                # atom stuff
                self._clean_node(match)
                match.attrib['type'] = 'xhtml'
                match.append(lxml.html.fragment_fromstring(value, create_parent='div'))
-                if match.attrib.get('type', '') == 'xhtml':
+            else:
                if match is not None and len(match):
                    self._clean_node(match)
                    match.attrib['type'] = 'html'
                match.text = value
@@ -403,13 +410,16 @@ class ParserXML(ParserBase):
    def rule_str(self, rule):
        match = self.rule_search(rule)
        html_rich = ('atom' in rule or self.rules['mode'] == 'html') \
            and rule in [self.rules.get('item_desc'), self.rules.get('item_content')]
        if isinstance(match, etree._Element):
-            if len(match):
+            if html_rich:
                # atom stuff
                return self._inner_html(match)
            else:
-                return match.text or ""
+                return etree.tostring(match, method='text', encoding='unicode').strip()
        else:
            return match or ""