# Patch for morss/feeds.py (git unified diff, two hunks).
#
# Hunk 1 (@@ -15,6 +15,7 @@): adds `from bs4 import BeautifulSoup` directly
# after `import lxml.html`.
#
# Hunk 2 (@@ -441,7 +442,7 @@, inside class ParserHTML): ParserHTML.parse()
# no longer passes `raw` straight to etree.fromstring(). It now builds
# BeautifulSoup(raw, 'lxml'), calls .prettify() on it, and parses that output
# with the same etree.HTMLParser(remove_blank_text=True) as before.
# ParserHTML.tostring() appears only as unchanged context.
#
# NOTE(review): the whole patch sits on a single line here; it presumably
# needs its original line breaks (including blank context lines) restored
# before `git apply` will accept it -- confirm against the source commit.
# NOTE(review): per the bs4 docs, prettify() without an encoding argument
# returns str and re-indents the markup, which can alter whitespace inside
# text nodes. lxml is documented to reject str input that carries an XML
# encoding declaration, so XHTML input may raise ValueError on the new
# line -- TODO confirm, and consider handing lxml bytes instead
# (e.g. prettify('utf-8'), with a matching parser encoding).
diff --git a/morss/feeds.py b/morss/feeds.py index c6249a3..d92ff43 100644 --- a/morss/feeds.py +++ b/morss/feeds.py @@ -15,6 +15,7 @@ import dateutil.parser from copy import deepcopy import lxml.html +from bs4 import BeautifulSoup json.encoder.c_make_encoder = None @@ -441,7 +442,7 @@ class ParserHTML(ParserXML): def parse(self, raw): parser = etree.HTMLParser(remove_blank_text=True) # remove_blank_text needed for pretty_print - return etree.fromstring(raw, parser) + return etree.fromstring(BeautifulSoup(raw, 'lxml').prettify(), parser) def tostring(self, encoding='unicode', **k): return lxml.html.tostring(self.root, encoding=encoding, **k)