parent
99461ea185
commit
40c69f17d2
|
@ -15,6 +15,7 @@ import dateutil.parser
|
|||
from copy import deepcopy
|
||||
|
||||
import lxml.html
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
json.encoder.c_make_encoder = None
|
||||
|
||||
|
@ -441,7 +442,7 @@ class ParserHTML(ParserXML):
|
|||
|
||||
def parse(self, raw):
|
||||
parser = etree.HTMLParser(remove_blank_text=True) # remove_blank_text needed for pretty_print
|
||||
return etree.fromstring(raw, parser)
|
||||
return etree.fromstring(BeautifulSoup(raw, 'lxml').prettify(), parser)
|
||||
|
||||
def tostring(self, encoding='unicode', **k):
|
||||
return lxml.html.tostring(self.root, encoding=encoding, **k)
|
||||
|
|
Loading…
Reference in New Issue