parent
99461ea185
commit
40c69f17d2
|
@ -15,6 +15,7 @@ import dateutil.parser
|
||||||
from copy import deepcopy
|
from copy import deepcopy
|
||||||
|
|
||||||
import lxml.html
|
import lxml.html
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
|
||||||
json.encoder.c_make_encoder = None
|
json.encoder.c_make_encoder = None
|
||||||
|
|
||||||
|
@ -441,7 +442,7 @@ class ParserHTML(ParserXML):
|
||||||
|
|
||||||
def parse(self, raw):
|
def parse(self, raw):
|
||||||
parser = etree.HTMLParser(remove_blank_text=True) # remove_blank_text needed for pretty_print
|
parser = etree.HTMLParser(remove_blank_text=True) # remove_blank_text needed for pretty_print
|
||||||
return etree.fromstring(raw, parser)
|
return etree.fromstring(BeautifulSoup(raw, 'lxml').prettify(), parser)
|
||||||
|
|
||||||
def tostring(self, encoding='unicode', **k):
|
def tostring(self, encoding='unicode', **k):
|
||||||
return lxml.html.tostring(self.root, encoding=encoding, **k)
|
return lxml.html.tostring(self.root, encoding=encoding, **k)
|
||||||
|
|
Loading…
Reference in New Issue