Use etree.tostring 'method' arg
Gives appropriately formatted HTML code. Some pages might otherwise be rendered as blank.
master
parent
7d0d416610
commit
22005065e8
|
@ -319,7 +319,7 @@ class ParserXML(ParserBase):
|
||||||
return self.root.getparent().remove(self.root)
|
return self.root.getparent().remove(self.root)
|
||||||
|
|
||||||
def tostring(self, encoding='unicode', **k):
|
def tostring(self, encoding='unicode', **k):
|
||||||
return etree.tostring(self.root, encoding=encoding, **k)
|
return etree.tostring(self.root, encoding=encoding, method='xml', **k)
|
||||||
|
|
||||||
def _rule_parse(self, rule):
|
def _rule_parse(self, rule):
|
||||||
test = re.search(r'^(.*)/@([a-z]+)$', rule) # to match //div/a/@href
|
test = re.search(r'^(.*)/@([a-z]+)$', rule) # to match //div/a/@href
|
||||||
|
@ -463,7 +463,7 @@ class ParserHTML(ParserXML):
|
||||||
return html_parse(raw, encoding=self.encoding)
|
return html_parse(raw, encoding=self.encoding)
|
||||||
|
|
||||||
def tostring(self, encoding='unicode', **k):
|
def tostring(self, encoding='unicode', **k):
|
||||||
return lxml.html.tostring(self.root, encoding=encoding, **k)
|
return lxml.html.tostring(self.root, encoding=encoding, method='html', **k)
|
||||||
|
|
||||||
def rule_search_all(self, rule):
|
def rule_search_all(self, rule):
|
||||||
try:
|
try:
|
||||||
|
@ -724,7 +724,7 @@ class FeedXML(Feed, ParserXML):
|
||||||
if self.root.getprevious() is None:
|
if self.root.getprevious() is None:
|
||||||
self.root.addprevious(etree.PI('xml-stylesheet', 'type="text/xsl" href="/sheet.xsl"'))
|
self.root.addprevious(etree.PI('xml-stylesheet', 'type="text/xsl" href="/sheet.xsl"'))
|
||||||
|
|
||||||
return etree.tostring(self.root.getroottree(), encoding=encoding, **k)
|
return etree.tostring(self.root.getroottree(), encoding=encoding, method='xml', **k)
|
||||||
|
|
||||||
|
|
||||||
class ItemXML(Item, ParserXML):
|
class ItemXML(Item, ParserXML):
|
||||||
|
|
|
@ -294,7 +294,7 @@ def ItemAfter(item, options):
|
||||||
for link in content.xpath('//a'):
|
for link in content.xpath('//a'):
|
||||||
log(link.text_content())
|
log(link.text_content())
|
||||||
link.drop_tag()
|
link.drop_tag()
|
||||||
item.content = lxml.etree.tostring(content)
|
item.content = lxml.etree.tostring(content, method='html')
|
||||||
|
|
||||||
if options.noref:
|
if options.noref:
|
||||||
item.link = ''
|
item.link = ''
|
||||||
|
@ -612,7 +612,7 @@ def cgi_get(environ, start_response):
|
||||||
for elem in html.xpath('//'+tag):
|
for elem in html.xpath('//'+tag):
|
||||||
elem.getparent().remove(elem)
|
elem.getparent().remove(elem)
|
||||||
|
|
||||||
output = lxml.etree.tostring(html.getroottree(), encoding='utf-8')
|
output = lxml.etree.tostring(html.getroottree(), encoding='utf-8', method='html')
|
||||||
|
|
||||||
elif options.get == 'article':
|
elif options.get == 'article':
|
||||||
output = readabilite.get_article(req['data'], url=req['url'], encoding_in=req['encoding'], encoding_out='utf-8', debug=options.debug)
|
output = readabilite.get_article(req['data'], url=req['url'], encoding_in=req['encoding'], encoding_out='utf-8', debug=options.debug)
|
||||||
|
|
|
@ -341,7 +341,7 @@ def get_article(data, url=None, encoding_in=None, encoding_out='unicode', debug=
|
||||||
if url:
|
if url:
|
||||||
best.make_links_absolute(url)
|
best.make_links_absolute(url)
|
||||||
|
|
||||||
return lxml.etree.tostring(best if not debug else html, pretty_print=True, encoding=encoding_out)
|
return lxml.etree.tostring(best if not debug else html, method='html', encoding=encoding_out)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
|
|
Loading…
Reference in New Issue