readabilite: run the new cleaning code
parent
58fe5243af
commit
7d005e9a65
|
@ -132,6 +132,12 @@ def write_score_all(root, grades):
|
||||||
node.attrib['score'] = str(int(grades.get(node, 0)))
|
node.attrib['score'] = str(int(grades.get(node, 0)))
|
||||||
|
|
||||||
|
|
||||||
|
def clean_root(root):
    """Run ``clean_node`` on every descendant of *root*, deepest first.

    Each child's own subtree is processed before the child itself is
    handed to ``clean_node``. *root* itself is never passed to
    ``clean_node``; only its descendants are.
    """
    # Iterate over a snapshot of the children rather than the live element.
    # NOTE(review): presumably clean_node can detach or move nodes in the
    # tree, which would disturb live iteration -- confirm against clean_node.
    children = list(root)

    for child in children:
        clean_root(child)
        clean_node(child)
|
||||||
|
|
||||||
|
|
||||||
def clean_node(node):
|
def clean_node(node):
|
||||||
parent = node.getparent()
|
parent = node.getparent()
|
||||||
|
|
||||||
|
@ -277,4 +283,6 @@ def get_article(data, url=None, encoding=None):
|
||||||
if url:
|
if url:
|
||||||
best.make_links_absolute(url)
|
best.make_links_absolute(url)
|
||||||
|
|
||||||
|
clean_root(best)
|
||||||
|
|
||||||
return lxml.etree.tostring(best, pretty_print=True)
|
return lxml.etree.tostring(best, pretty_print=True)
|
||||||
|
|
Loading…
Reference in New Issue