readabilite: remove duplicated code

master
pictuga 1 year ago
parent fe5dbf1ce0
commit e81f6b173f

@ -207,8 +207,10 @@ def clean_root(root, keep_threshold=None):
def clean_node(node, keep_threshold=None):
parent = node.getparent()
# remove comments
if (isinstance(node, lxml.html.HtmlComment)
or isinstance(node, lxml.html.HtmlProcessingInstruction)):
parent.remove(node)
return
if parent is None:
@ -242,11 +244,6 @@ def clean_node(node, keep_threshold=None):
parent.remove(node)
return
# remove comments
if isinstance(node, lxml.html.HtmlComment) or isinstance(node, lxml.html.HtmlProcessingInstruction):
parent.remove(node)
return
# remove if too many kids & too high link density
wc = count_words(node.text_content())
if wc != 0 and len(list(node.iter())) > 3:

Loading…
Cancel
Save