readabilite: remove duplicated code
This commit is contained in:
		@@ -207,8 +207,10 @@ def clean_root(root, keep_threshold=None):
 | 
			
		||||
def clean_node(node, keep_threshold=None):
 | 
			
		||||
    parent = node.getparent()
 | 
			
		||||
 | 
			
		||||
    # remove comments
 | 
			
		||||
    if (isinstance(node, lxml.html.HtmlComment)
 | 
			
		||||
            or isinstance(node, lxml.html.HtmlProcessingInstruction)):
 | 
			
		||||
        parent.remove(node)
 | 
			
		||||
        return
 | 
			
		||||
 | 
			
		||||
    if parent is None:
 | 
			
		||||
@@ -242,11 +244,6 @@ def clean_node(node, keep_threshold=None):
 | 
			
		||||
        parent.remove(node)
 | 
			
		||||
        return
 | 
			
		||||
 | 
			
		||||
    # remove comments
 | 
			
		||||
    if isinstance(node, lxml.html.HtmlComment) or isinstance(node, lxml.html.HtmlProcessingInstruction):
 | 
			
		||||
        parent.remove(node)
 | 
			
		||||
        return
 | 
			
		||||
 | 
			
		||||
    # remove if too many kids & too high link density
 | 
			
		||||
    wc = count_words(node.text_content())
 | 
			
		||||
    if wc != 0 and len(list(node.iter())) > 3:
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user