readabilite: remove code duplicate
This commit is contained in:
		@@ -207,8 +207,10 @@ def clean_root(root, keep_threshold=None):
 | 
				
			|||||||
def clean_node(node, keep_threshold=None):
 | 
					def clean_node(node, keep_threshold=None):
 | 
				
			||||||
    parent = node.getparent()
 | 
					    parent = node.getparent()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    # remove comments
 | 
				
			||||||
    if (isinstance(node, lxml.html.HtmlComment)
 | 
					    if (isinstance(node, lxml.html.HtmlComment)
 | 
				
			||||||
            or isinstance(node, lxml.html.HtmlProcessingInstruction)):
 | 
					            or isinstance(node, lxml.html.HtmlProcessingInstruction)):
 | 
				
			||||||
 | 
					        parent.remove(node)
 | 
				
			||||||
        return
 | 
					        return
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    if parent is None:
 | 
					    if parent is None:
 | 
				
			||||||
@@ -242,11 +244,6 @@ def clean_node(node, keep_threshold=None):
 | 
				
			|||||||
        parent.remove(node)
 | 
					        parent.remove(node)
 | 
				
			||||||
        return
 | 
					        return
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    # remove comments
 | 
					 | 
				
			||||||
    if isinstance(node, lxml.html.HtmlComment) or isinstance(node, lxml.html.HtmlProcessingInstruction):
 | 
					 | 
				
			||||||
        parent.remove(node)
 | 
					 | 
				
			||||||
        return
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    # remove if too many kids & too high link density
 | 
					    # remove if too many kids & too high link density
 | 
				
			||||||
    wc = count_words(node.text_content())
 | 
					    wc = count_words(node.text_content())
 | 
				
			||||||
    if wc != 0 and len(list(node.iter())) > 3:
 | 
					    if wc != 0 and len(list(node.iter())) > 3:
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user