diff --git a/morss/readabilite.py b/morss/readabilite.py index 7747dd7..becea1d 100644 --- a/morss/readabilite.py +++ b/morss/readabilite.py @@ -207,8 +207,10 @@ def clean_root(root, keep_threshold=None): def clean_node(node, keep_threshold=None): parent = node.getparent() + # remove comments if (isinstance(node, lxml.html.HtmlComment) or isinstance(node, lxml.html.HtmlProcessingInstruction)): + parent.remove(node) return if parent is None: @@ -242,11 +244,6 @@ def clean_node(node, keep_threshold=None): parent.remove(node) return - # remove comments - if isinstance(node, lxml.html.HtmlComment) or isinstance(node, lxml.html.HtmlProcessingInstruction): - parent.remove(node) - return - # remove if too many kids & too high link density wc = count_words(node.text_content()) if wc != 0 and len(list(node.iter())) > 3: