From a7b01ee85e0d24e2d9cf0c711c6db6b46707842c Mon Sep 17 00:00:00 2001 From: pictuga Date: Sat, 21 Mar 2020 17:23:50 +0100 Subject: [PATCH] readabilite: further html processing instructions fix --- morss/readabilite.py | 1 + 1 file changed, 1 insertion(+) diff --git a/morss/readabilite.py b/morss/readabilite.py index 6498a77..3412a57 100644 --- a/morss/readabilite.py +++ b/morss/readabilite.py @@ -93,6 +93,7 @@ def score_node(node): class_id = node.get('class', '') + node.get('id', '') if (isinstance(node, lxml.html.HtmlComment) + or isinstance(node, lxml.html.HtmlProcessingInstruction) or node.tag in tags_bad or regex_bad.search(class_id)): return 0