readabilite: further html processing instructions fix

master
pictuga 2020-03-21 17:23:50 +01:00
parent fbcb23cf88
commit a7b01ee85e
1 changed file with 1 addition and 0 deletions

View File

@@ -93,6 +93,7 @@ def score_node(node):
     class_id = node.get('class', '') + node.get('id', '')
     if (isinstance(node, lxml.html.HtmlComment)
+            or isinstance(node, lxml.html.HtmlProcessingInstruction)
             or node.tag in tags_bad
             or regex_bad.search(class_id)):
         return 0