readabilite: improve score for <p>

Helps a lot with BBC and Le Monde. Might backfire on other websites, though.
This commit is contained in:
pictuga 2017-03-01 18:02:45 -10:00
parent a8ac2ed1ca
commit 3fc89d5359

@ -63,6 +63,9 @@ def score_node(node):
if node.tag in ['h1', 'h2', 'article']:
score += 8
if node.tag in ['p']:
score += 3
class_id = node.get('class', '') + node.get('id', '')
score += len(regex_good.findall(class_id) * 4)