readabilite: move the junk tags into a variable (list)

So that this list can later be re-used
master
pictuga 2017-02-25 18:07:28 -10:00
parent b14381f575
commit e71fc967ce
1 changed file with 2 additions and 1 deletion

View File

@ -42,11 +42,12 @@ regex_good = re.compile('|'.join(['and', 'article', 'body', 'column',
'main', 'shadow', 'content', 'entry', 'hentry', 'main', 'page',
'pagination', 'post', 'text', 'blog', 'story', 'par']), re.I)
# Tags for which score_node() returns a score of 0 straight away.
tags_junk = ['script', 'head', 'iframe', 'object', 'noscript', 'param', 'embed', 'layer', 'applet', 'style']
def score_node(node):
score = 0
if node.tag in ['script', 'head', 'iframe', 'object', 'noscript', 'param', 'embed', 'layer', 'applet', 'style']:
if node.tag in tags_junk:
return 0
if isinstance(node, lxml.html.HtmlComment):