readabilite: shift "junk" tags to a var (list)
So that this list can later be re-used
This commit is contained in:
		@@ -42,11 +42,12 @@ regex_good = re.compile('|'.join(['and', 'article', 'body', 'column',
 | 
			
		||||
      'main', 'shadow', 'content', 'entry', 'hentry', 'main', 'page',
 | 
			
		||||
      'pagination', 'post', 'text', 'blog', 'story', 'par']), re.I)
 | 
			
		||||
 | 
			
		||||
# Tag names that score_node treats as junk: a node whose tag is in this list
# gets a score of 0. Kept as a module-level list so it can be re-used.
tags_junk = ['script', 'head', 'iframe', 'object', 'noscript', 'param', 'embed', 'layer', 'applet', 'style']
 | 
			
		||||
 | 
			
		||||
def score_node(node):
 | 
			
		||||
    score = 0
 | 
			
		||||
 | 
			
		||||
    if node.tag in ['script', 'head', 'iframe', 'object', 'noscript', 'param', 'embed', 'layer', 'applet', 'style']:
 | 
			
		||||
    if node.tag in tags_junk:
 | 
			
		||||
        return 0
 | 
			
		||||
 | 
			
		||||
    if isinstance(node, lxml.html.HtmlComment):
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user