From e71fc967ceac3041e97accebcf6e7885bf590589 Mon Sep 17 00:00:00 2001 From: pictuga Date: Sat, 25 Feb 2017 18:07:28 -1000 Subject: [PATCH] readabilite: shift junk tags to a var (list) So that this list can later be re-used --- morss/readabilite.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/morss/readabilite.py b/morss/readabilite.py index 6f1e7fb..506214a 100644 --- a/morss/readabilite.py +++ b/morss/readabilite.py @@ -42,11 +42,12 @@ regex_good = re.compile('|'.join(['and', 'article', 'body', 'column', 'main', 'shadow', 'content', 'entry', 'hentry', 'main', 'page', 'pagination', 'post', 'text', 'blog', 'story', 'par']), re.I) +tags_junk = ['script', 'head', 'iframe', 'object', 'noscript', 'param', 'embed', 'layer', 'applet', 'style'] def score_node(node): score = 0 - if node.tag in ['script', 'head', 'iframe', 'object', 'noscript', 'param', 'embed', 'layer', 'applet', 'style']: + if node.tag in tags_junk: return 0 if isinstance(node, lxml.html.HtmlComment):