From e136b0feb2d3d151fbbb239cc779853d57a85ffc Mon Sep 17 00:00:00 2001
From: pictuga <contact@pictuga.com>
Date: Sun, 5 Apr 2020 20:47:30 +0200
Subject: [PATCH] readabilite: loosen the slayer

The previous implementation led to too many empty results
---
 morss/readabilite.py | 17 ++++++++++++-----
 1 file changed, 12 insertions(+), 5 deletions(-)

diff --git a/morss/readabilite.py b/morss/readabilite.py
index 0768315..1bfdd2a 100644
--- a/morss/readabilite.py
+++ b/morss/readabilite.py
@@ -63,7 +63,7 @@ regex_good = re.compile('|'.join(class_good), re.I)
 
 tags_junk = ['script', 'head', 'iframe', 'object', 'noscript',
     'param', 'embed', 'layer', 'applet', 'style', 'form', 'input', 'textarea',
-    'button', 'footer']
+    'button', 'footer', 'link', 'meta']
 
 tags_bad = tags_junk + ['a', 'aside']
 
@@ -94,11 +94,18 @@ def score_node(node):
     class_id = node.get('class', '') + node.get('id', '')
 
     if (isinstance(node, lxml.html.HtmlComment)
-            or isinstance(node, lxml.html.HtmlProcessingInstruction)
-            or node.tag in tags_bad
-            or regex_bad.search(class_id)):
+            or isinstance(node, lxml.html.HtmlProcessingInstruction)):
         return 0
 
+    if node.tag in tags_junk:
+        score += -1 # actually -2, as tags_junk is included in tags_bad
+
+    if node.tag in tags_bad:
+        score += -1
+
+    if regex_bad.search(class_id):
+        score += -1
+
     if node.tag in tags_good:
         score += 4
 
@@ -126,7 +133,7 @@ def score_all(node, grades=None):
         score = score_node(child)
         child.attrib['seen'] = 'yes, ' + str(int(score))
 
-        if score > 0:
+        if score > 0 or not len(grades):
             spread_score(child, score, grades)
             score_all(child, grades)