3 changed files with 8 additions and 16 deletions
--- a/morss/feeds.py
+++ b/morss/feeds.py
@@ -442,7 +442,7 @@ class ParserHTML(ParserXML):

    def parse(self, raw):
        parser = etree.HTMLParser(remove_blank_text=True) # remove_blank_text needed for pretty_print
-        return etree.fromstring(BeautifulSoup(raw, 'lxml').prettify('utf-8'), parser)
+        return etree.fromstring(BeautifulSoup(raw, 'lxml').prettify(), parser)

    def tostring(self, encoding='unicode', **k):
        return lxml.html.tostring(self.root, encoding=encoding, **k)
--- a/morss/morss.py
+++ b/morss/morss.py
@@ -720,7 +720,7 @@ def cgi_error_handler(environ, start_response, app):
 @middleware
 def cgi_encode(environ, start_response, app):
    out = app(environ, start_response)
-    return [x if isinstance(x, bytes) else str(x).encode('utf-8') for x in out]
+    return [x if isinstance(x, bytes) else x.encode('utf-8') for x in out]


 def cli_app():
--- a/morss/readabilite.py
+++ b/morss/readabilite.py
@@ -1,6 +1,5 @@
 import lxml.etree
 import lxml.html
-from bs4 import BeautifulSoup
 import re


@@ -10,7 +9,7 @@ def parse(data, encoding=None):
    else:
        parser = lxml.html.HTMLParser(remove_blank_text=True, remove_comments=True)

-    return lxml.html.fromstring(BeautifulSoup(data, 'lxml').prettify('utf-8'), parser=parser)
+    return lxml.html.fromstring(data, parser=parser)


 def count_words(string):
@@ -63,7 +62,7 @@ regex_good = re.compile('|'.join(class_good), re.I)

 tags_junk = ['script', 'head', 'iframe', 'object', 'noscript',
    'param', 'embed', 'layer', 'applet', 'style', 'form', 'input', 'textarea',
-    'button', 'footer', 'link', 'meta']
+    'button', 'footer']

 tags_bad = tags_junk + ['a', 'aside']

@@ -94,18 +93,11 @@ def score_node(node):
    class_id = node.get('class', '') + node.get('id', '')

    if (isinstance(node, lxml.html.HtmlComment)
-            or isinstance(node, lxml.html.HtmlProcessingInstruction)):
+            or isinstance(node, lxml.html.HtmlProcessingInstruction)
+            or node.tag in tags_bad
+            or regex_bad.search(class_id)):
        return 0

-    if node.tag in tags_junk:
-        score += -1 # actuall -2 as tags_junk is included tags_bad
-
-    if node.tag in tags_bad:
-        score += -1
-
-    if regex_bad.search(class_id):
-        score += -1
-
    if node.tag in tags_good:
        score += 4

@@ -133,7 +125,7 @@ def score_all(node, grades=None):
        score = score_node(child)
        child.attrib['seen'] = 'yes, ' + str(int(score))

-        if score > 0 or not len(grades):
+        if score > 0:
            spread_score(child, score, grades)
            score_all(child, grades)