From 4aa25bf3d853d43d4e618e2197fa573851951b8c Mon Sep 17 00:00:00 2001 From: pictuga Date: Fri, 24 Mar 2017 21:50:46 -1000 Subject: [PATCH] readabilite: clean_html before scoring Surprisingly efficient --- morss/readabilite.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/morss/readabilite.py b/morss/readabilite.py index e05f68b..fcc13e9 100644 --- a/morss/readabilite.py +++ b/morss/readabilite.py @@ -208,13 +208,13 @@ def get_best_node(grades): def get_article(data, url=None, encoding=None): html = parse(data, encoding) + + clean_html(html) br2p(html) scores = score_all(html) best = get_best_node(scores) - clean_html(best) - if url: best.make_links_absolute(url)