From c86572374e35ee88f114b2daeb6ddaeac00d6782 Mon Sep 17 00:00:00 2001 From: pictuga Date: Sat, 25 Apr 2020 12:24:36 +0200 Subject: [PATCH] readabilite: minimum score requirement --- morss/readabilite.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/morss/readabilite.py b/morss/readabilite.py index 7cafcea..0f39144 100644 --- a/morss/readabilite.py +++ b/morss/readabilite.py @@ -307,14 +307,14 @@ def get_best_node(ranked_grades): return lowest -def get_article(data, url=None, encoding=None, debug=False): +def get_article(data, url=None, encoding=None, debug=False, threshold=5): " Input a raw html string, returns a raw html string of the article " html = parse(data, encoding) score_all(html) scores = rank_grades(get_all_scores(html)) - if not len(scores): + if not len(scores) or scores[0][1] < threshold: return None best = get_best_node(scores)