From 3ba74649f6f457759eff62147c8bc700e21ad198 Mon Sep 17 00:00:00 2001
From: pictuga
Date: Tue, 10 Sep 2013 15:25:55 +0200
Subject: [PATCH] Test if linked pages are text documents

Useful for feeds such as HackerNews
---
 morss.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/morss.py b/morss.py
index 1f48c4b..d8e8739 100644
--- a/morss.py
+++ b/morss.py
@@ -351,6 +351,11 @@ def Fill(item, cache, feedurl='/', fast=False):
 		cache.set(item.link, 'error-http')
 		return True
 
+	if con.info().maintype != 'text':
+		log('non-text page')
+		cache.set(item.link, 'error-type')
+		return True
+
 	out = readability.Document(data, url=con.url).summary(True)
 
 	if countWord(out) > max(count_content, count_desc) > 0: