From 53c9b07d19ec3176d48c350b2a7b928d183e8964 Mon Sep 17 00:00:00 2001 From: pictuga Date: Tue, 22 Oct 2013 20:55:24 +0200 Subject: [PATCH] Split Fill() into Fix and Fill Fix improves links and such things --- morss.py | 27 +++++++++++++++++++-------- 1 file changed, 19 insertions(+), 8 deletions(-) diff --git a/morss.py b/morss.py index 3586657..9b637fe 100644 --- a/morss.py +++ b/morss.py @@ -242,14 +242,17 @@ def decodeHTML(data, con=None): log(enc) return data.decode(enc, 'replace') -def Fill(item, cache, feedurl='/', fast=False): - """ Returns True when it has done its best """ +def Fix(item, feedurl='/'): + """ Improves feed items (absolute links, resolve feedburner links, etc) """ + # check unwanted uppercase title + if len(item.title) > 20 and item.title.isupper(): + item.title = item.title.title() + + # check if it includes link if not item.link: log('no link') - return True - - log(item.link) + return item # check relative urls item.link = urlparse.urljoin(feedurl, item.link) @@ -285,9 +288,16 @@ def Fill(item, cache, feedurl='/', fast=False): item.link = match[0] log(item.link) - # check unwanted uppercase title - if len(item.title) > 20 and item.title.isupper(): - item.title = item.title.title() + return item + +def Fill(item, cache, feedurl='/', fast=False): + """ Returns True when it has done its best """ + + if not item.link: + log('no link') + return item + + log(item.link) # content already provided? count_content = countWord(item.content) @@ -433,6 +443,7 @@ def Gather(url, cachePath, options): # set startTime = time.time() for i, item in enumerate(rss.items): + item = Fix(item, url) if 'progress' in options: if MAX_ITEM == 0: print '%s/%s' % (i+1, size)