From a8ac2ed1caddd7af7af196902d0c9c034d649432 Mon Sep 17 00:00:00 2001 From: pictuga Date: Tue, 28 Feb 2017 23:24:32 -1000 Subject: [PATCH] Turn FeedBefore/After into ItemBefore/After To reduce the number of loops --- morss/morss.py | 112 +++++++++++++++++++++++++------------------------ 1 file changed, 57 insertions(+), 55 deletions(-) diff --git a/morss/morss.py b/morss/morss.py index 36711fa..1d917dd 100644 --- a/morss/morss.py +++ b/morss/morss.py @@ -294,6 +294,55 @@ def ItemFill(item, options, feedurl='/', fast=False): return True +def ItemBefore(item, options): + # return None if item deleted + + if options.empty: + item.remove() + return None + + if options.search: + if options.search not in item.title: + item.remove() + return None + + return item + + +def ItemAfter(item, options): + if options.strip: + del item.desc + del item.content + + if options.clip and item.desc and item.content: + item.content = item.desc + "

* * *


" + item.content + del item.desc + + if not options.keep and not options.proxy: + del item.desc + + if options.nolink and item.content: + content = lxml.html.fromstring(item.content) + for link in content.xpath('//a'): + log(link.text_content()) + link.drop_tag() + item.content = lxml.etree.tostring(content) + + if options.noref: + item.link = '' + + if options.md: + conv = HTML2Text(baseurl=item.link) + conv.unicode_snob = True + + if item.desc: + item.desc = conv.handle(item.desc) + if item.content: + item.content = conv.handle(item.content) + + return item + + def FeedFetch(url, options): # basic url clean-up if url is None: @@ -397,6 +446,11 @@ def FeedGather(rss, url, options): item.remove() return + item = ItemBefore(item, options) + + if item is None: + return + item = ItemFix(item, url) if time.time() - start_time > max_time >= 0 or i + 1 > max_item >= 0: @@ -404,10 +458,13 @@ def FeedGather(rss, url, options): if ItemFill(item, options, url, True) is False: item.remove() return + else: if not options.proxy: ItemFill(item, options, url) + item = ItemAfter(item, options) + queue = Queue() for i in range(threads): @@ -437,55 +494,6 @@ def FeedGather(rss, url, options): return rss -def FeedBefore(rss, options): - for i, item in enumerate(list(rss.items)): - if options.empty: - item.remove() - continue - - if options.search: - if options.search not in item.title: - item.remove() - continue - - return rss - - -def FeedAfter(rss, options): - for i, item in enumerate(list(rss.items)): - if options.strip: - del item.desc - del item.content - - if options.clip and item.desc and item.content: - item.content = item.desc + "

* * *


" + item.content - del item.desc - - if not options.keep and not options.proxy: - del item.desc - - if options.nolink and item.content: - content = lxml.html.fromstring(item.content) - for link in content.xpath('//a'): - log(link.text_content()) - link.drop_tag() - item.content = lxml.etree.tostring(content) - - if options.noref: - item.link = '' - - if options.md: - conv = HTML2Text(baseurl=item.link) - conv.unicode_snob = True - - if item.desc: - item.desc = conv.handle(item.desc) - if item.content: - item.content = conv.handle(item.content) - - return rss - - def FeedFormat(rss, options): if options.callback: if re.match(r'^[a-zA-Z0-9\.]+$', options.callback) is not None: @@ -515,9 +523,7 @@ def process(url, cache=None, options=None): options = Options(options) if cache: crawler.sqlite_default = cache rss = FeedFetch(url, options) - rss = FeedBefore(rss, options) rss = FeedGather(rss, url, options) - rss = FeedAfter(rss, options) return FeedFormat(rss, options) @@ -579,9 +585,7 @@ def cgi_app(environ, start_response): start_response(headers['status'], list(headers.items())) - rss = FeedBefore(rss, options) rss = FeedGather(rss, url, options) - rss = FeedAfter(rss, options) out = FeedFormat(rss, options) if not options.silent: @@ -648,9 +652,7 @@ def cli_app(): crawler.sqlite_default = os.path.expanduser('~/.cache/morss-cache.db') rss = FeedFetch(url, options) - rss = FeedBefore(rss, options) rss = FeedGather(rss, url, options) - rss = FeedAfter(rss, options) out = FeedFormat(rss, options) if not options.silent: