From ecc18dc4ab3157eacd1811c65ed9fbd87f538d78 Mon Sep 17 00:00:00 2001 From: pictuga Date: Sun, 24 Nov 2013 20:52:53 +0100 Subject: [PATCH] Thread Gather() Twice faster. Ctrl-C no longer works in terminal. Don't know how to fix it. Using threading.active_count() or sth didn't work as expected. New settings (THREADS) to change the number of threads to use. --- morss.py | 33 +++++++++++++++++++++++++++++---- 1 file changed, 29 insertions(+), 4 deletions(-) diff --git a/morss.py b/morss.py index 9fff56b..fe72906 100644 --- a/morss.py +++ b/morss.py @@ -4,6 +4,9 @@ import os import os.path import time +import Queue +import threading + from fnmatch import fnmatch from base64 import b64encode, b64decode import re @@ -33,6 +36,7 @@ MAX_ITEM = 50 # cache-only beyond MAX_TIME = 7 # cache-only after (in sec) DELAY = 10*60 # xml cache & ETag cache (in sec) TIMEOUT = 2 # http timeout (in sec) +THREADS = 10 # number of threads (1 for single-threaded) DEBUG = False HOLD = False @@ -497,10 +501,16 @@ def Gather(url, cachePath, options): return False size = len(rss.items) + startTime = time.time() + # set - startTime = time.time() - for i, item in enumerate(rss.items): + def runner(queue): + while True: + worker(*queue.get()) + queue.task_done() + + def worker(i, item): if options.progress: if MAX_ITEM == -1: print '%s/%s' % (i+1, size) @@ -511,7 +521,7 @@ def Gather(url, cachePath, options): if time.time() - startTime > LIM_TIME >= 0 or i+1 > LIM_ITEM >= 0: log('dropped') item.remove() - continue + return item = Fix(item, url) @@ -519,7 +529,7 @@ def Gather(url, cachePath, options): if not options.proxy: if Fill(item, cache, url, True) is False: item.remove() - continue + return else: if not options.proxy: Fill(item, cache, url) @@ -531,6 +541,19 @@ def Gather(url, cachePath, options): if not options.keep: del item.desc + queue = Queue.Queue() + + for i in range(THREADS): + t = threading.Thread(target=runner, args=(queue,)) + t.daemon = True + t.start() + + for i, item in enumerate(rss.items): + queue.put([i, item]) + + queue.join() + cache.save() + log(len(rss.items)) log(time.time() - startTime) @@ -584,6 +607,8 @@ if __name__ == '__main__': if 'o'+user_id not in token: facebook.set('o'+user_id, ltoken) + facebook.save() + if 'REQUEST_URI' in os.environ: print 'Status: 200' print 'Content-Type: text/plain'