Thread Gather()

Twice as fast. Ctrl-C no longer works in the terminal, and I don't know how to fix it: using threading.active_count() or something similar didn't work as expected. Adds a new setting (THREADS) to change the number of threads to use.
master
pictuga 2013-11-24 20:52:53 +01:00
parent 8832611a05
commit ecc18dc4ab
1 changed files with 29 additions and 4 deletions

View File

@ -4,6 +4,9 @@ import os
import os.path
import time
import Queue
import threading
from fnmatch import fnmatch
from base64 import b64encode, b64decode
import re
@ -33,6 +36,7 @@ MAX_ITEM = 50 # cache-only beyond
MAX_TIME = 7 # cache-only after (in sec)
DELAY = 10*60 # xml cache & ETag cache (in sec)
TIMEOUT = 2 # http timeout (in sec)
THREADS = 10 # number of threads (1 for single-threaded)
DEBUG = False
HOLD = False
@ -497,10 +501,16 @@ def Gather(url, cachePath, options):
return False
size = len(rss.items)
startTime = time.time()
# set
startTime = time.time()
for i, item in enumerate(rss.items):
def runner(queue):
while True:
worker(*queue.get())
queue.task_done()
def worker(i, item):
if options.progress:
if MAX_ITEM == -1:
print '%s/%s' % (i+1, size)
@ -511,7 +521,7 @@ def Gather(url, cachePath, options):
if time.time() - startTime > LIM_TIME >= 0 or i+1 > LIM_ITEM >= 0:
log('dropped')
item.remove()
continue
return
item = Fix(item, url)
@ -519,7 +529,7 @@ def Gather(url, cachePath, options):
if not options.proxy:
if Fill(item, cache, url, True) is False:
item.remove()
continue
return
else:
if not options.proxy:
Fill(item, cache, url)
@ -531,6 +541,19 @@ def Gather(url, cachePath, options):
if not options.keep:
del item.desc
queue = Queue.Queue()
for i in range(THREADS):
t = threading.Thread(target=runner, args=(queue,))
t.daemon = True
t.start()
for i, item in enumerate(rss.items):
queue.put([i, item])
queue.join()
cache.save()
log(len(rss.items))
log(time.time() - startTime)
@ -584,6 +607,8 @@ if __name__ == '__main__':
if 'o'+user_id not in token:
facebook.set('o'+user_id, ltoken)
facebook.save()
if 'REQUEST_URI' in os.environ:
print 'Status: 200'
print 'Content-Type: text/plain'