From 7e45b2611dcfa0d88ec02278346c7290306849c8 Mon Sep 17 00:00:00 2001 From: pictuga Date: Sun, 19 Apr 2020 12:29:52 +0200 Subject: [PATCH] Disable multi-threading Impact was mostly negative due to locks --- README.md | 4 +--- morss/morss.py | 48 ++++++------------------------------------------ 2 files changed, 7 insertions(+), 45 deletions(-) diff --git a/README.md b/README.md index ced0985..450cf81 100644 --- a/README.md +++ b/README.md @@ -85,7 +85,6 @@ The arguments are: - `noref`: drop items' link - `cache`: only take articles from the cache (ie. don't grab new articles' content), so as to save time - `debug`: to have some feedback from the script execution. Useful for debugging - - `mono`: disable multithreading while fetching, makes debugging easier - `theforce`: force download the rss feed and ignore cached http errros - `silent`: don't output the final RSS (useless on its own, but can be nice when debugging) - http server only @@ -262,7 +261,7 @@ morss uses caching to make loading faster. There are 2 possible cache backends - `SQLiteCache`: sqlite3 cache. Default file location is in-memory (i.e. it will be cleared every time the program is run -- `MySQLCacheHandler`: /!\ Does NOT support multi-threading +- `MySQLCacheHandler` ## Configuration ### Length limitation @@ -281,7 +280,6 @@ different values at the top of the script. - `DELAY` sets the browser cache delay, only for HTTP clients - `TIMEOUT` sets the HTTP timeout when fetching rss feeds and articles -- `THREADS` sets the number of threads to use. `1` makes no use of multithreading. ### Content matching diff --git a/morss/morss.py b/morss/morss.py index 429c89b..a317457 100644 --- a/morss/morss.py +++ b/morss/morss.py @@ -6,8 +6,6 @@ import time from datetime import datetime from dateutil import tz -import threading - from fnmatch import fnmatch import re @@ -25,13 +23,11 @@ import cgitb try: # python 2 - from Queue import Queue from httplib import HTTPException from urllib import unquote from urlparse import urlparse, urljoin, parse_qs except ImportError: # python 3 - from queue import Queue from http.client import HTTPException from urllib.parse import unquote from urllib.parse import urlparse, urljoin, parse_qs @@ -374,35 +370,22 @@ def FeedGather(rss, url, options): lim_time = LIM_TIME max_item = MAX_ITEM max_time = MAX_TIME - threads = THREADS if options.cache: max_time = 0 - if options.mono: - threads = 1 - - # set - def runner(queue): - while True: - value = queue.get() - try: - worker(*value) - - except Exception as e: - log('Thread Error: %s' % e.message) - queue.task_done() - - def worker(i, item): + now = datetime.now(tz.tzutc()) + sorted_items = sorted(rss.items, key=lambda x:x.updated or x.time or now, reverse=True) + for i, item in enumerate(sorted_items): if time.time() - start_time > lim_time >= 0 or i + 1 > lim_item >= 0: log('dropped') item.remove() - return + continue item = ItemBefore(item, options) if item is None: - return + continue item = ItemFix(item, url) @@ -410,7 +393,7 @@ def FeedGather(rss, url, options): if not options.proxy: if ItemFill(item, options, url, True) is False: item.remove() - return + continue else: if not options.proxy: @@ -418,25 +401,6 @@ def FeedGather(rss, url, options): item = ItemAfter(item, options) - queue = Queue() - - for i in range(threads): - t = threading.Thread(target=runner, args=(queue,)) - t.daemon = True - t.start() - - now = datetime.now(tz.tzutc()) - sorted_items = sorted(rss.items, key=lambda x:x.updated or x.time or now, reverse=True) - for i, item in enumerate(sorted_items): - if threads == 1: - worker(*[i, item]) - - else: - queue.put([i, item]) - - if threads != 1: - queue.join() - if options.ad: new = rss.items.append() new.title = "Are you hungry?"