parent 036e5190f1
commit 7e45b2611d
@@ -85,7 +85,6 @@ The arguments are:
 - `noref`: drop items' link
 - `cache`: only take articles from the cache (ie. don't grab new articles' content), so as to save time
 - `debug`: to have some feedback from the script execution. Useful for debugging
-- `mono`: disable multithreading while fetching, makes debugging easier
 - `theforce`: force download the rss feed and ignore cached http errors
 - `silent`: don't output the final RSS (useless on its own, but can be nice when debugging)
 - http server only
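For a sense of how the surviving arguments combine, a purely hypothetical invocation (the option-passing syntax is defined elsewhere in the README being patched and may differ; the feed URL is a placeholder):

```
morss debug cache http://feed.example.com/rss.xml
```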
@@ -262,7 +261,7 @@ morss uses caching to make loading faster. There are 2 possible cache backends
 
 - `SQLiteCache`: sqlite3 cache. Default file location is in-memory (i.e. it will
 be cleared every time the program is run)
-- `MySQLCacheHandler`: /!\ Does NOT support multi-threading
+- `MySQLCacheHandler`
 
 ## Configuration
 ### Length limitation
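To illustrate the in-memory default mentioned for `SQLiteCache`, here is a minimal sketch of an sqlite3-backed key/value cache; it shows the concept only, not morss's actual implementation, and `MiniCache` is a made-up name:

```python
import sqlite3

class MiniCache:
    # sqlite3 treats the special filename ':memory:' as a RAM-only
    # database, so a cache created this way disappears when the
    # program exits; that is the "cleared every run" behaviour.
    def __init__(self, filename=':memory:'):
        self.con = sqlite3.connect(filename)
        self.con.execute('CREATE TABLE IF NOT EXISTS data (k TEXT PRIMARY KEY, v BLOB)')

    def get(self, key):
        row = self.con.execute('SELECT v FROM data WHERE k = ?', (key,)).fetchone()
        return row[0] if row is not None else None

    def set(self, key, value):
        self.con.execute('INSERT OR REPLACE INTO data (k, v) VALUES (?, ?)', (key, value))
        self.con.commit()
```

Passing a real filename instead of the default makes the cache persist across runs.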
@@ -281,7 +280,6 @@ different values at the top of the script.
 
 - `DELAY` sets the browser cache delay, only for HTTP clients
 - `TIMEOUT` sets the HTTP timeout when fetching rss feeds and articles
-- `THREADS` sets the number of threads to use. `1` makes no use of multithreading.
 
 ### Content matching
 
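For readers unfamiliar with the pattern, "values at the top of the script" refers to plain module-level constants. A hedged sketch of how a constant like `TIMEOUT` is typically consumed (the values and the `fetch` helper are invented for the example, not morss's actual code):

```python
DELAY = 10 * 60  # browser cache delay for HTTP clients, in seconds (illustrative value)
TIMEOUT = 4      # HTTP timeout for feed/article fetches, in seconds (illustrative value)

try:
    from urllib2 import urlopen  # python 2
except ImportError:
    from urllib.request import urlopen  # python 3

def fetch(url):
    # the timeout constant is handed straight to the HTTP call
    return urlopen(url, timeout=TIMEOUT).read()
```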
@@ -6,8 +6,6 @@ import time
 from datetime import datetime
 from dateutil import tz
 
-import threading
-
 from fnmatch import fnmatch
 import re
 
@@ -25,13 +23,11 @@ import cgitb
 
 try:
     # python 2
-    from Queue import Queue
     from httplib import HTTPException
     from urllib import unquote
     from urlparse import urlparse, urljoin, parse_qs
 except ImportError:
     # python 3
-    from queue import Queue
     from http.client import HTTPException
     from urllib.parse import unquote
     from urllib.parse import urlparse, urljoin, parse_qs
@@ -374,35 +370,22 @@ def FeedGather(rss, url, options):
     lim_time = LIM_TIME
     max_item = MAX_ITEM
     max_time = MAX_TIME
-    threads = THREADS
 
     if options.cache:
         max_time = 0
 
-    if options.mono:
-        threads = 1
-
-    # set
-    def runner(queue):
-        while True:
-            value = queue.get()
-            try:
-                worker(*value)
-
-            except Exception as e:
-                log('Thread Error: %s' % e.message)
-            queue.task_done()
-
-    def worker(i, item):
+    now = datetime.now(tz.tzutc())
+    sorted_items = sorted(rss.items, key=lambda x:x.updated or x.time or now, reverse=True)
+    for i, item in enumerate(sorted_items):
         if time.time() - start_time > lim_time >= 0 or i + 1 > lim_item >= 0:
             log('dropped')
             item.remove()
-            return
+            continue
 
         item = ItemBefore(item, options)
 
         if item is None:
-            return
+            continue
 
         item = ItemFix(item, url)
 
@@ -410,7 +393,7 @@ def FeedGather(rss, url, options):
             if not options.proxy:
                 if ItemFill(item, options, url, True) is False:
                     item.remove()
-                    return
+                    continue
 
         else:
             if not options.proxy:
@@ -418,25 +401,6 @@ def FeedGather(rss, url, options):
 
         item = ItemAfter(item, options)
 
-    queue = Queue()
-
-    for i in range(threads):
-        t = threading.Thread(target=runner, args=(queue,))
-        t.daemon = True
-        t.start()
-
-    now = datetime.now(tz.tzutc())
-    sorted_items = sorted(rss.items, key=lambda x:x.updated or x.time or now, reverse=True)
-    for i, item in enumerate(sorted_items):
-        if threads == 1:
-            worker(*[i, item])
-
-        else:
-            queue.put([i, item])
-
-    if threads != 1:
-        queue.join()
-
     if options.ad:
         new = rss.items.append()
         new.title = "Are you hungry?"
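Taken together, the three hunks above replace a queue-plus-worker-threads pipeline with a plain sequential loop over the date-sorted items, which is why every early exit that was a `return` inside the old `worker()` becomes a `continue` in the new `for` body. The sort key `lambda x:x.updated or x.time or now` falls back to the current time for undated items, so with `reverse=True` they sort ahead of every dated one. A self-contained sketch of the two shapes, with generic names rather than morss's own:

```python
import threading

try:
    from Queue import Queue  # python 2
except ImportError:
    from queue import Queue  # python 3

items = ['a', 'b', 'c']

def process(i, item):
    print(i, item)

def threaded(n_threads=2):
    # Before: a queue feeds N daemon threads; skipping an item means
    # returning early from the worker function.
    queue = Queue()

    def runner(queue):
        while True:
            i, item = queue.get()
            try:
                process(i, item)
            except Exception as e:
                print('Thread Error: %s' % e)
            queue.task_done()

    for _ in range(n_threads):
        t = threading.Thread(target=runner, args=(queue,))
        t.daemon = True  # don't keep the interpreter alive on exit
        t.start()

    for i, item in enumerate(items):
        queue.put((i, item))

    queue.join()  # block until every queued item is marked done

def sequential():
    # After: the same work inline; skipping an item is a `continue`.
    for i, item in enumerate(items):
        process(i, item)
```

Incidentally, the removed `runner()` formatted `e.message`, an attribute that only exists on Python 2; formatting the exception object itself, as above, works on both.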