Improve options and limits
New limits are possible: time limit, max number of item fetched, and max number of item taken from cache. Fill third argument is now Fast=True, which is self-explicit. (Complexity of the changes made separate commits impossible).master
parent
2a71fe07f2
commit
b78f0bfba5
45
morss.py
45
morss.py
|
@ -22,9 +22,11 @@ import urlparse
|
|||
|
||||
from readability import readability
|
||||
|
||||
MAX = 70
|
||||
DELAY=10
|
||||
TIMEOUT = 2
|
||||
LIM_ITEM = 100 # deletes what's beyond
|
||||
MAX_ITEM = 50 # cache-only beyond
|
||||
MAX_TIME = 7 # cache-only after
|
||||
DELAY = 10 # xml cache
|
||||
TIMEOUT = 2 # http timeout
|
||||
|
||||
OPTIONS = ['progress', 'cache']
|
||||
|
||||
|
@ -317,7 +319,7 @@ def EncDownload(url):
|
|||
|
||||
return (data, enc, con.geturl())
|
||||
|
||||
def Fill(rss, cache, feedurl="/", mode='feed'):
|
||||
def Fill(rss, cache, feedurl="/", fast=False):
|
||||
""" Returns True when it has done its best """
|
||||
|
||||
item = XMLMap(rss, ITEM_MAP, True)
|
||||
|
@ -368,7 +370,8 @@ def Fill(rss, cache, feedurl="/", mode='feed'):
|
|||
return True
|
||||
|
||||
# super-fast mode
|
||||
if mode == 'cache':
|
||||
if fast:
|
||||
log('skipped')
|
||||
return False
|
||||
|
||||
# download
|
||||
|
@ -413,16 +416,27 @@ def Gather(url, cachePath, mode='feed'):
|
|||
rss = lxml.objectify.fromstring(xml)
|
||||
root = rss.channel if hasattr(rss, 'channel') else rss
|
||||
root = XMLMap(root, RSS_MAP)
|
||||
size = len(root.item)
|
||||
|
||||
# set
|
||||
if MAX:
|
||||
for item in root.item[MAX:]:
|
||||
item.getparent().remove(item)
|
||||
for i,item in enumerate(root.item):
|
||||
startTime = time.time()
|
||||
for i, item in enumerate(root.item):
|
||||
if mode == 'progress':
|
||||
print "%s/%s" % (i+1, len(root.item))
|
||||
if MAX_ITEM == 0:
|
||||
print "%s/%s" % (i+1, size)
|
||||
else:
|
||||
print "%s/%s" % (i+1, min(MAX_ITEM, size))
|
||||
sys.stdout.flush()
|
||||
Fill(item, cache, mode)
|
||||
|
||||
if i+1 > LIM_ITEM > 0:
|
||||
item.getparent().remove(item)
|
||||
elif time.time() - startTime > MAX_TIME >= 0 or i+1 > MAX_ITEM > 0:
|
||||
if Fill(item, cache, url, True) is False:
|
||||
item.getparent().remove(item)
|
||||
else:
|
||||
Fill(item, cache, url)
|
||||
|
||||
log(len(root.item))
|
||||
|
||||
return root.tostring(xml_declaration=True, encoding='UTF-8')
|
||||
|
||||
|
@ -439,7 +453,6 @@ if __name__ == "__main__":
|
|||
|
||||
cache = os.getcwd() + '/cache'
|
||||
log(url)
|
||||
RSS = Gather(url, cache, options)
|
||||
else:
|
||||
url, options = parseOptions(OPTIONS)
|
||||
|
||||
|
@ -448,7 +461,13 @@ if __name__ == "__main__":
|
|||
sys.exit(1)
|
||||
|
||||
cache = os.path.expanduser('~') + '/.cache/morss'
|
||||
RSS = Gather(url, cache, options)
|
||||
|
||||
if options == 'progress':
|
||||
MAX_TIME = -1
|
||||
if options == 'cache':
|
||||
MAX_TIME = 0
|
||||
|
||||
RSS = Gather(url, cache, options)
|
||||
|
||||
if RSS is not False and options != 'progress':
|
||||
if 'REQUEST_URI' in os.environ or not os.getenv('DEBUG', False):
|
||||
|
|
Loading…
Reference in New Issue