Improve options and limits
New limits are possible: a time limit, a maximum number of items fetched, and a maximum number of items taken from the cache. Fill's `mode` argument is replaced by a boolean `fast` flag (e.g. `fast=True`), which is self-explanatory. (The complexity of the changes made separate commits impossible.)
master
parent
2a71fe07f2
commit
b78f0bfba5
45
morss.py
45
morss.py
|
@ -22,9 +22,11 @@ import urlparse
|
||||||
|
|
||||||
from readability import readability
|
from readability import readability
|
||||||
|
|
||||||
MAX = 70
|
LIM_ITEM = 100 # deletes what's beyond
|
||||||
DELAY=10
|
MAX_ITEM = 50 # cache-only beyond
|
||||||
TIMEOUT = 2
|
MAX_TIME = 7 # cache-only after
|
||||||
|
DELAY = 10 # xml cache
|
||||||
|
TIMEOUT = 2 # http timeout
|
||||||
|
|
||||||
OPTIONS = ['progress', 'cache']
|
OPTIONS = ['progress', 'cache']
|
||||||
|
|
||||||
|
@ -317,7 +319,7 @@ def EncDownload(url):
|
||||||
|
|
||||||
return (data, enc, con.geturl())
|
return (data, enc, con.geturl())
|
||||||
|
|
||||||
def Fill(rss, cache, feedurl="/", mode='feed'):
|
def Fill(rss, cache, feedurl="/", fast=False):
|
||||||
""" Returns True when it has done its best """
|
""" Returns True when it has done its best """
|
||||||
|
|
||||||
item = XMLMap(rss, ITEM_MAP, True)
|
item = XMLMap(rss, ITEM_MAP, True)
|
||||||
|
@ -368,7 +370,8 @@ def Fill(rss, cache, feedurl="/", mode='feed'):
|
||||||
return True
|
return True
|
||||||
|
|
||||||
# super-fast mode
|
# super-fast mode
|
||||||
if mode == 'cache':
|
if fast:
|
||||||
|
log('skipped')
|
||||||
return False
|
return False
|
||||||
|
|
||||||
# download
|
# download
|
||||||
|
@ -413,16 +416,27 @@ def Gather(url, cachePath, mode='feed'):
|
||||||
rss = lxml.objectify.fromstring(xml)
|
rss = lxml.objectify.fromstring(xml)
|
||||||
root = rss.channel if hasattr(rss, 'channel') else rss
|
root = rss.channel if hasattr(rss, 'channel') else rss
|
||||||
root = XMLMap(root, RSS_MAP)
|
root = XMLMap(root, RSS_MAP)
|
||||||
|
size = len(root.item)
|
||||||
|
|
||||||
# set
|
# set
|
||||||
if MAX:
|
startTime = time.time()
|
||||||
for item in root.item[MAX:]:
|
for i, item in enumerate(root.item):
|
||||||
item.getparent().remove(item)
|
|
||||||
for i,item in enumerate(root.item):
|
|
||||||
if mode == 'progress':
|
if mode == 'progress':
|
||||||
print "%s/%s" % (i+1, len(root.item))
|
if MAX_ITEM == 0:
|
||||||
|
print "%s/%s" % (i+1, size)
|
||||||
|
else:
|
||||||
|
print "%s/%s" % (i+1, min(MAX_ITEM, size))
|
||||||
sys.stdout.flush()
|
sys.stdout.flush()
|
||||||
Fill(item, cache, mode)
|
|
||||||
|
if i+1 > LIM_ITEM > 0:
|
||||||
|
item.getparent().remove(item)
|
||||||
|
elif time.time() - startTime > MAX_TIME >= 0 or i+1 > MAX_ITEM > 0:
|
||||||
|
if Fill(item, cache, url, True) is False:
|
||||||
|
item.getparent().remove(item)
|
||||||
|
else:
|
||||||
|
Fill(item, cache, url)
|
||||||
|
|
||||||
|
log(len(root.item))
|
||||||
|
|
||||||
return root.tostring(xml_declaration=True, encoding='UTF-8')
|
return root.tostring(xml_declaration=True, encoding='UTF-8')
|
||||||
|
|
||||||
|
@ -439,7 +453,6 @@ if __name__ == "__main__":
|
||||||
|
|
||||||
cache = os.getcwd() + '/cache'
|
cache = os.getcwd() + '/cache'
|
||||||
log(url)
|
log(url)
|
||||||
RSS = Gather(url, cache, options)
|
|
||||||
else:
|
else:
|
||||||
url, options = parseOptions(OPTIONS)
|
url, options = parseOptions(OPTIONS)
|
||||||
|
|
||||||
|
@ -448,7 +461,13 @@ if __name__ == "__main__":
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
|
|
||||||
cache = os.path.expanduser('~') + '/.cache/morss'
|
cache = os.path.expanduser('~') + '/.cache/morss'
|
||||||
RSS = Gather(url, cache, options)
|
|
||||||
|
if options == 'progress':
|
||||||
|
MAX_TIME = -1
|
||||||
|
if options == 'cache':
|
||||||
|
MAX_TIME = 0
|
||||||
|
|
||||||
|
RSS = Gather(url, cache, options)
|
||||||
|
|
||||||
if RSS is not False and options != 'progress':
|
if RSS is not False and options != 'progress':
|
||||||
if 'REQUEST_URI' in os.environ or not os.getenv('DEBUG', False):
|
if 'REQUEST_URI' in os.environ or not os.getenv('DEBUG', False):
|
||||||
|
|
Loading…
Reference in New Issue