Improve options and limits
New limits are possible: time limit, max number of item fetched, and max number of item taken from cache. Fill third argument is now Fast=True, which is self-explicit. (Complexity of the changes made separate commits impossible).
This commit is contained in:
		
							
								
								
									
										45
									
								
								morss.py
									
									
									
									
									
								
							
							
						
						
									
										45
									
								
								morss.py
									
									
									
									
									
								
							@@ -22,9 +22,11 @@ import urlparse
 | 
			
		||||
 | 
			
		||||
from readability import readability
 | 
			
		||||
 | 
			
		||||
MAX = 70
 | 
			
		||||
DELAY=10
 | 
			
		||||
TIMEOUT = 2
 | 
			
		||||
LIM_ITEM = 100	# deletes what's beyond
 | 
			
		||||
MAX_ITEM = 50	# cache-only beyond
 | 
			
		||||
MAX_TIME = 7	# cache-only after
 | 
			
		||||
DELAY = 10	# xml cache
 | 
			
		||||
TIMEOUT = 2	# http timeout
 | 
			
		||||
 | 
			
		||||
OPTIONS = ['progress', 'cache']
 | 
			
		||||
 | 
			
		||||
@@ -317,7 +319,7 @@ def EncDownload(url):
 | 
			
		||||
 | 
			
		||||
	return (data, enc, con.geturl())
 | 
			
		||||
 | 
			
		||||
def Fill(rss, cache, feedurl="/", mode='feed'):
 | 
			
		||||
def Fill(rss, cache, feedurl="/", fast=False):
 | 
			
		||||
	""" Returns True when it has done its best """
 | 
			
		||||
 | 
			
		||||
	item = XMLMap(rss, ITEM_MAP, True)
 | 
			
		||||
@@ -368,7 +370,8 @@ def Fill(rss, cache, feedurl="/", mode='feed'):
 | 
			
		||||
			return True
 | 
			
		||||
 | 
			
		||||
	# super-fast mode
 | 
			
		||||
	if mode == 'cache':
 | 
			
		||||
	if fast:
 | 
			
		||||
		log('skipped')
 | 
			
		||||
		return False
 | 
			
		||||
 | 
			
		||||
	# download
 | 
			
		||||
@@ -413,16 +416,27 @@ def Gather(url, cachePath, mode='feed'):
 | 
			
		||||
	rss = lxml.objectify.fromstring(xml)
 | 
			
		||||
	root = rss.channel if hasattr(rss, 'channel') else rss
 | 
			
		||||
	root = XMLMap(root, RSS_MAP)
 | 
			
		||||
	size = len(root.item)
 | 
			
		||||
 | 
			
		||||
	# set
 | 
			
		||||
	if MAX:
 | 
			
		||||
		for item in root.item[MAX:]:
 | 
			
		||||
			item.getparent().remove(item)
 | 
			
		||||
	for i,item in enumerate(root.item):
 | 
			
		||||
	startTime = time.time()
 | 
			
		||||
	for i, item in enumerate(root.item):
 | 
			
		||||
		if mode == 'progress':
 | 
			
		||||
			print "%s/%s" % (i+1, len(root.item))
 | 
			
		||||
			if MAX_ITEM == 0:
 | 
			
		||||
				print "%s/%s" % (i+1, size)
 | 
			
		||||
			else:
 | 
			
		||||
				print "%s/%s" % (i+1, min(MAX_ITEM, size))
 | 
			
		||||
			sys.stdout.flush()
 | 
			
		||||
		Fill(item, cache, mode)
 | 
			
		||||
 | 
			
		||||
		if i+1 > LIM_ITEM > 0:
 | 
			
		||||
			item.getparent().remove(item)
 | 
			
		||||
		elif time.time() - startTime > MAX_TIME >= 0 or i+1 > MAX_ITEM > 0:
 | 
			
		||||
			if Fill(item, cache, url, True) is False:
 | 
			
		||||
				item.getparent().remove(item)
 | 
			
		||||
		else:
 | 
			
		||||
			Fill(item, cache, url)
 | 
			
		||||
 | 
			
		||||
	log(len(root.item))
 | 
			
		||||
 | 
			
		||||
	return root.tostring(xml_declaration=True, encoding='UTF-8')
 | 
			
		||||
 | 
			
		||||
@@ -439,7 +453,6 @@ if __name__ == "__main__":
 | 
			
		||||
 | 
			
		||||
		cache = os.getcwd() + '/cache'
 | 
			
		||||
		log(url)
 | 
			
		||||
		RSS = Gather(url, cache, options)
 | 
			
		||||
	else:
 | 
			
		||||
		url, options = parseOptions(OPTIONS)
 | 
			
		||||
 | 
			
		||||
@@ -448,7 +461,13 @@ if __name__ == "__main__":
 | 
			
		||||
			sys.exit(1)
 | 
			
		||||
 | 
			
		||||
		cache =	os.path.expanduser('~') + '/.cache/morss'
 | 
			
		||||
		RSS = Gather(url, cache, options)
 | 
			
		||||
 | 
			
		||||
	if options == 'progress':
 | 
			
		||||
		MAX_TIME = -1
 | 
			
		||||
	if options == 'cache':
 | 
			
		||||
		MAX_TIME = 0
 | 
			
		||||
 | 
			
		||||
	RSS = Gather(url, cache, options)
 | 
			
		||||
 | 
			
		||||
	if RSS is not False and options != 'progress':
 | 
			
		||||
		if 'REQUEST_URI' in os.environ or not os.getenv('DEBUG', False):
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user