More flexible parameters can be passed
Multiple parameters can now be passed. The HTTP "API" has been improved, and URLs now have to be of the form "http://<path to morss>/:<param1>:<param2>/<url>". The code handling parameter parsing is now much cleaner. The debug toggle is now a variable, which can be changed with parameters. Also, HTTP logging is no longer written to a file, which tended to grow way too fast and lacked an "error 403 protection"; instead, the parameter ":debug" can be passed in the URL, and the page will be delivered as "text/plain" with the debug output written into it. Therefore, some logging had to be moved around so that nothing is output while the HTTP headers are being defined.
master
parent
c25aec7107
commit
04840d9843
67
morss.py
67
morss.py
|
@ -30,7 +30,7 @@ MAX_TIME = 7 # cache-only after (in sec)
|
||||||
DELAY = 10*60 # xml cache (in sec)
|
DELAY = 10*60 # xml cache (in sec)
|
||||||
TIMEOUT = 2 # http timeout (in sec)
|
TIMEOUT = 2 # http timeout (in sec)
|
||||||
|
|
||||||
OPTIONS = ['progress', 'cache']
|
DEBUG = False
|
||||||
|
|
||||||
UA_RSS = 'Liferea/1.8.12 (Linux; fr_FR.utf8; http://liferea.sf.net/)'
|
UA_RSS = 'Liferea/1.8.12 (Linux; fr_FR.utf8; http://liferea.sf.net/)'
|
||||||
UA_HML = 'Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.6; en-US; rv:1.9.2.11) Gecko/20101012 Firefox/3.6.11'
|
UA_HML = 'Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.6; en-US; rv:1.9.2.11) Gecko/20101012 Firefox/3.6.11'
|
||||||
|
@ -45,12 +45,8 @@ if 'REQUEST_URI' in os.environ:
|
||||||
cgitb.enable()
|
cgitb.enable()
|
||||||
|
|
||||||
def log(txt):
|
def log(txt):
|
||||||
if not 'REQUEST_URI' in os.environ:
|
if DEBUG:
|
||||||
if os.getenv('DEBUG', False):
|
|
||||||
print repr(txt)
|
print repr(txt)
|
||||||
else:
|
|
||||||
with open('morss.log', 'a') as file:
|
|
||||||
file.write(repr(txt).encode('utf-8') + "\n")
|
|
||||||
|
|
||||||
|
|
||||||
def lenHTML(txt):
|
def lenHTML(txt):
|
||||||
|
@ -81,30 +77,25 @@ def setContent(item, txt):
|
||||||
else:
|
else:
|
||||||
item.content = txt
|
item.content = txt
|
||||||
|
|
||||||
def parseOptions(available):
|
def parseOptions():
|
||||||
options = None
|
|
||||||
if 'REQUEST_URI' in os.environ:
|
if 'REQUEST_URI' in os.environ:
|
||||||
if 'REDIRECT_URL' in os.environ:
|
|
||||||
url = os.environ['REQUEST_URI'][1:]
|
url = os.environ['REQUEST_URI'][1:]
|
||||||
else:
|
|
||||||
url = os.environ['REQUEST_URI'][len(os.environ['SCRIPT_NAME'])+1:]
|
if 'REDIRECT_URL' not in os.environ:
|
||||||
|
url = url[len(os.environ['SCRIPT_NAME']):]
|
||||||
|
|
||||||
|
if url.startswith(':'):
|
||||||
|
options = url.split('/')[0].split(':')[1:]
|
||||||
|
url = url.split('/', 1)[1]
|
||||||
|
|
||||||
if urlparse.urlparse(url).scheme not in PROTOCOL:
|
if urlparse.urlparse(url).scheme not in PROTOCOL:
|
||||||
split = url.split('/', 1)
|
|
||||||
if len(split) and split[0] in available:
|
|
||||||
options = split[0]
|
|
||||||
url = split[1]
|
|
||||||
url = 'http://' + url
|
url = 'http://' + url
|
||||||
|
else:
|
||||||
|
if len(sys.argv) <= 1:
|
||||||
|
return (None, [])
|
||||||
|
|
||||||
else:
|
options = sys.argv[1:-1]
|
||||||
if len(sys.argv) == 3:
|
url = sys.argv[-1]
|
||||||
if sys.argv[1] in available:
|
|
||||||
options = sys.argv[1]
|
|
||||||
url = sys.argv[2]
|
|
||||||
elif len(sys.argv) == 2:
|
|
||||||
url = sys.argv[1]
|
|
||||||
else:
|
|
||||||
return (None, None)
|
|
||||||
|
|
||||||
if urlparse.urlparse(url).scheme not in PROTOCOL:
|
if urlparse.urlparse(url).scheme not in PROTOCOL:
|
||||||
url = 'http://' + url
|
url = 'http://' + url
|
||||||
|
@ -369,7 +360,7 @@ def Fill(item, cache, feedurl='/', fast=False):
|
||||||
|
|
||||||
return True
|
return True
|
||||||
|
|
||||||
def Gather(url, cachePath, mode='feed'):
|
def Gather(url, cachePath, progress=False):
|
||||||
url = url.replace(' ', '%20')
|
url = url.replace(' ', '%20')
|
||||||
cache = Cache(cachePath, url)
|
cache = Cache(cachePath, url)
|
||||||
|
|
||||||
|
@ -394,7 +385,7 @@ def Gather(url, cachePath, mode='feed'):
|
||||||
# set
|
# set
|
||||||
startTime = time.time()
|
startTime = time.time()
|
||||||
for i, item in enumerate(rss.items):
|
for i, item in enumerate(rss.items):
|
||||||
if mode == 'progress':
|
if progress:
|
||||||
if MAX_ITEM == 0:
|
if MAX_ITEM == 0:
|
||||||
print '%s/%s' % (i+1, size)
|
print '%s/%s' % (i+1, size)
|
||||||
else:
|
else:
|
||||||
|
@ -414,22 +405,24 @@ def Gather(url, cachePath, mode='feed'):
|
||||||
return rss.tostring(xml_declaration=True, encoding='UTF-8')
|
return rss.tostring(xml_declaration=True, encoding='UTF-8')
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
url, options = parseOptions(OPTIONS)
|
url, options = parseOptions()
|
||||||
log(url)
|
DEBUG = 'debug' in options
|
||||||
|
|
||||||
if 'REQUEST_URI' in os.environ:
|
if 'REQUEST_URI' in os.environ:
|
||||||
if 'HTTP_IF_NONE_MATCH' in os.environ:
|
if 'HTTP_IF_NONE_MATCH' in os.environ:
|
||||||
log('etag sent')
|
|
||||||
if time.time() - int(os.environ['HTTP_IF_NONE_MATCH'][1:-1]) < DELAY:
|
if time.time() - int(os.environ['HTTP_IF_NONE_MATCH'][1:-1]) < DELAY:
|
||||||
log('etag good')
|
|
||||||
print 'Status: 304'
|
print 'Status: 304'
|
||||||
print
|
print
|
||||||
|
log(url)
|
||||||
|
log('etag good')
|
||||||
sys.exit(0)
|
sys.exit(0)
|
||||||
|
|
||||||
print 'Status: 200'
|
print 'Status: 200'
|
||||||
print 'ETag: "%s"' % int(time.time())
|
print 'ETag: "%s"' % int(time.time())
|
||||||
|
|
||||||
if options == 'progress':
|
if 'debug' in options:
|
||||||
|
print 'Content-Type: text/plain'
|
||||||
|
elif 'progress' in options:
|
||||||
print 'Content-Type: application/octet-stream'
|
print 'Content-Type: application/octet-stream'
|
||||||
else:
|
else:
|
||||||
print 'Content-Type: text/xml'
|
print 'Content-Type: text/xml'
|
||||||
|
@ -439,22 +432,22 @@ if __name__ == '__main__':
|
||||||
else:
|
else:
|
||||||
cache = os.path.expanduser('~') + '/.cache/morss'
|
cache = os.path.expanduser('~') + '/.cache/morss'
|
||||||
|
|
||||||
|
log(url)
|
||||||
if url is None:
|
if url is None:
|
||||||
print 'Please provide url.'
|
print 'Please provide url.'
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
|
|
||||||
if options == 'progress':
|
if 'progress' in options:
|
||||||
MAX_TIME = -1
|
MAX_TIME = -1
|
||||||
if options == 'cache':
|
if 'cache' in options:
|
||||||
MAX_TIME = 0
|
MAX_TIME = 0
|
||||||
|
|
||||||
RSS = Gather(url, cache, options)
|
RSS = Gather(url, cache, 'progress' in options)
|
||||||
|
|
||||||
if RSS is not False and options != 'progress':
|
if RSS is not False and 'progress' not in options and not DEBUG:
|
||||||
if 'REQUEST_URI' in os.environ or not os.getenv('DEBUG', False):
|
|
||||||
print RSS
|
print RSS
|
||||||
|
|
||||||
if RSS is False and options != 'progress':
|
if RSS is False and 'progress' not in options:
|
||||||
print 'Error fetching feed.'
|
print 'Error fetching feed.'
|
||||||
|
|
||||||
log('done')
|
log('done')
|
||||||
|
|
Loading…
Reference in New Issue