Compare commits

..

10 Commits

Author SHA1 Message Date
pictuga 7c3091d64c morss: code spacing
One of those commits that make me feel useful
2020-03-21 23:41:46 +01:00
pictuga 37b4e144a9 morss: small fixes
Includes dropping ftp support
2020-03-21 23:30:18 +01:00
pictuga bd4b7b5bb2 morss: convert HTML feeds to XML ones for completeness 2020-03-21 23:27:42 +01:00
pictuga 68d920d4b5 morss: make FeedFormat more flexible with encoding 2020-03-21 23:26:35 +01:00
pictuga 758ff404a8 morss: fix cgi_app silent output
*Must* return something
2020-03-21 23:25:25 +01:00
pictuga 463530f02c morss: middleware to enforce encoding
bytes are always expected
2020-03-21 23:23:50 +01:00
pictuga ec0a28a91d morss: use middleware for wsgi apps 2020-03-21 23:23:21 +01:00
pictuga 421acb439d morss: make errors more readable over http 2020-03-21 23:08:29 +01:00
pictuga 42c5d09ccb morss: split "options" var into "raw_options" & "options"
To make it clearer who-is-what
2020-03-21 23:07:07 +01:00
pictuga 056de12484 morss: add sheet.xsl to files handled by http server 2020-03-21 23:06:28 +01:00
1 changed files with 97 additions and 27 deletions

View File

@ -18,6 +18,7 @@ from . import readabilite
import wsgiref.simple_server import wsgiref.simple_server
import wsgiref.handlers import wsgiref.handlers
import cgitb
try: try:
@ -44,7 +45,7 @@ THREADS = 10 # number of threads (1 for single-threaded)
DEBUG = False DEBUG = False
PORT = 8080 PORT = 8080
PROTOCOL = ['http', 'https', 'ftp'] PROTOCOL = ['http', 'https']
def filterOptions(options): def filterOptions(options):
@ -66,6 +67,7 @@ def log(txt, force=False):
if DEBUG or force: if DEBUG or force:
if 'REQUEST_URI' in os.environ: if 'REQUEST_URI' in os.environ:
open('morss.log', 'a').write("%s\n" % repr(txt)) open('morss.log', 'a').write("%s\n" % repr(txt))
else: else:
print(repr(txt)) print(repr(txt))
@ -73,6 +75,7 @@ def log(txt, force=False):
def len_html(txt): def len_html(txt):
if len(txt): if len(txt):
return len(lxml.html.fromstring(txt).text_content()) return len(lxml.html.fromstring(txt).text_content())
else: else:
return 0 return 0
@ -80,6 +83,7 @@ def len_html(txt):
def count_words(txt): def count_words(txt):
if len(txt): if len(txt):
return len(lxml.html.fromstring(txt).text_content().split()) return len(lxml.html.fromstring(txt).text_content().split())
return 0 return 0
@ -88,12 +92,14 @@ class Options:
if len(args): if len(args):
self.options = args self.options = args
self.options.update(options or {}) self.options.update(options or {})
else: else:
self.options = options or {} self.options = options or {}
def __getattr__(self, key): def __getattr__(self, key):
if key in self.options: if key in self.options:
return self.options[key] return self.options[key]
else: else:
return False return False
@ -107,17 +113,23 @@ class Options:
def parseOptions(options): def parseOptions(options):
""" Turns ['md=True'] into {'md':True} """ """ Turns ['md=True'] into {'md':True} """
out = {} out = {}
for option in options: for option in options:
split = option.split('=', 1) split = option.split('=', 1)
if len(split) > 1: if len(split) > 1:
if split[0].lower() == 'true': if split[0].lower() == 'true':
out[split[0]] = True out[split[0]] = True
elif split[0].lower() == 'false': elif split[0].lower() == 'false':
out[split[0]] = False out[split[0]] = False
else: else:
out[split[0]] = split[1] out[split[0]] = split[1]
else: else:
out[split[0]] = True out[split[0]] = True
return out return out
@ -208,6 +220,7 @@ def ItemFill(item, options, feedurl='/', fast=False):
if len(match): if len(match):
link = match[0] link = match[0]
log(link) log(link)
else: else:
link = None link = None
@ -217,6 +230,7 @@ def ItemFill(item, options, feedurl='/', fast=False):
if len(match) and urlparse(match[0]).netloc != 'www.facebook.com': if len(match) and urlparse(match[0]).netloc != 'www.facebook.com':
link = match[0] link = match[0]
log(link) log(link)
else: else:
link = None link = None
@ -299,6 +313,7 @@ def UrlFix(url):
return url return url
def FeedFetch(url, options): def FeedFetch(url, options):
# allow for code execution for feedify # allow for code execution for feedify
pre = feedify.pre_worker(url) pre = feedify.pre_worker(url)
@ -327,7 +342,6 @@ def FeedFetch(url, options):
if options.items: if options.items:
# using custom rules # using custom rules
rss = feeds.FeedHTML(xml) rss = feeds.FeedHTML(xml)
feed.rule
rss.rules['items'] = options.items rss.rules['items'] = options.items
@ -340,6 +354,8 @@ def FeedFetch(url, options):
if options.item_time: if options.item_time:
rss.rules['item_time'] = options.item_time rss.rules['item_time'] = options.item_time
rss = rss.convert(feeds.FeedXML)
else: else:
try: try:
rss = feeds.parse(xml, url, contenttype) rss = feeds.parse(xml, url, contenttype)
@ -377,6 +393,7 @@ def FeedGather(rss, url, options):
value = queue.get() value = queue.get()
try: try:
worker(*value) worker(*value)
except Exception as e: except Exception as e:
log('Thread Error: %s' % e.message) log('Thread Error: %s' % e.message)
queue.task_done() queue.task_done()
@ -416,6 +433,7 @@ def FeedGather(rss, url, options):
for i, item in enumerate(list(rss.items)): for i, item in enumerate(list(rss.items)):
if threads == 1: if threads == 1:
worker(*[i, item]) worker(*[i, item])
else: else:
queue.put([i, item]) queue.put([i, item])
@ -435,37 +453,38 @@ def FeedGather(rss, url, options):
return rss return rss
def FeedFormat(rss, options): def FeedFormat(rss, options, encoding='utf-8'):
if options.callback: if options.callback:
if re.match(r'^[a-zA-Z0-9\.]+$', options.callback) is not None: if re.match(r'^[a-zA-Z0-9\.]+$', options.callback) is not None:
return '%s(%s)' % (options.callback, rss.tojson()) out = '%s(%s)' % (options.callback, rss.tojson(encoding='unicode'))
return out if encoding == 'unicode' else out.encode(encoding)
else: else:
raise MorssException('Invalid callback var name') raise MorssException('Invalid callback var name')
elif options.json: elif options.json:
if options.indent: if options.indent:
return rss.tojson(encoding='UTF-8', indent=4) return rss.tojson(encoding=encoding, indent=4)
else: else:
return rss.tojson(encoding='UTF-8') return rss.tojson(encoding=encoding)
elif options.csv: elif options.csv:
return rss.tocsv(encoding='UTF-8') return rss.tocsv(encoding=encoding)
elif options.reader: elif options.reader:
if options.indent: if options.indent:
return rss.tohtml(encoding='UTF-8', pretty_print=True) return rss.tohtml(encoding=encoding, pretty_print=True)
else: else:
return rss.tohtml(encoding='UTF-8') return rss.tohtml(encoding=encoding)
else: else:
if options.indent: if options.indent:
return rss.torss(xml_declaration=True, encoding='UTF-8', pretty_print=True) return rss.torss(xml_declaration=True, encoding=encoding, pretty_print=True)
else: else:
return rss.torss(xml_declaration=True, encoding='UTF-8') return rss.torss(xml_declaration=True, encoding=encoding)
def process(url, cache=None, options=None): def process(url, cache=None, options=None):
@ -499,7 +518,7 @@ def cgi_app(environ, start_response):
if url.startswith(':'): if url.startswith(':'):
split = url.split('/', 1) split = url.split('/', 1)
options = split[0].replace('|', '/').replace('\\\'', '\'').split(':')[1:] raw_options = split[0].replace('|', '/').replace('\\\'', '\'').split(':')[1:]
if len(split) > 1: if len(split) > 1:
url = split[1] url = split[1]
@ -507,10 +526,10 @@ def cgi_app(environ, start_response):
url = '' url = ''
else: else:
options = [] raw_options = []
# init # init
options = Options(filterOptions(parseOptions(options))) options = Options(filterOptions(parseOptions(raw_options)))
headers = {} headers = {}
global DEBUG global DEBUG
@ -551,18 +570,42 @@ def cgi_app(environ, start_response):
rss = FeedGather(rss, url, options) rss = FeedGather(rss, url, options)
out = FeedFormat(rss, options) out = FeedFormat(rss, options)
if not options.silent: if options.silent:
return out return ['']
else:
return [out]
def cgi_wrapper(environ, start_response): def middleware(func):
# simple http server for html and css " Decorator to turn a function into a wsgi middleware "
# This is called when parsing the code
def app_builder(app):
# This is called when doing app = cgi_wrapper(app)
def app_wrap(environ, start_response):
# This is called when a http request is being processed
return func(environ, start_response, app)
return app_wrap
return app_builder
@middleware
def cgi_file_handler(environ, start_response, app):
" Simple HTTP server to serve static files (.html, .css, etc.) "
files = { files = {
'': 'text/html', '': 'text/html',
'index.html': 'text/html'} 'index.html': 'text/html',
'sheet.xsl': 'text/xsl'}
if 'REQUEST_URI' in environ: if 'REQUEST_URI' in environ:
url = environ['REQUEST_URI'][1:] url = environ['REQUEST_URI'][1:]
else: else:
url = environ['PATH_INFO'][1:] url = environ['PATH_INFO'][1:]
@ -591,16 +634,29 @@ def cgi_wrapper(environ, start_response):
start_response(headers['status'], list(headers.items())) start_response(headers['status'], list(headers.items()))
return ['Error %s' % headers['status']] return ['Error %s' % headers['status']]
# actual morss use else:
return app(environ, start_response)
@middleware
def cgi_error_handler(environ, start_response, app):
try: try:
return [cgi_app(environ, start_response) or '(empty)'] return app(environ, start_response)
except (KeyboardInterrupt, SystemExit): except (KeyboardInterrupt, SystemExit):
raise raise
except Exception as e: except Exception as e:
headers = {'status': '500 Oops', 'content-type': 'text/plain'} headers = {'status': '500 Oops', 'content-type': 'text/html'}
start_response(headers['status'], list(headers.items()), sys.exc_info()) start_response(headers['status'], list(headers.items()), sys.exc_info())
log('ERROR <%s>: %s' % (url, e.message), force=True) log('ERROR: %s' % repr(e), force=True)
return ['An error happened:\n%s' % e.message] return [cgitb.html(sys.exc_info())]
@middleware
def cgi_encode(environ, start_response, app):
out = app(environ, start_response)
return [x if isinstance(x, bytes) else x.encode('utf-8') for x in out]
def cli_app(): def cli_app():
@ -627,6 +683,7 @@ def isInt(string):
try: try:
int(string) int(string)
return True return True
except ValueError: except ValueError:
return False return False
@ -634,7 +691,12 @@ def isInt(string):
def main(): def main():
if 'REQUEST_URI' in os.environ: if 'REQUEST_URI' in os.environ:
# mod_cgi # mod_cgi
wsgiref.handlers.CGIHandler().run(cgi_wrapper)
app = cgi_app
app = cgi_error_handler(app)
app = cgi_encode(app)
wsgiref.handlers.CGIHandler().run(app)
elif len(sys.argv) <= 1 or isInt(sys.argv[1]) or '--root' in sys.argv[1:]: elif len(sys.argv) <= 1 or isInt(sys.argv[1]) or '--root' in sys.argv[1:]:
# start internal (basic) http server # start internal (basic) http server
@ -643,22 +705,30 @@ def main():
argPort = int(sys.argv[1]) argPort = int(sys.argv[1])
if argPort > 0: if argPort > 0:
port = argPort port = argPort
else: else:
raise MorssException('Port must be positive integer') raise MorssException('Port must be positive integer')
else: else:
port = PORT port = PORT
app = cgi_app
app = cgi_file_handler(app)
app = cgi_error_handler(app)
app = cgi_encode(app)
print('Serving http://localhost:%s/' % port) print('Serving http://localhost:%s/' % port)
httpd = wsgiref.simple_server.make_server('', port, cgi_wrapper) httpd = wsgiref.simple_server.make_server('', port, app)
httpd.serve_forever() httpd.serve_forever()
else: else:
# as a CLI app # as a CLI app
try: try:
cli_app() cli_app()
except (KeyboardInterrupt, SystemExit): except (KeyboardInterrupt, SystemExit):
raise raise
except Exception as e: except Exception as e:
print('ERROR: %s' % e.message) print('ERROR: %s' % e.message)