split morss.py into __main__/cgi/cli.py
Should hopefully allow cleaner code in the future
This commit is contained in:
		
							
								
								
									
										3
									
								
								main.py
									
									
									
									
									
								
							
							
						
						
									
										3
									
								
								main.py
									
									
									
									
									
								
							@@ -1,6 +1,7 @@
 | 
			
		||||
#!/usr/bin/env python
 | 
			
		||||
 | 
			
		||||
from morss import main, cgi_standalone_app as application
 | 
			
		||||
from morss.__main__ import main
 | 
			
		||||
from morss.cgi import application
 | 
			
		||||
 | 
			
		||||
if __name__ == '__main__':
 | 
			
		||||
    main()
 | 
			
		||||
 
 | 
			
		||||
@@ -1,2 +1,3 @@
 | 
			
		||||
# run on `import morss`
 | 
			
		||||
from .morss import *
 | 
			
		||||
from .cgi import application
 | 
			
		||||
 
 | 
			
		||||
@@ -1,5 +1,74 @@
 | 
			
		||||
# run on `python -m morss`
 | 
			
		||||
from .morss import main
 | 
			
		||||
 | 
			
		||||
import os
 | 
			
		||||
import sys
 | 
			
		||||
 | 
			
		||||
from . import cgi
 | 
			
		||||
from . import cli
 | 
			
		||||
 | 
			
		||||
from .morss import MorssException
 | 
			
		||||
 | 
			
		||||
import wsgiref.simple_server
 | 
			
		||||
import wsgiref.handlers
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
PORT = 8080
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def isInt(string):
    """Tell whether `string` is accepted by `int()`.

    Returns True when `int(string)` succeeds and False when it raises
    ValueError. Any other exception raised by `int()` propagates.
    """
    try:
        int(string)

    except ValueError:
        return False

    else:
        return True
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def main():
    """Run morss in the mode selected by the environment and `sys.argv`.

    * `REQUEST_URI` present in `os.environ`: serve a single request through
      `wsgiref.handlers.CGIHandler` (mod_cgi).
    * no argument, or a first argument that parses as an integer: start the
      basic `wsgiref` http server, on that port or on `PORT` by default.
    * anything else: run the command-line app.

    Raises:
        MorssException: when the port given on the command line is not
            strictly positive.
    """
    if 'REQUEST_URI' in os.environ:
        # mod_cgi

        app = cgi.cgi_app
        app = cgi.cgi_dispatcher(app)
        app = cgi.cgi_error_handler(app)
        app = cgi.cgi_encode(app)

        wsgiref.handlers.CGIHandler().run(app)

    elif len(sys.argv) <= 1 or isInt(sys.argv[1]):
        # start internal (basic) http server

        if len(sys.argv) > 1:
            # the enclosing condition already guarantees argv[1] is an integer
            port = int(sys.argv[1])

            if port <= 0:
                raise MorssException('Port must be positive integer')

        else:
            port = PORT

        app = cgi.cgi_app
        app = cgi.cgi_file_handler(app)
        app = cgi.cgi_dispatcher(app)
        app = cgi.cgi_error_handler(app)
        app = cgi.cgi_encode(app)

        print('Serving http://localhost:%s/' % port)
        httpd = wsgiref.simple_server.make_server('', port, app)
        httpd.serve_forever()

    else:
        # as a CLI app
        try:
            cli.cli_app()

        except Exception as e:
            # KeyboardInterrupt and SystemExit do not derive from Exception,
            # so they still propagate. Exceptions have no `.message`
            # attribute on python 3, hence the plain string conversion.
            print('ERROR: %s' % str(e))


if __name__ == '__main__':
    main()
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										242
									
								
								morss/cgi.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										242
									
								
								morss/cgi.py
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,242 @@
 | 
			
		||||
import sys
 | 
			
		||||
import os.path
 | 
			
		||||
import re
 | 
			
		||||
 | 
			
		||||
import cgitb
 | 
			
		||||
 | 
			
		||||
try:
 | 
			
		||||
    # python 2
 | 
			
		||||
    from urllib import unquote
 | 
			
		||||
except ImportError:
 | 
			
		||||
    # python 3
 | 
			
		||||
    from urllib.parse import unquote
 | 
			
		||||
 | 
			
		||||
from . import crawler
 | 
			
		||||
from . import readabilite
 | 
			
		||||
from .morss import FeedFetch, FeedGather, FeedFormat
 | 
			
		||||
from .morss import Options, filterOptions, parseOptions
 | 
			
		||||
from .morss import log, DELAY, DEBUG, MorssException
 | 
			
		||||
 | 
			
		||||
from . import cred
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def cgi_parse_environ(environ):
    """Extract the target url and the morss options from a WSGI/CGI environ.

    The request path is taken from `REQUEST_URI` when present, otherwise it
    is rebuilt from `PATH_INFO` and `QUERY_STRING`. A leading
    `(cgi/)(morss.py|main.py)/` prefix is dropped. When what remains starts
    with `:`, its first path segment is read as a `:`-separated option list
    and the rest is the url.

    Args:
        environ: the WSGI environ mapping.

    Returns:
        A `(url, options)` tuple, `options` being an `Options` instance.
    """
    global DEBUG

    # get options

    if 'REQUEST_URI' in environ:
        url = environ['REQUEST_URI'][1:]

    else:
        # both keys may be missing from the environ, so do not index blindly
        url = environ.get('PATH_INFO', '')[1:]

        query = environ.get('QUERY_STRING')

        if query:
            url += '?' + query

    url = re.sub(r'^/?(cgi/)?(morss.py|main.py)/', '', url)

    if url.startswith(':'):
        split = url.split('/', 1)

        # '|' stands for '/' inside option values, and \' for a plain quote
        raw_options = unquote(split[0]).replace('|', '/').replace('\\\'', '\'').split(':')[1:]

        url = split[1] if len(split) > 1 else ''

    else:
        raw_options = []

    # init
    options = Options(filterOptions(parseOptions(raw_options)))

    # `global DEBUG` only rebinds the name imported into this module, so
    # also update the flag on the .morss module, where DEBUG is defined.
    # NOTE(review): presumably `log` reads the flag there -- confirm
    from . import morss as core

    DEBUG = options.debug
    core.DEBUG = options.debug

    return (url, options)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def cgi_app(environ, start_response):
    """WSGI application: fetch the requested feed, process it and return it.

    The response headers are derived from the parsed options (CORS, output
    format). `start_response` is only called once `FeedFetch` has returned,
    i.e. before the items are gathered and formatted.

    Returns:
        A one-item list holding the formatted feed, or an empty string when
        the `silent` option is set.
    """
    url, options = cgi_parse_environ(environ)

    # headers
    headers = {
        'status': '200 OK',
        'cache-control': 'max-age=%s' % DELAY,
        'x-content-type-options': 'nosniff', # safari work around
    }

    if options.cors:
        headers['access-control-allow-origin'] = '*'

    # the first matching output option decides the mime type, xml otherwise
    disposition = None

    if options.html:
        mime = 'text/html'

    elif options.txt or options.silent:
        mime = 'text/plain'

    elif options.json:
        mime = 'application/json'

    elif options.callback:
        mime = 'application/javascript'

    elif options.csv:
        mime = 'text/csv'
        disposition = 'attachment; filename="feed.csv"'

    else:
        mime = 'text/xml'

    headers['content-type'] = mime + '; charset=utf-8'

    if disposition is not None:
        headers['content-disposition'] = disposition

    crawler.default_cache = crawler.SQLiteCache(os.path.join(os.getcwd(), 'morss-cache.db'))

    # get the work done
    url, fetched = FeedFetch(url, options)

    start_response(headers['status'], list(headers.items()))

    gathered = FeedGather(fetched, url, options)
    body = FeedFormat(gathered, options)

    return [''] if options.silent else [body]
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def middleware(func):
    """Decorator turning `func(environ, start_response, app)` into a WSGI middleware.

    The decorated name becomes a factory: calling it with a WSGI `app`
    returns a regular `(environ, start_response)` callable which forwards
    the request to `func` together with that `app`.
    """
    # Runs once, when the "@middleware" line is evaluated

    def build(app):
        # Runs when an app gets wrapped, e.g. app = cgi_wrapper(app)

        def handle(environ, start_response):
            # Runs for every http request
            return func(environ, start_response, app)

        return handle

    return build
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@middleware
def cgi_file_handler(environ, start_response, app):
    """Simple HTTP server to serve static files (.html, .css, etc.)

    Only the paths listed in `files` are served; every other request is
    passed on to the wrapped `app`. A known file is looked up under
    `<sys.prefix>/share/morss/www` first, then in the `www` folder next to
    this package. A 404 is answered when neither copy can be read.
    """
    files = {
        '': 'text/html',
        'index.html': 'text/html',
        'sheet.xsl': 'text/xsl'}

    if 'REQUEST_URI' in environ:
        url = environ['REQUEST_URI'][1:]

    else:
        url = environ['PATH_INFO'][1:]

    if url not in files:
        return app(environ, start_response)

    headers = {}

    if url == '':
        url = 'index.html'

    paths = [os.path.join(sys.prefix, 'share/morss/www', url),
        os.path.join(os.path.dirname(__file__), '../www', url)]

    for path in paths:
        try:
            # context manager so the handle is closed even if read() fails
            with open(path, 'rb') as handle:
                body = handle.read()

        except IOError:
            # not available at this location, try the next one
            continue

        headers['status'] = '200 OK'
        headers['content-type'] = files[url]
        start_response(headers['status'], list(headers.items()))
        return [body]

    # none of the candidate paths could be read, i.e. no file found
    headers['status'] = '404 Not found'
    start_response(headers['status'], list(headers.items()))
    return ['Error %s' % headers['status']]
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def cgi_get(environ, start_response):
    """WSGI handler for the `:get` option: fetch a page and return it as html.

    For html/xhtml/xml responses, `:get=page` returns the page with links
    made absolute and script/iframe/noscript elements removed, while
    `:get=article` returns what `readabilite.get_article` extracts. Any
    other content type is returned as fetched.

    Raises:
        MorssException: for an html-like page when the `get` option is
            neither 'page' nor 'article'.
    """
    # TIMEOUT is defined in .morss but is not among this module's top-level imports
    from .morss import TIMEOUT

    url, options = cgi_parse_environ(environ)

    # get page
    req = crawler.adv_get(url=url, timeout=TIMEOUT)

    if req['contenttype'] in ['text/html', 'application/xhtml+xml', 'application/xml']:
        if options.get == 'page':
            # lxml is used below but is not imported at the top of this module
            import lxml.etree

            html = readabilite.parse(req['data'], encoding=req['encoding'])
            html.make_links_absolute(req['url'])

            kill_tags = ['script', 'iframe', 'noscript']

            for tag in kill_tags:
                for elem in html.xpath('//'+tag):
                    elem.getparent().remove(elem)

            output = lxml.etree.tostring(html.getroottree(), encoding='utf-8', method='html')

        elif options.get == 'article':
            output = readabilite.get_article(req['data'], url=req['url'], encoding_in=req['encoding'], encoding_out='utf-8', debug=options.debug)

        else:
            raise MorssException('no :get option passed')

    else:
        output = req['data']

    # return html page
    headers = {'status': '200 OK', 'content-type': 'text/html; charset=utf-8', 'X-Frame-Options': 'SAMEORIGIN'} # SAMEORIGIN to avoid potential abuse
    start_response(headers['status'], list(headers.items()))
    return [output]
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
# Maps an option name to the WSGI handler taking over the request when that
# option is present.
dispatch_table = {
    'get': cgi_get,
    }


@middleware
def cgi_dispatcher(environ, start_response, app):
    """Route the request to a `dispatch_table` handler when its option is set.

    The first table key found in the parsed options wins; without any match
    the request goes to the wrapped `app`.
    """
    _, options = cgi_parse_environ(environ)

    for option_name, handler in dispatch_table.items():
        if option_name in options:
            return handler(environ, start_response)

    return app(environ, start_response)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@middleware
def cgi_error_handler(environ, start_response, app):
    """Turn an exception raised by the wrapped `app` into a '500 Oops' html page.

    The error is logged through `log` (forced) and the page body is the
    `cgitb` html rendering of the traceback.
    """
    try:
        return app(environ, start_response)

    except Exception as e:
        # KeyboardInterrupt and SystemExit are not Exception subclasses, so
        # they go through untouched
        failure = sys.exc_info()

        headers = {'status': '500 Oops', 'content-type': 'text/html'}
        start_response(headers['status'], list(headers.items()), failure)

        log('ERROR: %s' % repr(e), force=True)

        return [cgitb.html(failure)]
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@middleware
def cgi_encode(environ, start_response, app):
    """Make sure every chunk returned by the wrapped `app` is `bytes`.

    Chunks that already are `bytes` are kept as is; anything else goes
    through `str()` and is encoded as utf-8.
    """
    encoded = []

    for chunk in app(environ, start_response):
        if not isinstance(chunk, bytes):
            chunk = str(chunk).encode('utf-8')

        encoded.append(chunk)

    return encoded
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
# Ready-to-use WSGI entry point. From the inside out: feed app, static files,
# option dispatcher, error page, bytes encoding.
application = cgi_encode(cgi_error_handler(cgi_dispatcher(cgi_file_handler(cgi_app))))
 | 
			
		||||
							
								
								
									
										26
									
								
								morss/cli.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										26
									
								
								morss/cli.py
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,26 @@
 | 
			
		||||
import sys
 | 
			
		||||
import os.path
 | 
			
		||||
 | 
			
		||||
from . import crawler
 | 
			
		||||
from .morss import FeedFetch, FeedGather, FeedFormat
 | 
			
		||||
from .morss import Options, filterOptions, parseOptions
 | 
			
		||||
from .morss import log, DEBUG
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def cli_app():
    """Command-line mode: fetch the feed given on the command line and print it.

    The last element of `sys.argv` is the feed url, everything between the
    program name and that url is parsed as options. The formatted feed is
    printed unless the `silent` option is set.
    """
    global DEBUG

    url = sys.argv[-1]
    options = Options(filterOptions(parseOptions(sys.argv[1:-1])))

    # `global DEBUG` only rebinds the name imported into this module, so
    # also update the flag on the .morss module, where DEBUG is defined.
    # NOTE(review): presumably `log` reads the flag there -- confirm
    from . import morss as core

    DEBUG = options.debug
    core.DEBUG = options.debug

    crawler.default_cache = crawler.SQLiteCache(os.path.expanduser('~/.cache/morss-cache.db'))

    url, rss = FeedFetch(url, options)
    rss = FeedGather(rss, url, options)
    out = FeedFormat(rss, options, 'unicode')

    if not options.silent:
        print(out)

    log('done')
 | 
			
		||||
							
								
								
									
										305
									
								
								morss/morss.py
									
									
									
									
									
								
							
							
						
						
									
										305
									
								
								morss/morss.py
									
									
									
									
									
								
							@@ -1,6 +1,4 @@
 | 
			
		||||
import sys
 | 
			
		||||
import os
 | 
			
		||||
import os.path
 | 
			
		||||
 | 
			
		||||
import time
 | 
			
		||||
from datetime import datetime
 | 
			
		||||
@@ -16,20 +14,14 @@ from . import feeds
 | 
			
		||||
from . import crawler
 | 
			
		||||
from . import readabilite
 | 
			
		||||
 | 
			
		||||
import wsgiref.simple_server
 | 
			
		||||
import wsgiref.handlers
 | 
			
		||||
import cgitb
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
try:
 | 
			
		||||
    # python 2
 | 
			
		||||
    from httplib import HTTPException
 | 
			
		||||
    from urllib import unquote
 | 
			
		||||
    from urlparse import urlparse, urljoin, parse_qs
 | 
			
		||||
except ImportError:
 | 
			
		||||
    # python 3
 | 
			
		||||
    from http.client import HTTPException
 | 
			
		||||
    from urllib.parse import unquote
 | 
			
		||||
    from urllib.parse import urlparse, urljoin, parse_qs
 | 
			
		||||
 | 
			
		||||
MAX_ITEM = 5  # cache-only beyond
 | 
			
		||||
@@ -42,7 +34,6 @@ DELAY = 10 * 60  # xml cache & ETag cache (in sec)
 | 
			
		||||
TIMEOUT = 4  # http timeout (in sec)
 | 
			
		||||
 | 
			
		||||
DEBUG = False
 | 
			
		||||
PORT = 8080
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def filterOptions(options):
 | 
			
		||||
@@ -437,299 +428,3 @@ def process(url, cache=None, options=None):
 | 
			
		||||
    rss = FeedGather(rss, url, options)
 | 
			
		||||
 | 
			
		||||
    return FeedFormat(rss, options, 'unicode')
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def cgi_parse_environ(environ):
 | 
			
		||||
    # get options
 | 
			
		||||
 | 
			
		||||
    if 'REQUEST_URI' in environ:
 | 
			
		||||
        url = environ['REQUEST_URI'][1:]
 | 
			
		||||
    else:
 | 
			
		||||
        url = environ['PATH_INFO'][1:]
 | 
			
		||||
 | 
			
		||||
        if environ['QUERY_STRING']:
 | 
			
		||||
            url += '?' + environ['QUERY_STRING']
 | 
			
		||||
 | 
			
		||||
    url = re.sub(r'^/?(cgi/)?(morss.py|main.py)/', '', url)
 | 
			
		||||
 | 
			
		||||
    if url.startswith(':'):
 | 
			
		||||
        split = url.split('/', 1)
 | 
			
		||||
 | 
			
		||||
        raw_options = unquote(split[0]).replace('|', '/').replace('\\\'', '\'').split(':')[1:]
 | 
			
		||||
 | 
			
		||||
        if len(split) > 1:
 | 
			
		||||
            url = split[1]
 | 
			
		||||
        else:
 | 
			
		||||
            url = ''
 | 
			
		||||
 | 
			
		||||
    else:
 | 
			
		||||
        raw_options = []
 | 
			
		||||
 | 
			
		||||
    # init
 | 
			
		||||
    options = Options(filterOptions(parseOptions(raw_options)))
 | 
			
		||||
 | 
			
		||||
    global DEBUG
 | 
			
		||||
    DEBUG = options.debug
 | 
			
		||||
 | 
			
		||||
    return (url, options)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def cgi_app(environ, start_response):
 | 
			
		||||
    url, options = cgi_parse_environ(environ)
 | 
			
		||||
 | 
			
		||||
    headers = {}
 | 
			
		||||
 | 
			
		||||
    # headers
 | 
			
		||||
    headers['status'] = '200 OK'
 | 
			
		||||
    headers['cache-control'] = 'max-age=%s' % DELAY
 | 
			
		||||
    headers['x-content-type-options'] = 'nosniff' # safari work around
 | 
			
		||||
 | 
			
		||||
    if options.cors:
 | 
			
		||||
        headers['access-control-allow-origin'] = '*'
 | 
			
		||||
 | 
			
		||||
    if options.html:
 | 
			
		||||
        headers['content-type'] = 'text/html'
 | 
			
		||||
    elif options.txt or options.silent:
 | 
			
		||||
        headers['content-type'] = 'text/plain'
 | 
			
		||||
    elif options.json:
 | 
			
		||||
        headers['content-type'] = 'application/json'
 | 
			
		||||
    elif options.callback:
 | 
			
		||||
        headers['content-type'] = 'application/javascript'
 | 
			
		||||
    elif options.csv:
 | 
			
		||||
        headers['content-type'] = 'text/csv'
 | 
			
		||||
        headers['content-disposition'] = 'attachment; filename="feed.csv"'
 | 
			
		||||
    else:
 | 
			
		||||
        headers['content-type'] = 'text/xml'
 | 
			
		||||
 | 
			
		||||
    headers['content-type'] += '; charset=utf-8'
 | 
			
		||||
 | 
			
		||||
    crawler.default_cache = crawler.SQLiteCache(os.path.join(os.getcwd(), 'morss-cache.db'))
 | 
			
		||||
 | 
			
		||||
    # get the work done
 | 
			
		||||
    url, rss = FeedFetch(url, options)
 | 
			
		||||
 | 
			
		||||
    start_response(headers['status'], list(headers.items()))
 | 
			
		||||
 | 
			
		||||
    rss = FeedGather(rss, url, options)
 | 
			
		||||
    out = FeedFormat(rss, options)
 | 
			
		||||
 | 
			
		||||
    if options.silent:
 | 
			
		||||
        return ['']
 | 
			
		||||
 | 
			
		||||
    else:
 | 
			
		||||
        return [out]
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def middleware(func):
 | 
			
		||||
    " Decorator to turn a function into a wsgi middleware "
 | 
			
		||||
    # This is called when parsing the "@middleware" code
 | 
			
		||||
 | 
			
		||||
    def app_builder(app):
 | 
			
		||||
        # This is called when doing app = cgi_wrapper(app)
 | 
			
		||||
 | 
			
		||||
        def app_wrap(environ, start_response):
 | 
			
		||||
            # This is called when a http request is being processed
 | 
			
		||||
 | 
			
		||||
            return func(environ, start_response, app)
 | 
			
		||||
 | 
			
		||||
        return app_wrap
 | 
			
		||||
 | 
			
		||||
    return app_builder
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@middleware
 | 
			
		||||
def cgi_file_handler(environ, start_response, app):
 | 
			
		||||
    " Simple HTTP server to serve static files (.html, .css, etc.) "
 | 
			
		||||
 | 
			
		||||
    files = {
 | 
			
		||||
        '': 'text/html',
 | 
			
		||||
        'index.html': 'text/html',
 | 
			
		||||
        'sheet.xsl': 'text/xsl'}
 | 
			
		||||
 | 
			
		||||
    if 'REQUEST_URI' in environ:
 | 
			
		||||
        url = environ['REQUEST_URI'][1:]
 | 
			
		||||
 | 
			
		||||
    else:
 | 
			
		||||
        url = environ['PATH_INFO'][1:]
 | 
			
		||||
 | 
			
		||||
    if url in files:
 | 
			
		||||
        headers = {}
 | 
			
		||||
 | 
			
		||||
        if url == '':
 | 
			
		||||
            url = 'index.html'
 | 
			
		||||
 | 
			
		||||
        paths = [os.path.join(sys.prefix, 'share/morss/www', url),
 | 
			
		||||
            os.path.join(os.path.dirname(__file__), '../www', url)]
 | 
			
		||||
 | 
			
		||||
        for path in paths:
 | 
			
		||||
            try:
 | 
			
		||||
                body = open(path, 'rb').read()
 | 
			
		||||
 | 
			
		||||
                headers['status'] = '200 OK'
 | 
			
		||||
                headers['content-type'] = files[url]
 | 
			
		||||
                start_response(headers['status'], list(headers.items()))
 | 
			
		||||
                return [body]
 | 
			
		||||
 | 
			
		||||
            except IOError:
 | 
			
		||||
                continue
 | 
			
		||||
 | 
			
		||||
        else:
 | 
			
		||||
            # the for loop did not return, so here we are, i.e. no file found
 | 
			
		||||
            headers['status'] = '404 Not found'
 | 
			
		||||
            start_response(headers['status'], list(headers.items()))
 | 
			
		||||
            return ['Error %s' % headers['status']]
 | 
			
		||||
 | 
			
		||||
    else:
 | 
			
		||||
        return app(environ, start_response)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def cgi_get(environ, start_response):
 | 
			
		||||
    url, options = cgi_parse_environ(environ)
 | 
			
		||||
 | 
			
		||||
    # get page
 | 
			
		||||
    req = crawler.adv_get(url=url, timeout=TIMEOUT)
 | 
			
		||||
 | 
			
		||||
    if req['contenttype'] in ['text/html', 'application/xhtml+xml', 'application/xml']:
 | 
			
		||||
        if options.get == 'page':
 | 
			
		||||
            html = readabilite.parse(req['data'], encoding=req['encoding'])
 | 
			
		||||
            html.make_links_absolute(req['url'])
 | 
			
		||||
 | 
			
		||||
            kill_tags = ['script', 'iframe', 'noscript']
 | 
			
		||||
 | 
			
		||||
            for tag in kill_tags:
 | 
			
		||||
                for elem in html.xpath('//'+tag):
 | 
			
		||||
                    elem.getparent().remove(elem)
 | 
			
		||||
 | 
			
		||||
            output = lxml.etree.tostring(html.getroottree(), encoding='utf-8', method='html')
 | 
			
		||||
 | 
			
		||||
        elif options.get == 'article':
 | 
			
		||||
            output = readabilite.get_article(req['data'], url=req['url'], encoding_in=req['encoding'], encoding_out='utf-8', debug=options.debug)
 | 
			
		||||
 | 
			
		||||
        else:
 | 
			
		||||
            raise MorssException('no :get option passed')
 | 
			
		||||
 | 
			
		||||
    else:
 | 
			
		||||
        output = req['data']
 | 
			
		||||
 | 
			
		||||
    # return html page
 | 
			
		||||
    headers = {'status': '200 OK', 'content-type': 'text/html; charset=utf-8', 'X-Frame-Options': 'SAMEORIGIN'} # SAMEORIGIN to avoid potential abuse
 | 
			
		||||
    start_response(headers['status'], list(headers.items()))
 | 
			
		||||
    return [output]
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
dispatch_table = {
 | 
			
		||||
    'get': cgi_get,
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@middleware
 | 
			
		||||
def cgi_dispatcher(environ, start_response, app):
 | 
			
		||||
    url, options = cgi_parse_environ(environ)
 | 
			
		||||
 | 
			
		||||
    for key in dispatch_table.keys():
 | 
			
		||||
        if key in options:
 | 
			
		||||
            return dispatch_table[key](environ, start_response)
 | 
			
		||||
 | 
			
		||||
    return app(environ, start_response)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@middleware
 | 
			
		||||
def cgi_error_handler(environ, start_response, app):
 | 
			
		||||
    try:
 | 
			
		||||
        return app(environ, start_response)
 | 
			
		||||
 | 
			
		||||
    except (KeyboardInterrupt, SystemExit):
 | 
			
		||||
        raise
 | 
			
		||||
 | 
			
		||||
    except Exception as e:
 | 
			
		||||
        headers = {'status': '500 Oops', 'content-type': 'text/html'}
 | 
			
		||||
        start_response(headers['status'], list(headers.items()), sys.exc_info())
 | 
			
		||||
        log('ERROR: %s' % repr(e), force=True)
 | 
			
		||||
        return [cgitb.html(sys.exc_info())]
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@middleware
 | 
			
		||||
def cgi_encode(environ, start_response, app):
 | 
			
		||||
    out = app(environ, start_response)
 | 
			
		||||
    return [x if isinstance(x, bytes) else str(x).encode('utf-8') for x in out]
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
cgi_standalone_app = cgi_encode(cgi_error_handler(cgi_dispatcher(cgi_file_handler(cgi_app))))
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def cli_app():
 | 
			
		||||
    options = Options(filterOptions(parseOptions(sys.argv[1:-1])))
 | 
			
		||||
    url = sys.argv[-1]
 | 
			
		||||
 | 
			
		||||
    global DEBUG
 | 
			
		||||
    DEBUG = options.debug
 | 
			
		||||
 | 
			
		||||
    crawler.default_cache = crawler.SQLiteCache(os.path.expanduser('~/.cache/morss-cache.db'))
 | 
			
		||||
 | 
			
		||||
    url, rss = FeedFetch(url, options)
 | 
			
		||||
    rss = FeedGather(rss, url, options)
 | 
			
		||||
    out = FeedFormat(rss, options, 'unicode')
 | 
			
		||||
 | 
			
		||||
    if not options.silent:
 | 
			
		||||
        print(out)
 | 
			
		||||
 | 
			
		||||
    log('done')
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def isInt(string):
 | 
			
		||||
    try:
 | 
			
		||||
        int(string)
 | 
			
		||||
        return True
 | 
			
		||||
 | 
			
		||||
    except ValueError:
 | 
			
		||||
        return False
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def main():
 | 
			
		||||
    if 'REQUEST_URI' in os.environ:
 | 
			
		||||
        # mod_cgi
 | 
			
		||||
 | 
			
		||||
        app = cgi_app
 | 
			
		||||
        app = cgi_dispatcher(app)
 | 
			
		||||
        app = cgi_error_handler(app)
 | 
			
		||||
        app = cgi_encode(app)
 | 
			
		||||
 | 
			
		||||
        wsgiref.handlers.CGIHandler().run(app)
 | 
			
		||||
 | 
			
		||||
    elif len(sys.argv) <= 1 or isInt(sys.argv[1]):
 | 
			
		||||
        # start internal (basic) http server
 | 
			
		||||
 | 
			
		||||
        if len(sys.argv) > 1 and isInt(sys.argv[1]):
 | 
			
		||||
            argPort = int(sys.argv[1])
 | 
			
		||||
            if argPort > 0:
 | 
			
		||||
                port = argPort
 | 
			
		||||
 | 
			
		||||
            else:
 | 
			
		||||
                raise MorssException('Port must be positive integer')
 | 
			
		||||
 | 
			
		||||
        else:
 | 
			
		||||
            port = PORT
 | 
			
		||||
 | 
			
		||||
        app = cgi_app
 | 
			
		||||
        app = cgi_file_handler(app)
 | 
			
		||||
        app = cgi_dispatcher(app)
 | 
			
		||||
        app = cgi_error_handler(app)
 | 
			
		||||
        app = cgi_encode(app)
 | 
			
		||||
 | 
			
		||||
        print('Serving http://localhost:%s/' % port)
 | 
			
		||||
        httpd = wsgiref.simple_server.make_server('', port, app)
 | 
			
		||||
        httpd.serve_forever()
 | 
			
		||||
 | 
			
		||||
    else:
 | 
			
		||||
        # as a CLI app
 | 
			
		||||
        try:
 | 
			
		||||
            cli_app()
 | 
			
		||||
 | 
			
		||||
        except (KeyboardInterrupt, SystemExit):
 | 
			
		||||
            raise
 | 
			
		||||
 | 
			
		||||
        except Exception as e:
 | 
			
		||||
            print('ERROR: %s' % e.message)
 | 
			
		||||
 | 
			
		||||
if __name__ == '__main__':
 | 
			
		||||
    main()
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user