wsgi: limit supported mimetypes & return actual mimetype
continuous-integration/drone/push Build is passing Details

master
pictuga 2022-01-23 11:44:07 +01:00
parent e81f6b173f
commit 4d6d3c9239
2 changed files with 22 additions and 16 deletions

View File

@ -59,7 +59,9 @@ except NameError:
MIMETYPE = { MIMETYPE = {
'xml': ['text/xml', 'application/xml', 'application/rss+xml', 'application/rdf+xml', 'application/atom+xml', 'application/xhtml+xml'], 'xml': ['text/xml', 'application/xml', 'application/rss+xml', 'application/rdf+xml', 'application/atom+xml', 'application/xhtml+xml'],
'rss': ['application/rss+xml', 'application/rdf+xml', 'application/atom+xml'], 'rss': ['application/rss+xml', 'application/rdf+xml', 'application/atom+xml'],
'html': ['text/html', 'application/xhtml+xml', 'application/xml']} 'html': ['text/html', 'application/xhtml+xml', 'application/xml'],
'json': ['application/json'],
}
DEFAULT_UAS = [ DEFAULT_UAS = [

View File

@ -192,6 +192,7 @@ def cgi_get(environ, start_response):
url, options = cgi_parse_environ(environ) url, options = cgi_parse_environ(environ)
# get page # get page
if options['get'] in ('page', 'article'):
req = crawler.adv_get(url=url, timeout=TIMEOUT) req = crawler.adv_get(url=url, timeout=TIMEOUT)
if req['contenttype'] in crawler.MIMETYPE['html']: if req['contenttype'] in crawler.MIMETYPE['html']:
@ -207,17 +208,20 @@ def cgi_get(environ, start_response):
output = lxml.etree.tostring(html.getroottree(), encoding='utf-8', method='html') output = lxml.etree.tostring(html.getroottree(), encoding='utf-8', method='html')
elif options['get'] == 'article': else: # i.e. options['get'] == 'article'
output = readabilite.get_article(req['data'], url=req['url'], encoding_in=req['encoding'], encoding_out='utf-8', debug=options.debug) output = readabilite.get_article(req['data'], url=req['url'], encoding_in=req['encoding'], encoding_out='utf-8', debug=options.debug)
elif req['contenttype'] in crawler.MIMETYPE['xml'] + crawler.MIMETYPE['rss'] + crawler.MIMETYPE['json']:
output = req['data']
else:
raise MorssException('unsupported mimetype')
else: else:
raise MorssException('no :get option passed') raise MorssException('no :get option passed')
else:
output = req['data']
# return html page # return html page
headers = {'status': '200 OK', 'content-type': 'text/html; charset=utf-8', 'X-Frame-Options': 'SAMEORIGIN'} # SAMEORIGIN to avoid potential abuse headers = {'status': '200 OK', 'content-type': req['contenttype'], 'X-Frame-Options': 'SAMEORIGIN'} # SAMEORIGIN to avoid potential abuse
start_response(headers['status'], list(headers.items())) start_response(headers['status'], list(headers.items()))
return [output] return [output]