wsgi: limit supported mimetypes & return actual mimetype
	
		
			
	
		
	
	
		
	
		
			All checks were successful
		
		
	
	
		
			
				
	
				continuous-integration/drone/push Build is passing
				
			
		
		
	
	
				
					
				
			
		
			All checks were successful
		
		
	
	continuous-integration/drone/push Build is passing
				
			This commit is contained in:
		@@ -59,7 +59,9 @@ except NameError:
 | 
				
			|||||||
MIMETYPE = {
 | 
					MIMETYPE = {
 | 
				
			||||||
    'xml': ['text/xml', 'application/xml', 'application/rss+xml', 'application/rdf+xml', 'application/atom+xml', 'application/xhtml+xml'],
 | 
					    'xml': ['text/xml', 'application/xml', 'application/rss+xml', 'application/rdf+xml', 'application/atom+xml', 'application/xhtml+xml'],
 | 
				
			||||||
    'rss': ['application/rss+xml', 'application/rdf+xml', 'application/atom+xml'],
 | 
					    'rss': ['application/rss+xml', 'application/rdf+xml', 'application/atom+xml'],
 | 
				
			||||||
    'html': ['text/html', 'application/xhtml+xml', 'application/xml']}
 | 
					    'html': ['text/html', 'application/xhtml+xml', 'application/xml'],
 | 
				
			||||||
 | 
					    'json': ['application/json'],
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
DEFAULT_UAS = [
 | 
					DEFAULT_UAS = [
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -192,6 +192,7 @@ def cgi_get(environ, start_response):
 | 
				
			|||||||
    url, options = cgi_parse_environ(environ)
 | 
					    url, options = cgi_parse_environ(environ)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    # get page
 | 
					    # get page
 | 
				
			||||||
 | 
					    if options['get'] in ('page', 'article'):
 | 
				
			||||||
        req = crawler.adv_get(url=url, timeout=TIMEOUT)
 | 
					        req = crawler.adv_get(url=url, timeout=TIMEOUT)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        if req['contenttype'] in crawler.MIMETYPE['html']:
 | 
					        if req['contenttype'] in crawler.MIMETYPE['html']:
 | 
				
			||||||
@@ -207,17 +208,20 @@ def cgi_get(environ, start_response):
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
                output = lxml.etree.tostring(html.getroottree(), encoding='utf-8', method='html')
 | 
					                output = lxml.etree.tostring(html.getroottree(), encoding='utf-8', method='html')
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        elif options['get'] == 'article':
 | 
					            else: # i.e. options['get'] == 'article'
 | 
				
			||||||
                output = readabilite.get_article(req['data'], url=req['url'], encoding_in=req['encoding'], encoding_out='utf-8', debug=options.debug)
 | 
					                output = readabilite.get_article(req['data'], url=req['url'], encoding_in=req['encoding'], encoding_out='utf-8', debug=options.debug)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        elif req['contenttype'] in crawler.MIMETYPE['xml'] + crawler.MIMETYPE['rss'] + crawler.MIMETYPE['json']:
 | 
				
			||||||
 | 
					            output = req['data']
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        else:
 | 
				
			||||||
 | 
					            raise MorssException('unsupported mimetype')
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    else:
 | 
					    else:
 | 
				
			||||||
        raise MorssException('no :get option passed')
 | 
					        raise MorssException('no :get option passed')
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    else:
 | 
					 | 
				
			||||||
        output = req['data']
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    # return html page
 | 
					    # return html page
 | 
				
			||||||
    headers = {'status': '200 OK', 'content-type': 'text/html; charset=utf-8', 'X-Frame-Options': 'SAMEORIGIN'} # SAMEORIGIN to avoid potential abuse
 | 
					    headers = {'status': '200 OK', 'content-type': req['contenttype'], 'X-Frame-Options': 'SAMEORIGIN'} # SAMEORIGIN to avoid potential abuse
 | 
				
			||||||
    start_response(headers['status'], list(headers.items()))
 | 
					    start_response(headers['status'], list(headers.items()))
 | 
				
			||||||
    return [output]
 | 
					    return [output]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user