feedify: accept xpath rules passed as parameters
This commit is contained in:
		@@ -114,7 +114,8 @@ def pre_worker(url):
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class Builder(object):
 | 
			
		||||
    def __init__(self, link, data):
 | 
			
		||||
    def __init__(self, link, data, rule=None):
 | 
			
		||||
 | 
			
		||||
        self.link = link
 | 
			
		||||
        self.data = data
 | 
			
		||||
        self.rule = rule
 | 
			
		||||
@@ -124,7 +125,8 @@ class Builder(object):
 | 
			
		||||
        if isinstance(self.data, bytes):
 | 
			
		||||
            self.data = self.data.decode(crawler.detect_encoding(self.data), 'replace')
 | 
			
		||||
 | 
			
		||||
        self.rule = get_rule(link)
 | 
			
		||||
        if self.rule is None:
 | 
			
		||||
            self.rule = get_rule(link)
 | 
			
		||||
 | 
			
		||||
        if self.rule['mode'] == 'xpath':
 | 
			
		||||
            self.doc = lxml.html.fromstring(self.data)
 | 
			
		||||
 
 | 
			
		||||
@@ -348,7 +348,9 @@ def FeedFetch(url, options):
 | 
			
		||||
        delay = 0
 | 
			
		||||
 | 
			
		||||
    try:
 | 
			
		||||
        con = crawler.custom_handler('xml', True, delay, options.encoding, not feedify.supported(url)).open(url, timeout=TIMEOUT * 2) # feedify.supported(url) to use full crawler if using feedify
 | 
			
		||||
        con = crawler.custom_handler('xml', True, delay, options.encoding,
 | 
			
		||||
            not feedify.supported(url) or not options.items).open(url, timeout=TIMEOUT * 2)
 | 
			
		||||
            # feedify.supported(url) to use full crawler if using feedify
 | 
			
		||||
        xml = con.read()
 | 
			
		||||
 | 
			
		||||
    except (IOError, HTTPException):
 | 
			
		||||
@@ -360,10 +362,29 @@ def FeedFetch(url, options):
 | 
			
		||||
        rss = feeds.parse(xml)
 | 
			
		||||
 | 
			
		||||
    elif feedify.supported(url):
 | 
			
		||||
        # using config file-based feedify
 | 
			
		||||
        feed = feedify.Builder(url, xml)
 | 
			
		||||
        feed.build()
 | 
			
		||||
        rss = feed.feed
 | 
			
		||||
 | 
			
		||||
    elif options.items:
 | 
			
		||||
        # using argument-based feedify
 | 
			
		||||
        rule = {'items': options.items}
 | 
			
		||||
        rule['mode'] = 'xpath'
 | 
			
		||||
 | 
			
		||||
        if options.item_title:
 | 
			
		||||
            rule['item_title'] = options.item_title
 | 
			
		||||
        if options.item_link:
 | 
			
		||||
            rule['item_link'] = options.item_link
 | 
			
		||||
        if options.item_content:
 | 
			
		||||
            rule['item_content'] = options.item_content
 | 
			
		||||
        if options.item_time:
 | 
			
		||||
            rule['item_time'] = options.item_time
 | 
			
		||||
 | 
			
		||||
        feed = feedify.Builder(url, xml, rule)
 | 
			
		||||
        feed.build()
 | 
			
		||||
        rss = feed.feed
 | 
			
		||||
 | 
			
		||||
    else:
 | 
			
		||||
        log('random page')
 | 
			
		||||
        log(contenttype)
 | 
			
		||||
@@ -504,11 +525,14 @@ def cgi_app(environ, start_response):
 | 
			
		||||
 | 
			
		||||
    if url.startswith(':'):
 | 
			
		||||
        split = url.split('/', 1)
 | 
			
		||||
        options = split[0].split(':')[1:]
 | 
			
		||||
 | 
			
		||||
        options = split[0].replace('|', '/').split(':')[1:]
 | 
			
		||||
 | 
			
		||||
        if len(split) > 1:
 | 
			
		||||
            url = split[1]
 | 
			
		||||
        else:
 | 
			
		||||
            url = ''
 | 
			
		||||
 | 
			
		||||
    else:
 | 
			
		||||
        options = []
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user