diff --git a/morss/feedify.py b/morss/feedify.py
index c6a1316..c01aed4 100644
--- a/morss/feedify.py
+++ b/morss/feedify.py
@@ -114,7 +114,8 @@ def pre_worker(url):
 
 
 class Builder(object):
-    def __init__(self, link, data):
+    def __init__(self, link, data, rule=None):
+        self.link = link
         self.data = data
         self.rule = rule
 
@@ -124,7 +125,8 @@ class Builder(object):
         if isinstance(self.data, bytes):
             self.data = self.data.decode(crawler.detect_encoding(self.data), 'replace')
 
-        self.rule = get_rule(link)
+        if self.rule is None:
+            self.rule = get_rule(link)
 
         if self.rule['mode'] == 'xpath':
             self.doc = lxml.html.fromstring(self.data)
diff --git a/morss/morss.py b/morss/morss.py
index 4d83e41..8ec8a15 100644
--- a/morss/morss.py
+++ b/morss/morss.py
@@ -348,7 +348,9 @@ def FeedFetch(url, options):
     delay = 0
 
     try:
-        con = crawler.custom_handler('xml', True, delay, options.encoding, not feedify.supported(url)).open(url, timeout=TIMEOUT * 2) # feedify.supported(url) to use full crawler if using feedify
+        con = crawler.custom_handler('xml', True, delay, options.encoding,
+            not feedify.supported(url) or not options.items).open(url, timeout=TIMEOUT * 2)
+            # feedify.supported(url) to use full crawler if using feedify
         xml = con.read()
 
     except (IOError, HTTPException):
@@ -360,10 +362,29 @@
         rss = feeds.parse(xml)
 
     elif feedify.supported(url):
+        # using config file-based feedify
         feed = feedify.Builder(url, xml)
         feed.build()
         rss = feed.feed
 
+    elif options.items:
+        # using argument-based feedify
+        rule = {'items': options.items}
+        rule['mode'] = 'xpath'
+
+        if options.item_title:
+            rule['item_title'] = options.item_title
+        if options.item_link:
+            rule['item_link'] = options.item_link
+        if options.item_content:
+            rule['item_content'] = options.item_content
+        if options.item_time:
+            rule['item_time'] = options.item_time
+
+        feed = feedify.Builder(url, xml, rule)
+        feed.build()
+        rss = feed.feed
+
     else:
         log('random page')
         log(contenttype)
@@ -504,11 +525,14 @@ def cgi_app(environ, start_response):
     if url.startswith(':'):
         split = url.split('/', 1)
-        options = split[0].split(':')[1:]
+
+        options = split[0].replace('|', '/').split(':')[1:]
+
        if len(split) > 1:
            url = split[1]
 
        else:
            url = ''
 
+    else:
        options = []