From da5442a1dcdb12b7a28595d5859db8c158e04d44 Mon Sep 17 00:00:00 2001
From: pictuga
Date: Sun, 29 Aug 2021 00:17:28 +0200
Subject: [PATCH] feedify: support any type (json, xml, html)

---
 morss/feeds.py | 20 +++++++++++++-------
 morss/morss.py | 24 ++++++++++++++++--------
 2 files changed, 29 insertions(+), 15 deletions(-)

diff --git a/morss/feeds.py b/morss/feeds.py
index e293951..dab4062 100644
--- a/morss/feeds.py
+++ b/morss/feeds.py
@@ -88,16 +88,21 @@ def parse_rules(filename=None):
     return rules
 
 
-def parse(data, url=None, encoding=None):
+def parse(data, url=None, encoding=None, ruleset=None):
     " Determine which ruleset to use "
 
-    rulesets = parse_rules()
+    if ruleset is not None:
+        rulesets = [ruleset]
+
+    else:
+        rulesets = parse_rules().values()
+
     parsers = [FeedXML, FeedHTML, FeedJSON]
 
     # 1) Look for a ruleset based on path
 
     if url is not None:
-        for ruleset in rulesets.values():
+        for ruleset in rulesets:
             if 'path' in ruleset:
                 for path in ruleset['path']:
                     if fnmatch(url, path):
@@ -111,9 +116,6 @@ def parse(data, url=None, encoding=None):
 
     # 3b) See if .items matches anything
     for parser in parsers:
-        ruleset_candidates = [x for x in rulesets.values() if x['mode'] == parser.mode and 'path' not in x]
-        # 'path' as they should have been caught beforehands
-
         try:
             feed = parser(data, encoding=encoding)
 
@@ -124,13 +126,17 @@ def parse(data, url=None, encoding=None):
 
         else:
             # parsing worked, now we try the rulesets
+            ruleset_candidates = [x for x in rulesets if x.get('mode', None) in (parser.mode, None) and 'path' not in x]
+            # 'path' as they should have been caught beforehands
+            # try anyway if no 'mode' specified
+
             for ruleset in ruleset_candidates:
                 feed.rules = ruleset
 
                 try:
                     feed.items[0]
 
-                except (AttributeError, IndexError):
+                except (AttributeError, IndexError, TypeError):
                     # parsing and or item picking did not work out
                     pass
 
diff --git a/morss/morss.py b/morss/morss.py
index a41e83a..00a6040 100644
--- a/morss/morss.py
+++ b/morss/morss.py
@@ -104,6 +104,13 @@ class Options:
     def __contains__(self, key):
         return key in self.options
 
+    def get(self, key, default=None):
+        if key in self.options:
+            return self.options[key]
+
+        else:
+            return default
+
 
 def ItemFix(item, options, feedurl='/'):
     """ Improves feed items (absolute links, resolve feedburner links, etc) """
@@ -276,22 +283,23 @@ def FeedFetch(url, options):
 
     if options.items:
         # using custom rules
-        rss = feeds.FeedHTML(req['data'], encoding=req['encoding'])
+        ruleset = {}
 
-        rss.rules['title'] = options.title if options.title else '//head/title'
-        rss.rules['desc'] = options.desc if options.desc else '//head/meta[@name="description"]/@content'
+        ruleset['items'] = options.items
 
-        rss.rules['items'] = options.items
+        ruleset['title'] = options.get('title', '//head/title')
+        ruleset['desc'] = options.get('desc', '//head/meta[@name="description"]/@content')
 
-        rss.rules['item_title'] = options.item_title if options.item_title else '.'
-        rss.rules['item_link'] = options.item_link if options.item_link else './@href|.//a/@href|ancestor::a/@href'
+        ruleset['item_title'] = options.get('item_title', '.')
+        ruleset['item_link'] = options.get('item_link', './@href|.//a/@href|ancestor::a/@href')
 
         if options.item_content:
-            rss.rules['item_content'] = options.item_content
+            ruleset['item_content'] = options.item_content
 
         if options.item_time:
-            rss.rules['item_time'] = options.item_time
+            ruleset['item_time'] = options.item_time
 
+        rss = feeds.parse(req['data'], encoding=req['encoding'], ruleset=ruleset)
        rss = rss.convert(feeds.FeedXML)
 
     else:
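
Usage sketch (not part of the patch): with the changes above, FeedFetch() builds a plain ruleset dict from the options and hands it to feeds.parse(), which tries the XML, HTML and JSON parsers in turn instead of hard-coding FeedHTML. The snippet below is a minimal way to drive that same code path directly, assuming the patched signature feeds.parse(data, url=None, encoding=None, ruleset=None); the sample HTML and the XPath values are illustrative only and do not come from the patch.

    from morss import feeds

    # Illustrative input; a ruleset without a 'mode' key is tried against every parser.
    html = b'<html><head><title>Example</title></head><body><a href="/a">A</a><a href="/b">B</a></body></html>'

    ruleset = {
        'items': '//body/a',        # same role as the items option used by FeedFetch()
        'title': '//head/title',
        'item_title': '.',
        'item_link': './@href',
    }

    rss = feeds.parse(html, encoding='utf-8', ruleset=ruleset)
    first = rss.items[0]            # the same check parse() uses to accept a ruleset
    rss = rss.convert(feeds.FeedXML)  # as FeedFetch() does before returning

Because the ruleset carries no 'mode', parse() keeps it as a candidate for each parser and settles on whichever one both parses the data and yields at least one item, which is what makes the same custom-rules options usable for JSON and XML sources as well as HTML.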