feedify: support any type (json, xml, html)
parent
f9d7794bcc
commit
da5442a1dc
|
@ -88,16 +88,21 @@ def parse_rules(filename=None):
|
||||||
return rules
|
return rules
|
||||||
|
|
||||||
|
|
||||||
def parse(data, url=None, encoding=None):
|
def parse(data, url=None, encoding=None, ruleset=None):
|
||||||
" Determine which ruleset to use "
|
" Determine which ruleset to use "
|
||||||
|
|
||||||
rulesets = parse_rules()
|
if ruleset is not None:
|
||||||
|
rulesets = [ruleset]
|
||||||
|
|
||||||
|
else:
|
||||||
|
rulesets = parse_rules().values()
|
||||||
|
|
||||||
parsers = [FeedXML, FeedHTML, FeedJSON]
|
parsers = [FeedXML, FeedHTML, FeedJSON]
|
||||||
|
|
||||||
# 1) Look for a ruleset based on path
|
# 1) Look for a ruleset based on path
|
||||||
|
|
||||||
if url is not None:
|
if url is not None:
|
||||||
for ruleset in rulesets.values():
|
for ruleset in rulesets:
|
||||||
if 'path' in ruleset:
|
if 'path' in ruleset:
|
||||||
for path in ruleset['path']:
|
for path in ruleset['path']:
|
||||||
if fnmatch(url, path):
|
if fnmatch(url, path):
|
||||||
|
@ -111,9 +116,6 @@ def parse(data, url=None, encoding=None):
|
||||||
# 3b) See if .items matches anything
|
# 3b) See if .items matches anything
|
||||||
|
|
||||||
for parser in parsers:
|
for parser in parsers:
|
||||||
ruleset_candidates = [x for x in rulesets.values() if x['mode'] == parser.mode and 'path' not in x]
|
|
||||||
# 'path' as they should have been caught beforehands
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
feed = parser(data, encoding=encoding)
|
feed = parser(data, encoding=encoding)
|
||||||
|
|
||||||
|
@ -124,13 +126,17 @@ def parse(data, url=None, encoding=None):
|
||||||
else:
|
else:
|
||||||
# parsing worked, now we try the rulesets
|
# parsing worked, now we try the rulesets
|
||||||
|
|
||||||
|
ruleset_candidates = [x for x in rulesets if x.get('mode', None) in (parser.mode, None) and 'path' not in x]
|
||||||
|
# 'path' as they should have been caught beforehands
|
||||||
|
# try anyway if no 'mode' specified
|
||||||
|
|
||||||
for ruleset in ruleset_candidates:
|
for ruleset in ruleset_candidates:
|
||||||
feed.rules = ruleset
|
feed.rules = ruleset
|
||||||
|
|
||||||
try:
|
try:
|
||||||
feed.items[0]
|
feed.items[0]
|
||||||
|
|
||||||
except (AttributeError, IndexError):
|
except (AttributeError, IndexError, TypeError):
|
||||||
# parsing and or item picking did not work out
|
# parsing and or item picking did not work out
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
|
@ -104,6 +104,13 @@ class Options:
|
||||||
def __contains__(self, key):
|
def __contains__(self, key):
|
||||||
return key in self.options
|
return key in self.options
|
||||||
|
|
||||||
|
def get(self, key, default=None):
|
||||||
|
if key in self.options:
|
||||||
|
return self.options[key]
|
||||||
|
|
||||||
|
else:
|
||||||
|
return default
|
||||||
|
|
||||||
|
|
||||||
def ItemFix(item, options, feedurl='/'):
|
def ItemFix(item, options, feedurl='/'):
|
||||||
""" Improves feed items (absolute links, resolve feedburner links, etc) """
|
""" Improves feed items (absolute links, resolve feedburner links, etc) """
|
||||||
|
@ -276,22 +283,23 @@ def FeedFetch(url, options):
|
||||||
|
|
||||||
if options.items:
|
if options.items:
|
||||||
# using custom rules
|
# using custom rules
|
||||||
rss = feeds.FeedHTML(req['data'], encoding=req['encoding'])
|
ruleset = {}
|
||||||
|
|
||||||
rss.rules['title'] = options.title if options.title else '//head/title'
|
ruleset['items'] = options.items
|
||||||
rss.rules['desc'] = options.desc if options.desc else '//head/meta[@name="description"]/@content'
|
|
||||||
|
|
||||||
rss.rules['items'] = options.items
|
ruleset['title'] = options.get('title', '//head/title')
|
||||||
|
ruleset['desc'] = options.get('desc', '//head/meta[@name="description"]/@content')
|
||||||
|
|
||||||
rss.rules['item_title'] = options.item_title if options.item_title else '.'
|
ruleset['item_title'] = options.get('item_title', '.')
|
||||||
rss.rules['item_link'] = options.item_link if options.item_link else './@href|.//a/@href|ancestor::a/@href'
|
ruleset['item_link'] = options.get('item_link', './@href|.//a/@href|ancestor::a/@href')
|
||||||
|
|
||||||
if options.item_content:
|
if options.item_content:
|
||||||
rss.rules['item_content'] = options.item_content
|
ruleset['item_content'] = options.item_content
|
||||||
|
|
||||||
if options.item_time:
|
if options.item_time:
|
||||||
rss.rules['item_time'] = options.item_time
|
ruleset['item_time'] = options.item_time
|
||||||
|
|
||||||
|
rss = feeds.parse(req['data'], encoding=req['encoding'], ruleset=ruleset)
|
||||||
rss = rss.convert(feeds.FeedXML)
|
rss = rss.convert(feeds.FeedXML)
|
||||||
|
|
||||||
else:
|
else:
|
||||||
|
|
Loading…
Reference in New Issue