Add feedify, and use it in morss
parent
9bc4417be3
commit
da14242bcf
|
@ -0,0 +1,69 @@
|
||||||
|
#!/usr/bin/env python
|
||||||
|
|
||||||
|
from ConfigParser import ConfigParser
|
||||||
|
from fnmatch import fnmatch
|
||||||
|
import feeds
|
||||||
|
import re
|
||||||
|
|
||||||
|
import urllib2
|
||||||
|
import lxml.html
|
||||||
|
import urlparse
|
||||||
|
|
||||||
|
def toclass(query):
    """Expand the pseudo-selector ``[class=NAME]`` found in *query* into a
    real XPath predicate matching NAME as a whitespace-delimited class token.

    Everything else in the query string passes through untouched.
    """
    xpath_predicate = (r'[@class and contains(concat(" ", '
                       r'normalize-space(@class), " "), " \1 ")]')
    return re.sub(r'\[class=([^\]]+)\]', xpath_predicate, query)
|
||||||
|
|
||||||
|
def getRule(link):
    """Return the first feedify.ini section whose 'path' patterns match *link*.

    BUGFIX: the original signature was ``def getRule(link=URL)``, but no name
    ``URL`` exists anywhere in the module, so importing the file raised
    NameError while evaluating the default.  The parameter is now required;
    every caller in this file already passes the link positionally.

    Each section's 'path' option is a multi-line value holding fnmatch glob
    patterns, one per line; because the value starts on the line after the
    key, its first split element is empty and gets dropped.

    Returns the matching section's options as a dict (with 'path' replaced
    by the list of patterns), or False when no pattern matches *link*.
    """
    config = ConfigParser()
    # Missing file is not an error: read() just leaves the parser empty.
    config.read('feedify.ini')

    for section in config.sections():
        values = dict(config.items(section))
        # Drop the empty first element produced by the leading newline.
        values['path'] = values['path'].split('\n')[1:]
        for path in values['path']:
            if fnmatch(link, path):
                return values

    # No section matched this link.
    return False
|
||||||
|
|
||||||
|
def supported(link):
    """Return True when some feedify rule applies to *link*."""
    rule = getRule(link)
    return rule is not False
|
||||||
|
|
||||||
|
def getString(expr, html):
    """Evaluate the class-aware XPath *expr* against the lxml tree *html*.

    Returns the text content of the first matching element, or the empty
    string when nothing matches.
    """
    hits = html.xpath(toclass(expr))
    if not hits:
        return ''
    return hits[0].text_content()
|
||||||
|
|
||||||
|
def build(link, data=None):
    """Build a feeds.FeedParserAtom feed from the HTML page at *link*.

    Looks up the feedify.ini rule matching *link* via getRule(); returns
    False when no rule applies.  *data* may carry the already-downloaded
    page body; when None the page is fetched with urllib2.
    """
    rule = getRule(link)
    if rule is False:
        return False

    if data is None:
        # Caller supplied no body: download the page ourselves.
        data = urllib2.urlopen(link).read()

    html = lxml.html.fromstring(data)
    feed = feeds.FeedParserAtom()

    if 'title' in rule:
        # NOTE(review): xpath() yields the raw first match here (an element
        # or a string, depending on the rule's expression) — confirm
        # feeds.FeedParserAtom.title accepts either.
        feed.title = html.xpath(toclass(rule['title']))[0]

    if 'items' in rule:
        # One feed entry per element matched by the 'items' expression.
        for item in html.xpath(toclass(rule['items'])):
            feedItem = {}

            if 'item_title' in rule:
                feedItem['title'] = item.xpath(toclass(rule['item_title']))[0]
            if 'item_link' in rule:
                url = item.xpath(toclass(rule['item_link']))[0]
                # Resolve relative hrefs against the page URL.
                url = urlparse.urljoin(link, url)
                feedItem['link'] = url
            if 'item_desc' in rule:
                # Serialized as unicode markup, not extracted text.
                feedItem['desc'] = lxml.html.tostring(item.xpath(toclass(rule['item_desc']))[0], encoding='unicode')
            if 'item_content' in rule:
                # NOTE(review): unlike item_desc this omits encoding='unicode',
                # so tostring() returns a byte string here — confirm the
                # asymmetry is intentional.
                feedItem['content'] = lxml.html.tostring(item.xpath(toclass(rule['item_content']))[0])

            feed.items.append(feedItem)
    return feed
|
5
morss.py
5
morss.py
|
@ -13,6 +13,7 @@ import lxml.html.clean
|
||||||
import lxml.builder
|
import lxml.builder
|
||||||
|
|
||||||
import feeds
|
import feeds
|
||||||
|
import feedify
|
||||||
|
|
||||||
import httplib
|
import httplib
|
||||||
import urllib2
|
import urllib2
|
||||||
|
@ -377,6 +378,8 @@ def Gather(url, cachePath, progress=False):
|
||||||
|
|
||||||
if xml[:5] == '<?xml' or con.info().type in MIMETYPE['xml']:
|
if xml[:5] == '<?xml' or con.info().type in MIMETYPE['xml']:
|
||||||
style = 'normal'
|
style = 'normal'
|
||||||
|
elif feedify.supported(url):
|
||||||
|
style = 'feedify'
|
||||||
elif con.info().type in MIMETYPE['html']:
|
elif con.info().type in MIMETYPE['html']:
|
||||||
style = 'html'
|
style = 'html'
|
||||||
else:
|
else:
|
||||||
|
@ -389,6 +392,8 @@ def Gather(url, cachePath, progress=False):
|
||||||
|
|
||||||
if style == 'normal':
|
if style == 'normal':
|
||||||
rss = feeds.parse(xml)
|
rss = feeds.parse(xml)
|
||||||
|
elif style == 'feedify':
|
||||||
|
xml = decodeHTML(xml)
|
||||||
rss = feedify.build(url, xml)
|
rss = feedify.build(url, xml)
|
||||||
elif style == 'html':
|
elif style == 'html':
|
||||||
match = lxml.html.fromstring(xml).xpath("//link[@rel='alternate'][@type='application/rss+xml' or @type='application/atom+xml']/@href")
|
match = lxml.html.fromstring(xml).xpath("//link[@rel='alternate'][@type='application/rss+xml' or @type='application/atom+xml']/@href")
|
||||||
|
|
Loading…
Reference in New Issue