Add feedify, and use it in morss
This commit is contained in:
		
							
								
								
									
										69
									
								
								feedify.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										69
									
								
								feedify.py
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,69 @@
 | 
			
		||||
#!/usr/bin/env python
 | 
			
		||||
 | 
			
		||||
from ConfigParser import ConfigParser
 | 
			
		||||
from fnmatch import fnmatch
 | 
			
		||||
import feeds
 | 
			
		||||
import re
 | 
			
		||||
 | 
			
		||||
import urllib2
 | 
			
		||||
import lxml.html
 | 
			
		||||
import urlparse
 | 
			
		||||
 | 
			
		||||
def toclass(query):
	"""Expand the [class=NAME] shorthand in *query* into a real XPath class test.

	lxml's xpath has no CSS class selector, so every [class=NAME] token is
	rewritten into a whitespace-padded contains() check on @class.
	"""
	xpath_test = r'[@class and contains(concat(" ", normalize-space(@class), " "), " \1 ")]'
	return re.sub(r'\[class=([^\]]+)\]', xpath_test, query)
 | 
			
		||||
 | 
			
		||||
def getRule(link):
	"""Return the feedify.ini rule dict matching *link*, or False when none does.

	Each section of feedify.ini holds the extraction rules for one site; its
	'path' value is a multi-line list of fnmatch glob patterns. The first
	section with a pattern matching *link* wins.

	Bug fix: the original signature was ``def getRule(link=URL):`` — ``URL``
	is undefined, so importing the module raised NameError. ``link`` is now a
	required positional parameter, which is how every caller already uses it.
	"""
	config = ConfigParser()
	# ConfigParser.read silently skips a missing file, so no rules -> False
	config.read('feedify.ini')

	for section in config.sections():
		values = dict(config.items(section))
		# 'path' is a multi-line ini value; the first line is empty, drop it
		values['path'] = values['path'].split('\n')[1:]
		for path in values['path']:
			if fnmatch(link, path):
				return values

	return False
 | 
			
		||||
 | 
			
		||||
def supported(link):
	"""Tell whether some feedify.ini rule matches *link*."""
	rule = getRule(link)
	return rule is not False
 | 
			
		||||
 | 
			
		||||
def getString(expr, html):
	"""Evaluate *expr* (with [class=...] shorthand) against the lxml tree *html*.

	Returns the text content of the first matching node, or '' when the
	expression matches nothing.
	"""
	nodes = html.xpath(toclass(expr))
	return nodes[0].text_content() if nodes else ''
 | 
			
		||||
 | 
			
		||||
def build(link, data=None):
	"""Build a feeds.FeedParserAtom feed for *link* from its feedify.ini rule.

	Returns False when no rule matches the url. When *data* is None the page
	is fetched over HTTP; otherwise *data* is parsed as the page source.
	Each rule key ('title', 'items', 'item_title', 'item_link', 'item_desc',
	'item_content') is optional and only applied when present.
	"""
	rule = getRule(link)
	if rule is False:
		return False

	if data is None:
		# the caller did not hand us the page, fetch it ourselves
		data = urllib2.urlopen(link).read()

	html = lxml.html.fromstring(data)
	feed = feeds.FeedParserAtom()

	if 'title' in rule:
		feed.title = html.xpath(toclass(rule['title']))[0]

	if 'items' in rule:
		for node in html.xpath(toclass(rule['items'])):
			entry = {}

			if 'item_title' in rule:
				entry['title'] = node.xpath(toclass(rule['item_title']))[0]
			if 'item_link' in rule:
				# resolve relative hrefs against the page url
				entry['link'] = urlparse.urljoin(link, node.xpath(toclass(rule['item_link']))[0])
			if 'item_desc' in rule:
				entry['desc'] = lxml.html.tostring(node.xpath(toclass(rule['item_desc']))[0], encoding='unicode')
			if 'item_content' in rule:
				# NOTE(review): unlike 'desc', content is serialized without
				# encoding='unicode' (i.e. as bytes) — presumably deliberate,
				# confirm against feeds.FeedParserAtom's expectations
				entry['content'] = lxml.html.tostring(node.xpath(toclass(rule['item_content']))[0])

			feed.items.append(entry)

	return feed
 | 
			
		||||
							
								
								
									
										5
									
								
								morss.py
									
									
									
									
									
								
							
							
						
						
									
										5
									
								
								morss.py
									
									
									
									
									
								
							@@ -13,6 +13,7 @@ import lxml.html.clean
 | 
			
		||||
import lxml.builder
 | 
			
		||||
 | 
			
		||||
import feeds
 | 
			
		||||
import feedify
 | 
			
		||||
 | 
			
		||||
import httplib
 | 
			
		||||
import urllib2
 | 
			
		||||
@@ -377,6 +378,8 @@ def Gather(url, cachePath, progress=False):
 | 
			
		||||
 | 
			
		||||
		if xml[:5] == '<?xml' or con.info().type in MIMETYPE['xml']:
 | 
			
		||||
			style = 'normal'
 | 
			
		||||
		elif feedify.supported(url):
 | 
			
		||||
			style = 'feedify'
 | 
			
		||||
		elif con.info().type in MIMETYPE['html']:
 | 
			
		||||
			style = 'html'
 | 
			
		||||
		else:
 | 
			
		||||
@@ -389,6 +392,8 @@ def Gather(url, cachePath, progress=False):
 | 
			
		||||
 | 
			
		||||
	if style == 'normal':
 | 
			
		||||
		rss = feeds.parse(xml)
 | 
			
		||||
	elif style == 'feedify':
 | 
			
		||||
		xml = decodeHTML(xml)
 | 
			
		||||
		rss = feedify.build(url, xml)
 | 
			
		||||
	elif style == 'html':
 | 
			
		||||
		match = lxml.html.fromstring(xml).xpath("//link[@rel='alternate'][@type='application/rss+xml' or @type='application/atom+xml']/@href")
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user