Add support for JSON APIs in feedify
This commit is contained in:
		
							
								
								
									
										168
									
								
								feedify.py
									
									
									
									
									
								
							
							
						
						
									
										168
									
								
								feedify.py
									
									
									
									
									
								
							@@ -3,10 +3,12 @@
 | 
				
			|||||||
from ConfigParser import ConfigParser
 | 
					from ConfigParser import ConfigParser
 | 
				
			||||||
from fnmatch import fnmatch
 | 
					from fnmatch import fnmatch
 | 
				
			||||||
import feeds
 | 
					import feeds
 | 
				
			||||||
 | 
					import morss
 | 
				
			||||||
import re
 | 
					import re
 | 
				
			||||||
 | 
					
 | 
				
			||||||
import urllib2
 | 
					import urllib2
 | 
				
			||||||
import lxml.html
 | 
					import lxml.html
 | 
				
			||||||
 | 
					import json
 | 
				
			||||||
import urlparse
 | 
					import urlparse
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def toclass(query):
 | 
					def toclass(query):
 | 
				
			||||||
@@ -29,49 +31,139 @@ def getRule(link):
 | 
				
			|||||||
def supported(link):
 | 
					def supported(link):
 | 
				
			||||||
	return getRule(link) is not False
 | 
						return getRule(link) is not False
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def getString(html, expr):
 | 
					def formatString(string, getter, error=False):
 | 
				
			||||||
	matches = html.xpath(toclass(expr))
 | 
						out = ""
 | 
				
			||||||
	if len(matches):
 | 
						char = string[0]
 | 
				
			||||||
		out = ''
 | 
					
 | 
				
			||||||
		for match in matches:
 | 
						follow = string[1:]
 | 
				
			||||||
			if isinstance(match, basestring):
 | 
					
 | 
				
			||||||
				out += match
 | 
						if char == '"':
 | 
				
			||||||
			elif isinstance(match, lxml.html.HtmlElement):
 | 
							match = follow.partition('"')
 | 
				
			||||||
				out += lxml.html.tostring(match)
 | 
							out = match[0]
 | 
				
			||||||
		return out
 | 
							if len(match) >= 2:
 | 
				
			||||||
 | 
								next = match[2]
 | 
				
			||||||
 | 
							else:
 | 
				
			||||||
 | 
								next = None
 | 
				
			||||||
 | 
						elif char == '{':
 | 
				
			||||||
 | 
							match = follow.partition('}')
 | 
				
			||||||
 | 
							try:
 | 
				
			||||||
 | 
								test = formatString(match[0], getter, True)
 | 
				
			||||||
 | 
							except ValueError, KeyError:
 | 
				
			||||||
 | 
								pass
 | 
				
			||||||
 | 
							else:
 | 
				
			||||||
 | 
								out = test
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							next = match[2]
 | 
				
			||||||
 | 
						elif char == ' ':
 | 
				
			||||||
 | 
							next = follow
 | 
				
			||||||
 | 
						elif re.search(r'^([^{}<>" ]+)(?:<"([^>]+)">)?(.*)$', string):
 | 
				
			||||||
 | 
							match = re.search(r'^([^{}<>" ]+)(?:<"([^>]+)">)?(.*)$', string).groups()
 | 
				
			||||||
 | 
							rawValue = getter(match[0])
 | 
				
			||||||
 | 
							print repr(rawValue)
 | 
				
			||||||
 | 
							if not isinstance(rawValue, basestring):
 | 
				
			||||||
 | 
								if match[1] is not None:
 | 
				
			||||||
 | 
									out = match[1].join(rawValue)
 | 
				
			||||||
 | 
								else:
 | 
				
			||||||
 | 
									out = ''.join(rawValue)
 | 
				
			||||||
 | 
							if not out and error:
 | 
				
			||||||
 | 
								raise ValueError
 | 
				
			||||||
 | 
							next = match[2]
 | 
				
			||||||
	else:
 | 
						else:
 | 
				
			||||||
		return ''
 | 
							raise ValueError('bogus string')
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def build(link, data=None):
 | 
						if next is not None and len(next):
 | 
				
			||||||
	rule = getRule(link)
 | 
							return out + formatString(next, getter, error)
 | 
				
			||||||
	if rule is False:
 | 
						else:
 | 
				
			||||||
		return False
 | 
							return out
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	if data is None:
 | 
					class Builder(object):
 | 
				
			||||||
		data = urllib2.urlopen(link).read()
 | 
						def __init__(self, link, data=None):
 | 
				
			||||||
 | 
							self.link = link
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	html = lxml.html.fromstring(data)
 | 
							if data is None:
 | 
				
			||||||
	feed = feeds.FeedParserAtom()
 | 
								data = urllib2.urlopen(link).read()
 | 
				
			||||||
 | 
							self.data = data
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	if 'title' in rule:
 | 
							self.rule = getRule(link)
 | 
				
			||||||
		feed.title = getString(html, rule['title'])
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
	if 'items' in rule:
 | 
							if self.rule['mode'] == 'xpath':
 | 
				
			||||||
		for item in html.xpath(toclass(rule['items'])):
 | 
								self.data = morss.decodeHTML(self.data)
 | 
				
			||||||
			feedItem = {}
 | 
								self.doc = lxml.html.fromstring(self.data)
 | 
				
			||||||
 | 
							elif self.rule['mode'] == 'json':
 | 
				
			||||||
 | 
								self.doc = json.loads(data)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
			if 'item_title' in rule:
 | 
							self.feed = feeds.FeedParserAtom()
 | 
				
			||||||
				feedItem['title'] = getString(item, rule['item_title'])
 | 
					 | 
				
			||||||
			if 'item_link' in rule:
 | 
					 | 
				
			||||||
				url = getString(item, rule['item_link'])
 | 
					 | 
				
			||||||
				url = urlparse.urljoin(link, url)
 | 
					 | 
				
			||||||
				feedItem['link'] = url
 | 
					 | 
				
			||||||
			if 'item_desc' in rule:
 | 
					 | 
				
			||||||
				feedItem['desc'] = getString(item, rule['item_desc'])
 | 
					 | 
				
			||||||
			if 'item_content' in rule:
 | 
					 | 
				
			||||||
				feedItem['content'] = getString(item, rule['item_content'])
 | 
					 | 
				
			||||||
			if 'item_time' in rule:
 | 
					 | 
				
			||||||
				feedItem['updated'] = getString(item, rule['item_time'])
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
			feed.items.append(feedItem)
 | 
						def raw(self, html, expr):
 | 
				
			||||||
	return feed
 | 
							if self.rule['mode'] == 'xpath':
 | 
				
			||||||
 | 
								print 1, toclass(expr)
 | 
				
			||||||
 | 
								return html.xpath(toclass(expr))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							elif self.rule['mode'] == 'json':
 | 
				
			||||||
 | 
								a = [html]
 | 
				
			||||||
 | 
								b = []
 | 
				
			||||||
 | 
								for x in expr.strip(".").split("."):
 | 
				
			||||||
 | 
									match = re.search(r'^([^\[]+)(?:\[([0-9]+)\])?$', x).groups()
 | 
				
			||||||
 | 
									for elem in a:
 | 
				
			||||||
 | 
										if isinstance(elem, dict):
 | 
				
			||||||
 | 
											kids = elem.get(match[0])
 | 
				
			||||||
 | 
											if kids is None:
 | 
				
			||||||
 | 
												pass
 | 
				
			||||||
 | 
											elif isinstance(kids, list):
 | 
				
			||||||
 | 
												[b.append(i) for i in kids]
 | 
				
			||||||
 | 
											elif isinstance(kids, basestring):
 | 
				
			||||||
 | 
												b.append(kids.replace('\n', '<br/>'))
 | 
				
			||||||
 | 
											else:
 | 
				
			||||||
 | 
												b.append(kids)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
									if match[1] is None:
 | 
				
			||||||
 | 
										a = b
 | 
				
			||||||
 | 
									else:
 | 
				
			||||||
 | 
										if len(b)-1 >= int(match[1]):
 | 
				
			||||||
 | 
											a = [b[int(match[1])]]
 | 
				
			||||||
 | 
										else:
 | 
				
			||||||
 | 
											a = []
 | 
				
			||||||
 | 
									b = []
 | 
				
			||||||
 | 
								return a
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						def strings(self, html, expr):
 | 
				
			||||||
 | 
							if self.rule['mode'] == 'xpath':
 | 
				
			||||||
 | 
								out = []
 | 
				
			||||||
 | 
								for match in self.raw(html, expr):
 | 
				
			||||||
 | 
									if isinstance(match, basestring):
 | 
				
			||||||
 | 
										out.append(match)
 | 
				
			||||||
 | 
									elif isinstance(match, lxml.html.HtmlElement):
 | 
				
			||||||
 | 
										out.append(lxml.html.tostring(match))
 | 
				
			||||||
 | 
								return out
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							elif self.rule['mode'] == 'json':
 | 
				
			||||||
 | 
								return self.raw(html, expr)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						def string(self, html, expr):
 | 
				
			||||||
 | 
							getter = lambda x: self.strings(html, x)
 | 
				
			||||||
 | 
							return formatString(self.rule[expr], getter)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						def build(self):
 | 
				
			||||||
 | 
							if 'title' in self.rule:
 | 
				
			||||||
 | 
								self.feed.title = self.string(self.doc, 'title')
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							if 'items' in self.rule:
 | 
				
			||||||
 | 
								matches = self.raw(self.doc, self.rule['items'])
 | 
				
			||||||
 | 
								if matches and len(matches):
 | 
				
			||||||
 | 
									for item in matches:
 | 
				
			||||||
 | 
										feedItem = {}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
										if 'item_title' in self.rule:
 | 
				
			||||||
 | 
											feedItem['title'] = self.string(item, 'item_title')
 | 
				
			||||||
 | 
										if 'item_link' in self.rule:
 | 
				
			||||||
 | 
											url = self.string(item, 'item_link')
 | 
				
			||||||
 | 
											url = urlparse.urljoin(self.link, url)
 | 
				
			||||||
 | 
											feedItem['link'] = url
 | 
				
			||||||
 | 
										if 'item_desc' in self.rule:
 | 
				
			||||||
 | 
											feedItem['desc'] = self.string(item, 'item_desc')
 | 
				
			||||||
 | 
										if 'item_content' in self.rule:
 | 
				
			||||||
 | 
											feedItem['content'] = self.string(item, 'item_content')
 | 
				
			||||||
 | 
										if 'item_time' in self.rule:
 | 
				
			||||||
 | 
											feedItem['updated'] = self.string(item, 'item_time')
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
										self.feed.items.append(feedItem)
 | 
				
			||||||
 
 | 
				
			|||||||
							
								
								
									
										5
									
								
								morss.py
									
									
									
									
									
								
							
							
						
						
									
										5
									
								
								morss.py
									
									
									
									
									
								
							@@ -410,8 +410,9 @@ def Gather(url, cachePath, options):
 | 
				
			|||||||
	if style == 'normal':
 | 
						if style == 'normal':
 | 
				
			||||||
		rss = feeds.parse(xml)
 | 
							rss = feeds.parse(xml)
 | 
				
			||||||
	elif style == 'feedify':
 | 
						elif style == 'feedify':
 | 
				
			||||||
		xml = decodeHTML(xml)
 | 
							feed = feedify.Builder(url, xml)
 | 
				
			||||||
		rss = feedify.build(url, xml)
 | 
							feed.build()
 | 
				
			||||||
 | 
							rss = feed.feed
 | 
				
			||||||
	elif style == 'html':
 | 
						elif style == 'html':
 | 
				
			||||||
		match = lxml.html.fromstring(xml).xpath("//link[@rel='alternate'][@type='application/rss+xml' or @type='application/atom+xml']/@href")
 | 
							match = lxml.html.fromstring(xml).xpath("//link[@rel='alternate'][@type='application/rss+xml' or @type='application/atom+xml']/@href")
 | 
				
			||||||
		if len(match):
 | 
							if len(match):
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user