Improve feedify string grabbing
parent
050be7690d
commit
685bc7988e
|
@ -6,6 +6,6 @@ path=
|
||||||
title= //head/title/text()
|
title= //head/title/text()
|
||||||
items= //div[class=tweet]
|
items= //div[class=tweet]
|
||||||
|
|
||||||
item_title= (.//span[class=username]//text())[2]
|
item_title= .//span[class=username]//text()
|
||||||
item_link= .//a[class=details]/@href
|
item_link= .//a[class=details]/@href
|
||||||
item_content= .//p[class=tweet-text]
|
item_content= .//p[class=tweet-text]
|
||||||
|
|
26
feedify.py
26
feedify.py
|
@ -14,7 +14,7 @@ def toclass(query):
|
||||||
repl = r'[@class and contains(concat(" ", normalize-space(@class), " "), " \1 ")]'
|
repl = r'[@class and contains(concat(" ", normalize-space(@class), " "), " \1 ")]'
|
||||||
return re.sub(pattern, repl, query)
|
return re.sub(pattern, repl, query)
|
||||||
|
|
||||||
def getRule(link=URL):
|
def getRule(link):
|
||||||
config = ConfigParser()
|
config = ConfigParser()
|
||||||
config.read('feedify.ini')
|
config.read('feedify.ini')
|
||||||
|
|
||||||
|
@ -29,10 +29,16 @@ def getRule(link=URL):
|
||||||
def supported(link):
|
def supported(link):
|
||||||
return getRule(link) is not False
|
return getRule(link) is not False
|
||||||
|
|
||||||
def getString(expr, html):
|
def getString(html, expr):
|
||||||
match = html.xpath(toclass(expr))
|
matches = html.xpath(toclass(expr))
|
||||||
if len(match):
|
if len(matches):
|
||||||
return match[0].text_content()
|
out = ''
|
||||||
|
for match in matches:
|
||||||
|
if isinstance(match, basestring):
|
||||||
|
out += match
|
||||||
|
elif isinstance(match, lxml.html.HtmlElement):
|
||||||
|
out += lxml.html.tostring(match)
|
||||||
|
return out
|
||||||
else:
|
else:
|
||||||
return ''
|
return ''
|
||||||
|
|
||||||
|
@ -48,22 +54,22 @@ def build(link, data=None):
|
||||||
feed = feeds.FeedParserAtom()
|
feed = feeds.FeedParserAtom()
|
||||||
|
|
||||||
if 'title' in rule:
|
if 'title' in rule:
|
||||||
feed.title = html.xpath(toclass(rule['title']))[0]
|
feed.title = getString(html, rule['title'])
|
||||||
|
|
||||||
if 'items' in rule:
|
if 'items' in rule:
|
||||||
for item in html.xpath(toclass(rule['items'])):
|
for item in html.xpath(toclass(rule['items'])):
|
||||||
feedItem = {}
|
feedItem = {}
|
||||||
|
|
||||||
if 'item_title' in rule:
|
if 'item_title' in rule:
|
||||||
feedItem['title'] = item.xpath(toclass(rule['item_title']))[0]
|
feedItem['title'] = getString(item, rule['item_title'])
|
||||||
if 'item_link' in rule:
|
if 'item_link' in rule:
|
||||||
url = item.xpath(toclass(rule['item_link']))[0]
|
url = getString(item, rule['item_link'])
|
||||||
url = urlparse.urljoin(link, url)
|
url = urlparse.urljoin(link, url)
|
||||||
feedItem['link'] = url
|
feedItem['link'] = url
|
||||||
if 'item_desc' in rule:
|
if 'item_desc' in rule:
|
||||||
feedItem['desc'] = lxml.html.tostring(item.xpath(toclass(rule['item_desc']))[0], encoding='unicode')
|
feedItem['desc'] = getString(item, rule['item_desc'])
|
||||||
if 'item_content' in rule:
|
if 'item_content' in rule:
|
||||||
feedItem['content'] = lxml.html.tostring(item.xpath(toclass(rule['item_content']))[0])
|
feedItem['content'] = getString(item, rule['item_content'])
|
||||||
|
|
||||||
feed.items.append(feedItem)
|
feed.items.append(feedItem)
|
||||||
return feed
|
return feed
|
||||||
|
|
Loading…
Reference in New Issue