Make most of the code pep8-compliant
Thanks a lot to github.com/SamuelMarks for his nice work

parent da0a8feadd
commit f01efb7334
morss/feedify.py

@@ -1,23 +1,25 @@
 #!/usr/bin/env python
 
+import re
+import json
+import urlparse
+import urllib2
+
 from ConfigParser import ConfigParser
 from fnmatch import fnmatch
+
+import lxml.html
+
 import feeds
 import morss
-import re
-import urllib2
-import lxml.html
-import json
-import urlparse
 
 
-def toclass(query):
+def to_class(query):
    pattern = r'\[class=([^\]]+)\]'
    repl = r'[@class and contains(concat(" ", normalize-space(@class), " "), " \1 ")]'
    return re.sub(pattern, repl, query)
 
 
-def getRule(link):
+def get_rule(link):
    config = ConfigParser()
    config.read('feedify.ini')
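Aside: the renamed to_class() helper turns the feedify.ini shorthand [class=...] into a full XPath class test. A minimal standalone sketch, using the same pattern and replacement as above:

    import re

    def to_class(query):
        pattern = r'\[class=([^\]]+)\]'
        repl = r'[@class and contains(concat(" ", normalize-space(@class), " "), " \1 ")]'
        return re.sub(pattern, repl, query)

    # '//div[class=item]' becomes an XPath predicate matching the
    # space-separated token "item" anywhere inside @class:
    print to_class('//div[class=item]')
    # //div[@class and contains(concat(" ", normalize-space(@class), " "), " item ")]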
@@ -29,10 +31,12 @@ def getRule(link):
                return values
    return False
 
-def supported(link):
-    return getRule(link) is not False
 
-def formatString(string, getter, error=False):
+def supported(link):
+    return get_rule(link) is not False
+
+
+def format_string(string, getter, error=False):
    out = ""
    char = string[0]
 
@@ -42,41 +46,42 @@ def formatString(string, getter, error=False):
        match = follow.partition('"')
        out = match[0]
        if len(match) >= 2:
-            next = match[2]
+            next_match = match[2]
        else:
-            next = None
+            next_match = None
    elif char == '{':
        match = follow.partition('}')
        try:
-            test = formatString(match[0], getter, True)
-        except ValueError, KeyError:
+            test = format_string(match[0], getter, True)
+        except (ValueError, KeyError):
            pass
        else:
            out = test
 
-        next = match[2]
+        next_match = match[2]
    elif char == ' ':
-        next = follow
+        next_match = follow
    elif re.search(r'^([^{}<>" ]+)(?:<"([^>]+)">)?(.*)$', string):
        match = re.search(r'^([^{}<>" ]+)(?:<"([^>]+)">)?(.*)$', string).groups()
-        rawValue = getter(match[0])
-        if not isinstance(rawValue, basestring):
+        raw_value = getter(match[0])
+        if not isinstance(raw_value, basestring):
            if match[1] is not None:
-                out = match[1].join(rawValue)
+                out = match[1].join(raw_value)
            else:
-                out = ''.join(rawValue)
+                out = ''.join(raw_value)
        if not out and error:
            raise ValueError
-        next = match[2]
+        next_match = match[2]
    else:
        raise ValueError('bogus string')
 
-    if next is not None and len(next):
-        return out + formatString(next, getter, error)
+    if next_match is not None and len(next_match):
+        return out + format_string(next_match, getter, error)
    else:
        return out
 
 
-def PreWorker(url, cache):
+def pre_worker(url, cache):
    if urlparse.urlparse(url).netloc == 'itunes.apple.com':
        match = re.search('/id([0-9]+)(\?.*)?$', url)
        if match:
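Note that the except change in this hunk is a real bug fix, not just style: in Python 2, `except ValueError, KeyError:` binds the caught ValueError instance to the name KeyError instead of catching a second exception type, so KeyError was never actually handled here. A minimal standalone demonstration:

    # Old form: the comma is Python 2's "except Exc, name" binding syntax,
    # so KeyError below is just a variable name, not a second exception type.
    try:
        raise ValueError('boom')
    except ValueError, KeyError:
        print KeyError  # prints the ValueError instance

    # New form: a parenthesized tuple genuinely catches both types.
    try:
        {}['missing']
    except (ValueError, KeyError):
        print 'caught'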
@@ -84,6 +89,7 @@ def PreWorker(url, cache):
            redirect = 'https://itunes.apple.com/lookup?id={id}'.format(id=iid)
            cache.set('redirect', redirect)
 
 
+
 class Builder(object):
    def __init__(self, link, data=None, cache=False):
        self.link = link
@@ -93,11 +99,11 @@ class Builder(object):
            data = urllib2.urlopen(link).read()
        self.data = data
 
-        self.rule = getRule(link)
+        self.rule = get_rule(link)
 
        if self.rule['mode'] == 'xpath':
            if not isinstance(self.data, unicode):
-                self.data = self.data.decode(morss.detEncoding(self.data), 'replace')
+                self.data = self.data.decode(morss.detect_encoding(self.data), 'replace')
            self.doc = lxml.html.fromstring(self.data)
        elif self.rule['mode'] == 'json':
            self.doc = json.loads(data)
@@ -106,7 +112,7 @@ class Builder(object):
 
    def raw(self, html, expr):
        if self.rule['mode'] == 'xpath':
-            return html.xpath(toclass(expr))
+            return html.xpath(to_class(expr))
 
        elif self.rule['mode'] == 'json':
            a = [html]
@@ -119,7 +125,7 @@ class Builder(object):
                if kids is None:
                    pass
                elif isinstance(kids, list):
-                    [b.append(i) for i in kids]
+                    b += kids
                elif isinstance(kids, basestring):
                    b.append(kids.replace('\n', '<br/>'))
                else:
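The replaced line above is a small semantic cleanup as well: `[b.append(i) for i in kids]` built a throwaway list of None values purely for its side effect, which pep8 checkers flag. `b += kids` (equivalent to `b.extend(kids)`) does the same appending without the dummy list:

    b = [1]
    kids = [2, 3]
    b += kids  # same as b.extend(kids); b is now [1, 2, 3]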
@@ -128,7 +134,7 @@ class Builder(object):
            if match[1] is None:
                a = b
            else:
-                if len(b)-1 >= int(match[1]):
+                if len(b) - 1 >= int(match[1]):
                    a = [b[int(match[1])]]
                else:
                    a = []
@@ -150,7 +156,7 @@ class Builder(object):
 
    def string(self, html, expr):
        getter = lambda x: self.strings(html, x)
-        return formatString(self.rule[expr], getter)
+        return format_string(self.rule[expr], getter)
 
    def build(self):
        if 'title' in self.rule:
@@ -160,23 +166,22 @@ class Builder(object):
        matches = self.raw(self.doc, self.rule['items'])
        if matches and len(matches):
            for item in matches:
-                feedItem = {}
+                feed_item = {}
 
                if 'item_title' in self.rule:
-                    feedItem['title'] = self.string(item, 'item_title')
+                    feed_item['title'] = self.string(item, 'item_title')
                if 'item_link' in self.rule:
                    url = self.string(item, 'item_link')
                    url = urlparse.urljoin(self.link, url)
-                    feedItem['link'] = url
+                    feed_item['link'] = url
                if 'item_desc' in self.rule:
-                    feedItem['desc'] = self.string(item, 'item_desc')
+                    feed_item['desc'] = self.string(item, 'item_desc')
                if 'item_content' in self.rule:
-                    feedItem['content'] = self.string(item, 'item_content')
+                    feed_item['content'] = self.string(item, 'item_content')
                if 'item_time' in self.rule:
-                    feedItem['updated'] = self.string(item, 'item_time')
+                    feed_item['updated'] = self.string(item, 'item_time')
                if 'item_id' in self.rule:
-                    feedItem['id'] = self.string(item, 'item_id')
-                    feedItem['isPermaLink'] = False
+                    feed_item['id'] = self.string(item, 'item_id')
+                    feed_item['isPermaLink'] = False
 
-                self.feed.items.append(feedItem)
+                self.feed.items.append(feed_item)
morss/feeds.py (418 changed lines)
@@ -1,14 +1,16 @@
 #!/usr/bin/env python
 
-from lxml import etree
 from datetime import datetime
-import dateutil.parser
-from dateutil import tz
-import re
 
 from StringIO import StringIO
 
+import re
 import json
 import csv
+import urllib2
+
+from lxml import etree
+from dateutil import tz
+import dateutil.parser
 
 try:
    from wheezy.template.engine import Engine
@@ -26,21 +28,22 @@ except ImportError:
 
 Element = etree.Element
 
 NSMAP = {'atom': 'http://www.w3.org/2005/Atom',
         'atom03': 'http://purl.org/atom/ns#',
         'media': 'http://search.yahoo.com/mrss/',
         'rdf': 'http://www.w3.org/1999/02/22-rdf-syntax-ns#',
         'slash': 'http://purl.org/rss/1.0/modules/slash/',
         'dc': 'http://purl.org/dc/elements/1.1/',
         'content': 'http://purl.org/rss/1.0/modules/content/',
         'rssfake': 'http://purl.org/rss/1.0/'}
 
 
 def load(url):
-    import urllib2
    d = urllib2.urlopen(url).read()
    return parse(d)
 
-def tagNS(tag, nsmap=NSMAP):
+
+def tag_NS(tag, nsmap=NSMAP):
    match = re.search(r'^\{([^\}]+)\}(.*)$', tag)
    if match:
        match = match.groups()
@@ -55,15 +58,19 @@ def tagNS(tag, nsmap=NSMAP):
        return "{%s}%s" % (nsmap[match[0]], match[1].lower())
    return tag
 
-def innerHTML(xml):
+
+def inner_html(xml):
    return (xml.text or '') + ''.join([etree.tostring(child) for child in xml.iterchildren()])
 
-def cleanNode(xml):
+
+def clean_node(xml):
    [xml.remove(child) for child in xml.iterchildren()]
 
 
 class FeedException(Exception):
    pass
 
 
 def parse(data):
    # encoding
    match = re.search('encoding=["\']?([0-9a-zA-Z-]+)', data[:100])
@@ -80,15 +87,16 @@ def parse(data):
    # rss
    match = doc.xpath("//atom03:feed|//atom:feed|//channel|//rdf:rdf|//rdf:RDF", namespaces=NSMAP)
    if len(match):
-        mtable = { 'rdf:rdf': FeedParserRSS, 'channel': FeedParserRSS,
-                'atom03:feed': FeedParserAtom, 'atom:feed': FeedParserAtom }
+        m_table = {'rdf:rdf': FeedParserRSS, 'channel': FeedParserRSS,
+                   'atom03:feed': FeedParserAtom, 'atom:feed': FeedParserAtom}
        match = match[0]
-        tag = tagNS(match.tag)
-        if tag in mtable:
-            return mtable[tag](doc, tag)
+        tag = tag_NS(match.tag)
+        if tag in m_table:
+            return m_table[tag](doc, tag)
 
    raise FeedException('unknown feed type')
 
 
 class FeedBase(object):
    """
    Base for xml-related classes, which provides simple wrappers around xpath
@@ -135,7 +143,7 @@ class FeedBase(object):
        else:
            return ""
 
-    def xgetCreate(self, table):
+    def xget_create(self, table):
        """ Returns an element, and creates it when not present """
        value = table[self.tag]
        if not isinstance(value, tuple):
@@ -145,7 +153,7 @@ class FeedBase(object):
        if match is not None:
            return match
        else:
-            element = etree.Element(tagNS(new))
+            element = etree.Element(tag_NS(new))
            self.root.append(element)
            return element
 
@@ -158,58 +166,62 @@ class FeedBase(object):
        """ Returns string using lxml. Arguments passed to tostring """
        return etree.tostring(self.xml, pretty_print=True, **k)
 
 
 class FeedDescriptor(object):
    """
    Descriptor which gives off elements based on "self.getName" and
    "self.setName" as getter/setters. Looks far better, and avoids duplicates
    """
 
    def __init__(self, name):
        self.name = name
-        self.nname = name[0].upper() + name[1:]
 
    def __get__(self, instance, owner):
-        getter = getattr(instance, 'get%s' % self.nname)
+        getter = getattr(instance, 'get_%s' % self.name)
        return getter()
 
    def __set__(self, instance, value):
-        setter = getattr(instance, 'set%s' % self.nname)
+        setter = getattr(instance, 'set_%s' % self.name)
        return setter(value)
 
    def __delete__(self, instance):
-        deleter = getattr(instance, 'del%s' % self.nname)
+        deleter = getattr(instance, 'del_%s' % self.name)
        return deleter()
 
 
 class FeedTime(FeedDescriptor):
    def __get__(self, instance, owner):
-        getter = getattr(instance, 'get%s' % self.nname)
+        getter = getattr(instance, 'get_%s' % self.name)
        raw = getter()
        try:
-            time = parseTime(raw)
+            time = parse_time(raw)
            return time
        except ValueError:
            return None
 
    def __set__(self, instance, value):
        try:
-            time = parseTime(value)
+            time = parse_time(value)
            raw = time.strftime(instance.timeFormat)
-            setter = getattr(instance, 'set%s' % self.nname)
+            setter = getattr(instance, 'set_%s' % self.name)
            return setter(raw)
        except ValueError:
            pass
 
 
 class FeedBool(FeedDescriptor):
    def __get__(self, instance, owner):
-        getter = getattr(instance, 'get%s' % self.nname)
+        getter = getattr(instance, 'get_%s' % self.name)
        raw = getter()
        return (raw or '').lower() != 'false'
 
    def __set__(self, instance, value):
        raw = 'true' if value else 'false'
-        setter = getattr(instance, 'set%s' % self.nname)
+        setter = getattr(instance, 'set_%s' % self.name)
        return setter(raw)
 
-def parseTime(value):
+
+def parse_time(value):
    if isinstance(value, basestring):
        if re.match(r'^[0-9]+$', value):
            return datetime.fromtimestamp(int(value), tz.tzutc())
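The accessor renames elsewhere in this commit only work because FeedDescriptor builds method names from strings at runtime, so `'get%s' % self.nname` had to become `'get_%s' % self.name` in the same pass. A simplified sketch of the dispatch (the Demo class is hypothetical, for illustration only):

    class FeedDescriptor(object):
        # Routes attribute access to get_<name>/set_<name> methods,
        # so obj.title calls obj.get_title() / obj.set_title(value).
        def __init__(self, name):
            self.name = name

        def __get__(self, instance, owner):
            return getattr(instance, 'get_%s' % self.name)()

        def __set__(self, instance, value):
            getattr(instance, 'set_%s' % self.name)(value)

    class Demo(object):
        title = FeedDescriptor('title')

        def get_title(self):
            return getattr(self, '_title', '')

        def set_title(self, value):
            self._title = value

    d = Demo()
    d.title = 'hello'  # goes through set_title()
    print d.title      # goes through get_title(); prints 'hello'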
@@ -222,6 +234,7 @@ def parseTime(value):
    else:
        return False
 
 
+
 class FeedList(object):
    """
    Class to map a list of xml elements against a list of matching objects,
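For context, parse_time() (ex parseTime) accepts either an all-digits epoch string or a human-readable date. The digits branch is visible in the hunks above; the fallback branch is assumed from the dateutil.parser import at the top of the file, roughly:

    import re
    from datetime import datetime
    from dateutil import tz
    import dateutil.parser

    def parse_time(value):
        if isinstance(value, basestring):
            if re.match(r'^[0-9]+$', value):
                # all digits: treat as a unix timestamp, in UTC
                return datetime.fromtimestamp(int(value), tz.tzutc())
            # otherwise let dateutil guess the format (assumed branch)
            return dateutil.parser.parse(value)
        return False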
@@ -231,14 +244,15 @@ class FeedList(object):
 
    Comes with its very own descriptor.
    """
 
-    def __init__(self, parent, getter, tag, childClass):
+    def __init__(self, parent, getter, tag, child_class):
        self.parent = parent
        self.getter = getter
-        self.childClass = childClass
+        self.childClass = child_class
        self.tag = tag
        self._children = {} # id(xml) => FeedItem
 
-    def getChildren(self):
+    def get_children(self):
        children = self.getter()
        out = []
        for child in children:
@@ -269,7 +283,7 @@ class FeedList(object):
            return new
 
    def __getitem__(self, key):
-        return self.getChildren()[key]
+        return self.get_children()[key]
 
    def __delitem__(self, key):
        child = self.getter()[key]
@@ -282,22 +296,24 @@ class FeedList(object):
    def __len__(self):
        return len(self.getter())
 
 
 class FeedListDescriptor(object):
    """
    Descriptor for FeedList
    """
 
    def __init__(self, name):
        self.name = name
        self.items = {} # id(instance) => FeedList
 
    def __get__(self, instance, owner=None):
        key = id(instance)
        if key in self.items:
            return self.items[key]
        else:
-            getter = getattr(instance, 'get%s' % self.name.title())
-            className = globals()[getattr(instance, '%sClass' % self.name)]
-            self.items[key] = FeedList(instance, getter, instance.tag, className)
+            getter = getattr(instance, 'get_%s' % self.name)
+            class_name = globals()[getattr(instance, '%sClass' % self.name)]
+            self.items[key] = FeedList(instance, getter, instance.tag, class_name)
            return self.items[key]
 
    def __set__(self, instance, value):
@@ -305,6 +321,7 @@ class FeedListDescriptor(object):
        [x.remove() for x in [x for x in f.items]]
        [feedlist.append(x) for x in value]
 
 
+
 class FeedParser(FeedBase):
    itemsClass = 'FeedItem'
    mimetype = 'application/xml'
@@ -318,27 +335,25 @@ class FeedParser(FeedBase):
        self.root = self.xml.xpath("//atom03:feed|//atom:feed|//channel|//rssfake:channel", namespaces=NSMAP)[0]
        self.tag = tag
 
-    def getTitle(self):
+    def get_title(self):
        return ""
 
-    def setTitle(self, value):
+    def set_title(self, value):
        pass
 
-    def delTitle(self):
+    def del_title(self):
        self.title = ""
 
-    def getDesc(self):
+    def get_desc(self):
        pass
 
-    def setDesc(self, value):
+    def set_desc(self, value):
        pass
 
-    def delDesc(self):
+    def del_desc(self):
        self.desc = ""
 
-    def getItems(self):
+    def get_items(self):
        return []
 
    title = FeedDescriptor('title')
@@ -355,7 +370,8 @@ class FeedParser(FeedBase):
        out = StringIO()
        c = csv.writer(out, dialect=csv.excel)
        for item in self.items:
-            row = [x[1].encode('utf-8') if isinstance(x[1], unicode) else x[1] for x in item if isinstance(x[1], basestring)]
+            row = [x[1].encode('utf-8') if isinstance(x[1], unicode) else x[1] for x in item if
+                   isinstance(x[1], basestring)]
            c.writerow(row)
        out.seek(0)
        return out.read()
@@ -367,7 +383,8 @@ class FeedParser(FeedBase):
        loader = DictLoader({'reader': open('reader.html.template').read()})
        engine = Engine(loader=loader, extensions=[CoreExtension()])
        template = engine.get_template('reader')
-        return template.render({'feed':self}).encode('utf-8')
+
+        return template.render({'feed': self}).encode('utf-8')
 
 
 class FeedParserRSS(FeedParser):
    """
@@ -375,161 +392,153 @@ class FeedParserRSS(FeedParser):
    """
    itemsClass = 'FeedItemRSS'
    mimetype = 'application/rss+xml'
-    base = { 'rdf:rdf': '<?xml version="1.0" encoding="utf-8"?><rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns="http://purl.org/rss/1.0/"><channel rdf:about="http://example.org/rss.rdf"></channel></rdf:RDF>',
-        'channel': '<?xml version="1.0" encoding="utf-8"?><rss version="2.0"><channel></channel></rss>'}
+    base = {
+        'rdf:rdf': '<?xml version="1.0" encoding="utf-8"?><rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns="http://purl.org/rss/1.0/"><channel rdf:about="http://example.org/rss.rdf"></channel></rdf:RDF>',
+        'channel': '<?xml version="1.0" encoding="utf-8"?><rss version="2.0"><channel></channel></rss>'}
 
-    def getTitle(self):
+    def get_title(self):
        return self.xval('rssfake:title|title')
 
-    def setTitle(self, value):
+    def set_title(self, value):
        if not value:
            return self.xdel('rssfake:title|title')
 
-        table = { 'rdf:rdf': 'rssfake:title',
-            'channel': 'title'}
-        element = self.xgetCreate(table)
+        table = {'rdf:rdf': 'rssfake:title',
+                 'channel': 'title'}
+        element = self.xget_create(table)
        element.text = value
 
-    def getDesc(self):
+    def get_desc(self):
        return self.xval('rssfake:description|description')
 
-    def setDesc(self, value):
+    def set_desc(self, value):
        if not value:
            return self.xdel('rssfake:description|description')
 
-        table = { 'rdf:rdf': 'rssfake:description',
-            'channel': 'description'}
-        element = self.xgetCreate(table)
+        table = {'rdf:rdf': 'rssfake:description',
+                 'channel': 'description'}
+        element = self.xget_create(table)
        element.text = value
 
-    def getItems(self):
+    def get_items(self):
        return self.xpath('rssfake:item|item')
 
 
 class FeedParserAtom(FeedParser):
    """
    Atom Parser
    """
    itemsClass = 'FeedItemAtom'
    mimetype = 'application/atom+xml'
-    base = { 'atom:feed': '<?xml version="1.0" encoding="utf-8"?><feed xmlns="http://www.w3.org/2005/Atom"></feed>',
-        'atom03:feed': '<?xml version="1.0" encoding="utf-8"?><feed version="0.3" xmlns="http://purl.org/atom/ns#"></feed>'}
+    base = {'atom:feed': '<?xml version="1.0" encoding="utf-8"?><feed xmlns="http://www.w3.org/2005/Atom"></feed>',
+            'atom03:feed': '<?xml version="1.0" encoding="utf-8"?><feed version="0.3" xmlns="http://purl.org/atom/ns#"></feed>'}
 
-    def getTitle(self):
+    def get_title(self):
        return self.xval('atom:title|atom03:title')
 
-    def setTitle(self, value):
+    def set_title(self, value):
        if not value:
            return self.xval('atom:title|atom03:title')
 
-        table = { 'atom:feed': 'atom:title',
-            'atom03:feed': 'atom03:title'}
-        element = self.xgetCreate(table)
+        table = {'atom:feed': 'atom:title',
+                 'atom03:feed': 'atom03:title'}
+        element = self.xget_create(table)
        element.text = value
 
-    def getDesc(self):
+    def get_desc(self):
        return self.xval('atom:subtitle|atom03:subtitle')
 
-    def setDesc(self, value):
+    def set_desc(self, value):
        if not value:
            return self.xdel('atom:subtitle|atom03:subtitle')
 
-        table = { 'atom:feed': 'atom:subtitle',
-            'atom03:feed': 'atom03:subtitle'}
-        element = self.xgetCreate(table)
+        table = {'atom:feed': 'atom:subtitle',
+                 'atom03:feed': 'atom03:subtitle'}
+        element = self.xget_create(table)
        element.text = value
 
-    def getItems(self):
+    def get_items(self):
        return self.xpath('atom:entry|atom03:entry')
 
 
 class FeedItem(FeedBase):
    timeFormat = ''
-    dic = ('title', 'link', 'desc', 'content', 'id', 'isPermaLink', 'time', 'updated')
+    dic = ('title', 'link', 'desc', 'content', 'id', 'is_permalink', 'time', 'updated')
 
    def __init__(self, xml=None, tag='atom:feed'):
        if xml is None:
-            xml = Element(tagNS(self.base[tag]))
+            xml = Element(tag_NS(self.base[tag]))
 
        self.root = self.xml = xml
        self.tag = tag
 
-    def getTitle(self):
+    def get_title(self):
        return ""
 
-    def setTitle(self):
+    def set_title(self, value):
        pass
 
-    def delTitle(self):
+    def del_title(self):
        self.title = ""
 
-    def getLink(self):
+    def get_link(self):
        return ""
 
-    def setLink(self, value):
+    def set_link(self, value):
        pass
 
-    def delLink(self):
+    def del_link(self):
        self.link = ""
 
-    def getIsPermaLink(self):
+    def get_is_permalink(self):
        return ""
 
-    def setIsPermaLink(self, value):
+    def set_is_permalink(self, value):
        pass
 
-    def getDesc(self):
+    def get_desc(self):
        return ""
 
-    def setDesc(self, value):
+    def set_desc(self, value):
        pass
 
-    def delDesc(self):
+    def del_desc(self):
        self.desc = ""
 
-    def getContent(self):
+    def get_content(self):
        return ""
 
-    def setContent(self, value):
+    def set_content(self, value):
        pass
 
-    def delContent(self):
+    def del_content(self):
        self.content = ""
 
-    def getId(self):
+    def get_id(self):
        return ""
 
-    def setId(self, value):
+    def set_id(self, value):
        pass
 
-    def delId(self):
+    def del_id(self):
        self.id = ""
 
-    def getTime(self):
+    def get_time(self):
        return None
 
-    def setTime(self, value):
+    def set_time(self, value):
        pass
 
    def delTime(self):
        self.time = None
 
-    def getUpdated(self):
+    def get_updated(self):
        return None
 
-    def setUpdated(self, value):
+    def set_updated(self, value):
        pass
 
-    def delUpdated(self):
+    def del_updated(self):
        self.updated = None
 
    title = FeedDescriptor('title')
@@ -537,11 +546,11 @@ class FeedItem(FeedBase):
    description = desc = FeedDescriptor('desc')
    content = FeedDescriptor('content')
    id = FeedDescriptor('id')
-    isPermaLink = FeedBool('isPermaLink')
+    is_permalink = FeedBool('is_permalink')
    time = FeedTime('time')
    updated = FeedTime('updated')
 
-    def pushContent(self, value):
+    def push_content(self, value):
        if not self.desc and self.content:
            self.desc = self.content
 
@@ -550,201 +559,192 @@ class FeedItem(FeedBase):
    def remove(self):
        self.xml.getparent().remove(self.xml)
 
 
 class FeedItemRSS(FeedItem):
    timeFormat = '%a, %d %b %Y %H:%M:%S %Z'
-    base = { 'rdf:rdf': 'rssfake:item',
-        'channel': 'item'}
+    base = {'rdf:rdf': 'rssfake:item',
+            'channel': 'item'}
 
-    def getTitle(self):
+    def get_title(self):
        return self.xval('rssfake:title|title')
 
-    def setTitle(self, value):
+    def set_title(self, value):
        if not value:
            return self.xdel('rssfake:title|title')
 
-        table = { 'rdf:rdf': 'rssfake:title',
-            'channel': 'title'}
-        element = self.xgetCreate(table)
+        table = {'rdf:rdf': 'rssfake:title',
+                 'channel': 'title'}
+        element = self.xget_create(table)
        element.text = value
 
-    def getLink(self):
+    def get_link(self):
        return self.xval('rssfake:link|link')
 
-    def setLink(self, value):
-        if self.isPermaLink and self.id == self.link != value:
-            self.isPermaLink = False
+    def set_link(self, value):
+        if self.is_permalink and self.id == self.link != value:
+            self.is_permalink = False
 
-        table = { 'rdf:rdf': 'rssfake:link',
-            'channel': 'link'}
-        element = self.xgetCreate(table)
+        table = {'rdf:rdf': 'rssfake:link',
+                 'channel': 'link'}
+        element = self.xget_create(table)
        element.text = value
 
-    def getDesc(self):
+    def get_desc(self):
        return self.xval('rssfake:description|description')
 
-    def setDesc(self, value):
+    def set_desc(self, value):
        if not value:
            return self.xdel('rssfake:description|description')
 
-        table = { 'rdf:rdf': 'rssfake:description',
-            'channel': 'description'}
-        element = self.xgetCreate(table)
+        table = {'rdf:rdf': 'rssfake:description',
+                 'channel': 'description'}
+        element = self.xget_create(table)
        element.text = value
 
-    def getContent(self):
+    def get_content(self):
        return self.xval('content:encoded')
 
-    def setContent(self, value):
+    def set_content(self, value):
        if not value:
            return self.xdel('content:encoded')
 
-        table = { 'rdf:rdf': 'content:encoded',
-            'channel': 'content:encoded'}
-        element = self.xgetCreate(table)
+        table = {'rdf:rdf': 'content:encoded',
+                 'channel': 'content:encoded'}
+        element = self.xget_create(table)
        element.text = value
 
-    def getId(self):
+    def get_id(self):
        return self.xval('rssfake:guid|guid')
 
-    def setId(self, value):
+    def set_id(self, value):
        if not value:
            return self.xdel('rssfake:guid|guid')
 
-        table = { 'rdf:rdf': 'rssfake:guid',
-            'channel': 'guid'}
-        element = self.xgetCreate(table)
+        table = {'rdf:rdf': 'rssfake:guid',
+                 'channel': 'guid'}
+        element = self.xget_create(table)
        element.text = value
 
-    def getIsPermaLink(self):
+    def get_is_permalink(self):
        return self.xget('rssfake:guid/@isPermaLink|guid/@isPermaLink')
 
-    def setIsPermaLink(self, value):
-        table = { 'rdf:rdf': 'rssfake:guid',
-            'channel': 'guid'}
-        element = self.xgetCreate(table)
+    def set_is_permalink(self, value):
+        table = {'rdf:rdf': 'rssfake:guid',
+                 'channel': 'guid'}
+        element = self.xget_create(table)
        element.attrib['isPermaLink'] = value
 
-    def getTime(self):
+    def get_time(self):
        return self.xval('rssfake:pubDate|pubDate')
 
-    def setTime(self, value):
+    def set_time(self, value):
        if not value:
            return self.xdel('rssfake:pubDate|pubDate')
 
-        table = { 'rdf:rdf': 'rssfake:pubDate',
-            'channel': 'pubDate'}
-        element = self.xgetCreate(table)
+        table = {'rdf:rdf': 'rssfake:pubDate',
+                 'channel': 'pubDate'}
+        element = self.xget_create(table)
        element.text = value
 
 
 class FeedItemAtom(FeedItem):
    timeFormat = '%Y-%m-%dT%H:%M:%SZ'
-    base = { 'atom:feed': 'atom:entry',
-        'atom03:feed': 'atom03:entry'}
+    base = {'atom:feed': 'atom:entry',
+            'atom03:feed': 'atom03:entry'}
 
-    def getTitle(self):
+    def get_title(self):
        return self.xval('atom:title|atom03:title')
 
-    def setTitle(self, value):
+    def set_title(self, value):
        if not value:
            return self.xdel('atom:title|atom03:title')
 
-        table = { 'atom:feed': 'atom:title',
-            'atom03:feed': 'atom03:title'}
-        element = self.xgetCreate(table)
+        table = {'atom:feed': 'atom:title',
+                 'atom03:feed': 'atom03:title'}
+        element = self.xget_create(table)
        element.text = value
 
-    def getLink(self):
+    def get_link(self):
        return self.xget('(atom:link|atom03:link)[@rel="alternate" or not(@rel)]/@href')
 
-    def setLink(self, value):
-        table = { 'atom:feed': ('atom:link', 'atom:link[@rel="alternate" or not(@rel)]'),
-            'atom03:feed': ('atom03:link', 'atom03:link[@rel="alternate" or not(@rel)]')}
-        element = self.xgetCreate(table)
+    def set_link(self, value):
+        table = {'atom:feed': ('atom:link', 'atom:link[@rel="alternate" or not(@rel)]'),
+                 'atom03:feed': ('atom03:link', 'atom03:link[@rel="alternate" or not(@rel)]')}
+        element = self.xget_create(table)
        element.attrib['href'] = value
 
-    def getDesc(self):
+    def get_desc(self):
        # default "type" is "text"
        element = self.xget('atom:summary|atom03:summary')
        if element is not None:
-            return innerHTML(element)
+            return inner_html(element)
        else:
            return ""
 
-    def setDesc(self, value):
+    def set_desc(self, value):
        if not value:
            return self.xdel('atom:summary|atom03:summary')
 
-        table = { 'atom:feed': 'atom:summary',
-            'atom03:feed': 'atom03:summary'}
-        element = self.xgetCreate(table)
+        table = {'atom:feed': 'atom:summary',
+                 'atom03:feed': 'atom03:summary'}
+        element = self.xget_create(table)
        if element.attrib.get('type', '') == 'xhtml':
-            cleanNode(element)
+            clean_node(element)
        element.attrib['type'] = 'html'
        element.text = value
 
-    def getContent(self):
+    def get_content(self):
        element = self.xget('atom:content|atom03:content')
        if element is not None:
-            return innerHTML(element)
+            return inner_html(element)
        else:
            return ""
 
-    def setContent(self, value):
+    def set_content(self, value):
        if not value:
            return self.xdel('atom:content|atom03:content')
 
-        table = { 'atom:feed': 'atom:content',
-            'atom03:feed': 'atom03:content'}
-        element = self.xgetCreate(table)
+        table = {'atom:feed': 'atom:content',
+                 'atom03:feed': 'atom03:content'}
+        element = self.xget_create(table)
        if element.attrib.get('type', '') == 'xhtml':
-            cleanNode(element)
+            clean_node(element)
        element.attrib['type'] = 'html'
        element.text = value
 
-    def getId(self):
+    def get_id(self):
        return self.xval('atom:id|atom03:id')
 
-    def setId(self, value):
+    def set_id(self, value):
        if not value:
            return self.xdel('atom:id|atom03:id')
 
-        table = { 'atom:feed': 'atom:id',
-            'atom03:feed': 'atom03:id'}
-        element = self.xgetCreate(table)
+        table = {'atom:feed': 'atom:id',
+                 'atom03:feed': 'atom03:id'}
+        element = self.xget_create(table)
        element.text = value
 
-    def getTime(self):
+    def get_time(self):
        return self.xval('atom:published|atom03:published')
 
-    def setTime(self, value):
+    def set_time(self, value):
        if not value:
            return self.xdel('atom:published|atom03:published')
 
-        table = { 'atom:feed': 'atom:published',
-            'atom03:feed': 'atom03:published'}
-        element = self.xgetCreate(table)
+        table = {'atom:feed': 'atom:published',
+                 'atom03:feed': 'atom03:published'}
+        element = self.xget_create(table)
        element.text = value
 
-    def getUpdated(self):
+    def get_updated(self):
        return self.xval('atom:updated|atom03:updated')
 
-    def setUpdated(self, value):
+    def set_updated(self, value):
        if not value:
            return self.xdel('atom:updated|atom03:updated')
 
-        table = { 'atom:feed': 'atom:updated',
-            'atom03:feed': 'atom03:updated'}
-        element = self.xgetCreate(table)
+        table = {'atom:feed': 'atom:updated',
+                 'atom03:feed': 'atom03:updated'}
+        element = self.xget_create(table)
        element.text = value
 
morss/morss.py (156 changed lines)
@@ -31,21 +31,22 @@ from StringIO import StringIO
 from readability import readability
 from html2text import HTML2Text
 
 LIM_ITEM = 100 # deletes what's beyond
 LIM_TIME = 7 # deletes what's after
 MAX_ITEM = 50 # cache-only beyond
 MAX_TIME = 7 # cache-only after (in sec)
-DELAY = 10*60 # xml cache & ETag cache (in sec)
+DELAY = 10 * 60 # xml cache & ETag cache (in sec)
 TIMEOUT = 2 # http timeout (in sec)
 THREADS = 10 # number of threads (1 for single-threaded)
 
 DEBUG = False
 
 UA_RSS = 'Liferea/1.8.12 (Linux; fr_FR.utf8; http://liferea.sf.net/)'
 UA_HTML = 'Mozilla/5.0 (X11; Linux x86_64; rv:25.0) Gecko/20100101 Firefox/25.0'
 
-MIMETYPE = { 'xml': ['text/xml', 'application/xml', 'application/rss+xml', 'application/rdf+xml', 'application/atom+xml'],
-    'html': ['text/html', 'application/xhtml+xml', 'application/xml']}
+MIMETYPE = {
+    'xml': ['text/xml', 'application/xml', 'application/rss+xml', 'application/rdf+xml', 'application/atom+xml'],
+    'html': ['text/html', 'application/xhtml+xml', 'application/xml']}
 
 FBAPPID = "<insert yours>"
 FBSECRET = "<insert yours>"
@@ -57,11 +58,14 @@ if 'SCRIPT_NAME' in os.environ:
    httplib.HTTPConnection.debuglevel = 1
 
    import cgitb
+
    cgitb.enable()
 
 
+
 class MorssException(Exception):
    pass
 
 
+
 def log(txt, force=False):
    if DEBUG or force:
        if 'REQUEST_URI' in os.environ:
@@ -70,17 +74,18 @@ def log(txt, force=False):
            print repr(txt)
 
 
-def lenHTML(txt):
+def len_html(txt):
    if len(txt):
        return len(lxml.html.fromstring(txt).text_content())
    else:
        return 0
 
-def countWord(txt):
+
+def count_words(txt):
    if len(txt):
        return len(lxml.html.fromstring(txt).text_content().split())
-    else:
-        return 0
+
+    return 0
 
 
 class Options:
    def __init__(self, options=None):
@@ -95,9 +100,11 @@ class Options:
    def __contains__(self, key):
        return key in self.options
 
 
 class Cache:
    """ Light, error-prone caching system. """
-    def __init__(self, folder=None, key='cache', lifespan=10*24*3600):
+
+    def __init__(self, folder=None, key='cache', lifespan=10 * 24 * 3600):
        self._key = key
        self._dir = folder
        self._lifespan = lifespan
@@ -108,7 +115,7 @@ class Cache:
            self._hash = "NO CACHE"
            return
 
-        maxsize = os.statvfs('./').f_namemax - len(self._dir) - 1 - 4 # ".tmp"
+        maxsize = os.statvfs('./').f_namemax - len(self._dir) - 1 - 4  # ".tmp"
        self._hash = urllib.quote_plus(self._key)[:maxsize]
 
        self._file = self._dir + '/' + self._hash
@@ -178,13 +185,16 @@ class Cache:
        else:
            return self
 
 
+
 class SimpleDownload(urllib2.HTTPCookieProcessor):
    """
    Custom urllib2 handler to download a page, using etag/last-modified headers,
    to save bandwidth. The given headers are added back into the header on error
    304 for easier use.
    """
-    def __init__(self, cache="", etag=None, lastmodified=None, useragent=UA_HTML, decode=True, cookiejar=None, accept=None, strict=False):
+
+    def __init__(self, cache="", etag=None, lastmodified=None, useragent=UA_HTML, decode=True, cookiejar=None,
+                 accept=None, strict=False):
        urllib2.HTTPCookieProcessor.__init__(self, cookiejar)
        self.cache = cache
        self.etag = etag
@@ -214,7 +224,7 @@ class SimpleDownload(urllib2.HTTPCookieProcessor):
        out = {}
        rank = 1.1
        for group in self.accept:
-            rank = rank - 0.1
+            rank -= 0.1
 
            if isinstance(group, basestring):
                if group in MIMETYPE:
@@ -228,9 +238,9 @@ class SimpleDownload(urllib2.HTTPCookieProcessor):
                    out[mime] = rank
 
        if not self.strict:
-            out['*/*'] = rank-0.1
+            out['*/*'] = rank - 0.1
 
-        string = ','.join([x+';q={0:.1}'.format(out[x]) if out[x] != 1 else x for x in out])
+        string = ','.join([x + ';q={0:.1}'.format(out[x]) if out[x] != 1 else x for x in out])
        req.add_unredirected_header('Accept', string)
 
        return req
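For context, the joined string turns the rank dict into a standard HTTP Accept header, omitting the q parameter when the rank is exactly 1 and appending a */* catch-all in non-strict mode. A hand-run of the same join expression on a sample rank dict (dict ordering in Python 2 is arbitrary, so segment order may vary):

    out = {'text/xml': 1, 'application/xml': 0.9, 'text/html': 0.8, '*/*': 0.7}
    print ','.join([x + ';q={0:.1}'.format(out[x]) if out[x] != 1 else x for x in out])
    # e.g. text/xml,application/xml;q=0.9,text/html;q=0.8,*/*;q=0.7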
@@ -259,20 +269,20 @@ class SimpleDownload(urllib2.HTTPCookieProcessor):
        if resp.info().type in MIMETYPE['html']:
            match = re.search(r'(?i)<meta http-equiv=.refresh[^>]*?url=(http.*?)["\']', data)
            if match:
-                newurl = match.groups()[0]
-                log('redirect: %s' % newurl)
+                new_url = match.groups()[0]
+                log('redirect: %s' % new_url)
 
-                newheaders = dict((k,v) for k,v in req.headers.items()
+                new_headers = dict((k, v) for k, v in req.headers.items()
                                  if k.lower() not in ('content-length', 'content-type'))
-                new = urllib2.Request(newurl,
-                                      headers=newheaders,
+                new = urllib2.Request(new_url,
+                                      headers=new_headers,
                                      origin_req_host=req.get_origin_req_host(),
                                      unverifiable=True)
 
                return self.parent.open(new, timeout=req.timeout)
 
        # encoding
-        enc = detEncoding(data, resp)
+        enc = detect_encoding(data, resp)
 
        if enc:
            data = data.decode(enc, 'replace')
@@ -290,7 +300,8 @@ class SimpleDownload(urllib2.HTTPCookieProcessor):
    https_response = http_response
    https_request = http_request
 
-def detEncoding(data, con=None):
+
+def detect_encoding(data, con=None):
    if con is not None and con.headers.getparam('charset'):
        log('header')
        return con.headers.getparam('charset')
@@ -306,6 +317,7 @@ def detEncoding(data, con=None):
 
    return None
 
 
+
 def Fix(item, feedurl='/'):
    """ Improves feed items (absolute links, resolve feedburner links, etc) """
 
@@ -358,7 +370,8 @@ def Fix(item, feedurl='/'):
    match = re.search('/([0-9a-zA-Z]{20,})/story01.htm$', item.link)
    if match:
        url = match.groups()[0].split('0')
-        t = {'A':'0', 'B':'.', 'C':'/', 'D':'?', 'E':'-', 'H':',', 'I':'_', 'L':'http://', 'S':'www.', 'N':'.com', 'O':'.co.uk'}
+        t = {'A': '0', 'B': '.', 'C': '/', 'D': '?', 'E': '-', 'H': ',', 'I': '_', 'L': 'http://', 'S': 'www.',
+             'N': '.com', 'O': '.co.uk'}
        item.link = ''.join([(t[s[0]] if s[0] in t else '=') + s[1:] for s in url[1:]])
        log(item.link)
 
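For reference, the t table decodes feedsportal/feedburner "story01.htm" links: the encoded token is split on '0', and each segment's first letter selects a URL fragment (unknown letters fall back to '='). A worked example with a made-up token:

    t = {'A': '0', 'B': '.', 'C': '/', 'D': '?', 'E': '-', 'H': ',', 'I': '_',
         'L': 'http://', 'S': 'www.', 'N': '.com', 'O': '.co.uk'}

    url = 'x0L0Sexample0N0Cnews'.split('0')  # hypothetical encoded token
    link = ''.join([(t[s[0]] if s[0] in t else '=') + s[1:] for s in url[1:]])
    print link  # http://www.example.com/news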
@@ -371,6 +384,7 @@ def Fix(item, feedurl='/'):
 
    return item
 
 
+
 def Fill(item, cache, feedurl='/', fast=False):
    """ Returns True when it has done its best """
 
@@ -381,8 +395,8 @@ def Fill(item, cache, feedurl='/', fast=False):
    log(item.link)
 
    # content already provided?
-    count_content = countWord(item.content)
-    count_desc = countWord(item.desc)
+    count_content = count_words(item.content)
+    count_desc = count_words(item.desc)
 
    if max(count_content, count_desc) > 500:
        if count_desc > count_content:
@@ -392,7 +406,7 @@ def Fill(item, cache, feedurl='/', fast=False):
        log('long enough')
        return True
 
-    if count_content > 5*count_desc > 0 and count_content > 50:
+    if count_content > 5 * count_desc > 0 and count_content > 50:
        log('content bigger enough')
        return True
 
@@ -432,7 +446,7 @@ def Fill(item, cache, feedurl='/', fast=False):
            log('old error')
        else:
            log('cached')
-            item.pushContent(cache.get(link))
+            item.push_content(cache.get(link))
            return True
 
    # super-fast mode
@@ -457,8 +471,8 @@ def Fill(item, cache, feedurl='/', fast=False):
 
    out = readability.Document(data, url=con.url).summary(True)
 
-    if countWord(out) > max(count_content, count_desc) > 0:
-        item.pushContent(out)
+    if count_words(out) > max(count_content, count_desc) > 0:
+        item.push_content(out)
        cache.set(link, out)
    else:
        log('not bigger enough')
@@ -467,7 +481,8 @@ def Fill(item, cache, feedurl='/', fast=False):
 
    return True
 
-def Init(url, cachePath, options):
+
+def Init(url, cache_path, options):
    # url clean up
    log(url)
 
@@ -481,14 +496,15 @@ def Init(url, cachePath, options):
    url = url.replace(' ', '%20')
 
    # cache
-    cache = Cache(cachePath, url)
+    cache = Cache(cache_path, url)
    log(cache._hash)
 
    return (url, cache)
 
 
 def Fetch(url, cache, options):
    # do some useful facebook work
-    feedify.PreWorker(url, cache)
+    feedify.pre_worker(url, cache)
 
    if 'redirect' in cache:
        url = cache.get('redirect')
@@ -502,8 +518,9 @@ def Fetch(url, cache, options):
         style = cache.get('style')
     else:
         try:
-            opener = SimpleDownload(cache.get(url), cache.get('etag'), cache.get('lastmodified'), accept=('xml','html'))
-            con = urllib2.build_opener(opener).open(url, timeout=TIMEOUT*2)
+            opener = SimpleDownload(cache.get(url), cache.get('etag'), cache.get('lastmodified'),
+                                    accept=('xml', 'html'))
+            con = urllib2.build_opener(opener).open(url, timeout=TIMEOUT * 2)
             xml = con.read()
         except (IOError, httplib.HTTPException):
             raise MorssException('Error downloading feed')
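SimpleDownload is morss's own urllib2 handler; judging by its arguments, it bundles the cached body, the ETag and Last-Modified validators, and an accepted-content whitelist. A plain-urllib2 sketch of the same conditional-GET idea (the function and variable names here are illustrative, not morss's):

    import urllib2

    def conditional_get(url, etag=None, last_modified=None, timeout=20):
        # Send cache validators so the server may answer 304 Not Modified.
        req = urllib2.Request(url)
        if etag:
            req.add_header('If-None-Match', etag)
        if last_modified:
            req.add_header('If-Modified-Since', last_modified)
        try:
            con = urllib2.urlopen(req, timeout=timeout)
            return con.read(), con.headers.get('ETag'), con.headers.get('Last-Modified')
        except urllib2.HTTPError as e:
            if e.code == 304:  # unchanged: the caller should reuse its cached copy
                return None, etag, last_modified
            raise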
@@ -540,7 +557,8 @@ def Fetch(url, cache, options):
         feed.build()
         rss = feed.feed
     elif style == 'html':
-        match = lxml.html.fromstring(xml).xpath("//link[@rel='alternate'][@type='application/rss+xml' or @type='application/atom+xml']/@href")
+        match = lxml.html.fromstring(xml).xpath(
+            "//link[@rel='alternate'][@type='application/rss+xml' or @type='application/atom+xml']/@href")
         if len(match):
             link = urlparse.urljoin(url, match[0])
             log('rss redirect: %s' % link)
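The XPath being wrapped here is classic feed autodiscovery: when the fetched document turns out to be plain HTML, look for an advertised <link rel="alternate"> pointing at RSS or Atom and redirect there. As a self-contained helper using the same expression:

    import urlparse
    import lxml.html

    def discover_feed(html, page_url):
        # Return the first advertised RSS/Atom feed URL, or None.
        hrefs = lxml.html.fromstring(html).xpath(
            "//link[@rel='alternate']"
            "[@type='application/rss+xml' or @type='application/atom+xml']/@href")
        return urlparse.urljoin(page_url, hrefs[0]) if hrefs else None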
@@ -552,13 +570,13 @@ def Fetch(url, cache, options):
         log('random page')
         raise MorssException('Link provided is not a valid feed')
 
     cache.save()
     return rss
 
 
 def Gather(rss, url, cache, options):
     size = len(rss.items)
-    startTime = time.time()
+    start_time = time.time()
 
     # custom settings
     lim_item = LIM_ITEM
@@ -580,14 +598,14 @@ def Gather(rss, url, cache, options):
             queue.task_done()
 
     def worker(i, item):
-        if time.time() - startTime > lim_time >= 0 or i+1 > lim_item >= 0:
+        if time.time() - start_time > lim_time >= 0 or i + 1 > lim_item >= 0:
             log('dropped')
             item.remove()
             return
 
         item = Fix(item, url)
 
-        if time.time() - startTime > max_time >= 0 or i+1 > max_item >= 0:
+        if time.time() - start_time > max_time >= 0 or i + 1 > max_item >= 0:
             if not options.proxy:
                 if Fill(item, cache, url, True) is False:
                     item.remove()
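Both guards in worker() lean on Python's chained comparisons: elapsed > lim_time >= 0 means elapsed > lim_time and lim_time >= 0, so setting a limit to a negative value disables it entirely; i + 1 > lim_item >= 0 caps the item count the same way. A quick check:

    for lim_time in (-1, 10):
        elapsed = 60
        # -1 prints False (limit switched off), 10 prints True (limit exceeded)
        print lim_time, elapsed > lim_time >= 0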
@@ -617,10 +635,11 @@ def Gather(rss, url, cache, options):
         new.time = "5 Oct 2013 22:42"
 
     log(len(rss.items))
-    log(time.time() - startTime)
+    log(time.time() - start_time)
 
     return rss
 
+
 def After(rss, options):
     for i, item in enumerate(rss.items):
@@ -662,8 +681,9 @@ def After(rss, options):
     else:
         return rss.tostring(xml_declaration=True, encoding='UTF-8')
 
+
 def process(url, cache=None, options=None):
-    if options == None:
+    if not options:
         options = []
 
     options = Options(options)
@@ -673,6 +693,7 @@ def process(url, cache=None, options=None):
 
     return After(rss, options)
 
+
 def cgi_app(environ, start_response):
     # get options
     if 'REQUEST_URI' in environ:
@@ -696,7 +717,8 @@ def cgi_app(environ, start_response):
     DEBUG = options.debug
 
     if 'HTTP_IF_NONE_MATCH' in environ:
-        if not options.force and not options.facebook and time.time() - int(environ['HTTP_IF_NONE_MATCH'][1:-1]) < DELAY:
+        if not options.force and not options.facebook and time.time() - int(
+                environ['HTTP_IF_NONE_MATCH'][1:-1]) < DELAY:
             headers['status'] = '304 Not Modified'
             start_response(headers['status'], headers.items())
             log(url)
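Judging by this check, the ETag is not a content hash: it carries a Unix timestamp, so when a client echoes it back via If-None-Match the server can answer 304 purely from arithmetic, without doing any feed work. A sketch of the test (DELAY_SKETCH stands in for morss's DELAY constant):

    import time

    DELAY_SKETCH = 10 * 60  # stand-in for morss's DELAY

    def is_fresh(environ):
        # The ETag arrives quoted, e.g. '"1381005720"'; strip the quotes
        # and serve 304 if the cached copy is still young enough.
        etag = environ.get('HTTP_IF_NONE_MATCH')
        return etag is not None and time.time() - int(etag[1:-1]) < DELAY_SKETCH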
@@ -722,30 +744,31 @@ def cgi_app(environ, start_response):
     url, cache = Init(url, os.getcwd() + '/cache', options)
 
     if options.facebook:
-        doFacebook(url, environ, headers, options, cache)
+        do_facebook(url, environ, headers, options, cache)
         start_response(headers['status'], headers.items())
         return
 
     # get the work done
-    RSS = Fetch(url, cache, options)
+    rss = Fetch(url, cache, options)
 
     if headers['content-type'] == 'text/xml':
-        headers['content-type'] = RSS.mimetype
+        headers['content-type'] = rss.mimetype
 
     start_response(headers['status'], headers.items())
 
-    RSS = Gather(RSS, url, cache, options)
+    rss = Gather(rss, url, cache, options)
 
     if not DEBUG and not options.silent:
-        return After(RSS, options)
+        return After(rss, options)
 
     log('done')
 
 
 def cgi_wrapper(environ, start_response):
     # simple http server for html and css
     files = {
         '': 'text/html',
         'index.html': 'text/html'}
 
     if 'REQUEST_URI' in environ:
         url = environ['REQUEST_URI'][1:]
@@ -774,13 +797,12 @@ def cgi_wrapper(environ, start_response):
     except (KeyboardInterrupt, SystemExit):
         raise
     except Exception as e:
-        headers = {}
-        headers['status'] = '500 Oops'
-        headers['content-type'] = 'text/plain'
+        headers = {'status': '500 Oops', 'content-type': 'text/plain'}
         start_response(headers['status'], headers.items(), sys.exc_info())
         log('ERROR: %s' % e.message, force=True)
         return 'An error happened'
 
+
 def cli_app():
     options = Options(sys.argv[1:-1])
     url = sys.argv[-1]
@@ -789,15 +811,16 @@ def cli_app():
     DEBUG = options.debug
 
     url, cache = Init(url, os.path.expanduser('~/.cache/morss'), options)
-    RSS = Fetch(url, cache, options)
-    RSS = Gather(RSS, url, cache, options)
+    rss = Fetch(url, cache, options)
+    rss = Gather(rss, url, cache, options)
 
     if not DEBUG and not options.silent:
-        print After(RSS, options)
+        print After(rss, options)
 
     log('done')
 
-def doFacebook(url, environ, headers, options, cache):
+
+def do_facebook(url, environ, headers, options, cache):
     log('fb stuff')
 
     query = urlparse.urlparse(url).query
@@ -805,11 +828,13 @@ def doFacebook(url, environ, headers, options, cache):
     if 'code' in query:
         # get real token from code
         code = urlparse.parse_qs(query)['code'][0]
-        eurl = "https://graph.facebook.com/oauth/access_token?client_id={app_id}&redirect_uri={redirect_uri}&client_secret={app_secret}&code={code_parameter}".format(app_id=FBAPPID, app_secret=FBSECRET, code_parameter=code, redirect_uri=environ['SCRIPT_URI'])
+        eurl = "https://graph.facebook.com/oauth/access_token?client_id={app_id}&redirect_uri={redirect_uri}&client_secret={app_secret}&code={code_parameter}".format(
+            app_id=FBAPPID, app_secret=FBSECRET, code_parameter=code, redirect_uri=environ['SCRIPT_URI'])
         token = urlparse.parse_qs(urllib2.urlopen(eurl).read().strip())['access_token'][0]
 
         # get long-lived access token
-        eurl = "https://graph.facebook.com/oauth/access_token?grant_type=fb_exchange_token&client_id={app_id}&client_secret={app_secret}&fb_exchange_token={short_lived_token}".format(app_id=FBAPPID, app_secret=FBSECRET, short_lived_token=token)
+        eurl = "https://graph.facebook.com/oauth/access_token?grant_type=fb_exchange_token&client_id={app_id}&client_secret={app_secret}&fb_exchange_token={short_lived_token}".format(
+            app_id=FBAPPID, app_secret=FBSECRET, short_lived_token=token)
         values = urlparse.parse_qs(urllib2.urlopen(eurl).read().strip())
 
         ltoken = values['access_token'][0]
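These two requests are the standard server-side OAuth flow: trade the login code for a short-lived token, then exchange it (grant_type=fb_exchange_token) for a long-lived one. The parse_qs calls suggest the Graph API answered with form-encoded bodies rather than JSON at the time; a condensed sketch of that parsing step:

    import urlparse

    def parse_token(body):
        # e.g. 'access_token=XYZ&expires=5183814' -> 'XYZ'
        return urlparse.parse_qs(body.strip())['access_token'][0]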
@@ -824,6 +849,7 @@ def doFacebook(url, environ, headers, options, cache):
     log('fb done')
     return
 
+
 def main():
     if 'REQUEST_URI' in os.environ:
         wsgiref.handlers.CGIHandler().run(cgi_wrapper)
setup.py | 3
@@ -1,7 +1,8 @@
 from setuptools import setup, find_packages
 
 package_name = 'morss'
-setup( name=package_name,
+setup(
+    name=package_name,
     description='Get full-text RSS feeds',
     author='pictuga',
     author_email='contact at author name dot com',