Make most of the code pep8-compliant

Thanks a lot to github.com/SamuelMarks for his nice work

branch: master
parent: da0a8feadd
commit: f01efb7334
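The renames below all follow the PEP 8 naming convention: functions, methods and variables move from mixedCase to lowercase_with_underscores, while class names (FeedParserRSS, Builder, ...) stay CapWords and are left alone. A minimal sketch of the pattern, with invented names, purely for illustration:

# --- illustrative sketch, not part of the commit ---
# Before (mixedCase):
def formatString(rawValue):
    return rawValue.strip()

# After (PEP 8 lowercase_with_underscores); class names such as
# FeedParserRSS are already CapWords and keep their spelling:
def format_string(raw_value):
    return raw_value.strip()
# --- end sketch ---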
morss/feedify.py (filename not shown in the capture; inferred from the feedify.ini reads below)

@@ -1,23 +1,25 @@
 #!/usr/bin/env python
+
+import re
+import json
+import urlparse
+import urllib2
+
 from ConfigParser import ConfigParser
 from fnmatch import fnmatch
+import lxml.html
 
 import feeds
 import morss
-import re
-
-import urllib2
-import lxml.html
-import json
-import urlparse
 
+
-def toclass(query):
+def to_class(query):
     pattern = r'\[class=([^\]]+)\]'
     repl = r'[@class and contains(concat(" ", normalize-space(@class), " "), " \1 ")]'
     return re.sub(pattern, repl, query)
 
-def getRule(link):
+
+def get_rule(link):
     config = ConfigParser()
     config.read('feedify.ini')
@@ -29,10 +31,12 @@ def getRule(link):
                 return values
     return False
 
-def supported(link):
-    return getRule(link) is not False
-
-def formatString(string, getter, error=False):
+
+def supported(link):
+    return get_rule(link) is not False
+
+
+def format_string(string, getter, error=False):
     out = ""
     char = string[0]
@@ -42,41 +46,42 @@ def formatString(string, getter, error=False):
         match = follow.partition('"')
         out = match[0]
         if len(match) >= 2:
-            next = match[2]
+            next_match = match[2]
         else:
-            next = None
+            next_match = None
     elif char == '{':
         match = follow.partition('}')
         try:
-            test = formatString(match[0], getter, True)
-        except ValueError, KeyError:
+            test = format_string(match[0], getter, True)
+        except (ValueError, KeyError):
             pass
         else:
             out = test
 
-        next = match[2]
+        next_match = match[2]
     elif char == ' ':
-        next = follow
+        next_match = follow
     elif re.search(r'^([^{}<>" ]+)(?:<"([^>]+)">)?(.*)$', string):
         match = re.search(r'^([^{}<>" ]+)(?:<"([^>]+)">)?(.*)$', string).groups()
-        rawValue = getter(match[0])
-        if not isinstance(rawValue, basestring):
+        raw_value = getter(match[0])
+        if not isinstance(raw_value, basestring):
             if match[1] is not None:
-                out = match[1].join(rawValue)
+                out = match[1].join(raw_value)
             else:
-                out = ''.join(rawValue)
+                out = ''.join(raw_value)
         if not out and error:
             raise ValueError
-        next = match[2]
+        next_match = match[2]
     else:
         raise ValueError('bogus string')
 
-    if next is not None and len(next):
-        return out + formatString(next, getter, error)
+    if next_match is not None and len(next_match):
+        return out + format_string(next_match, getter, error)
     else:
         return out
 
-def PreWorker(url, cache):
+
+def pre_worker(url, cache):
    if urlparse.urlparse(url).netloc == 'itunes.apple.com':
        match = re.search('/id([0-9]+)(\?.*)?$', url)
        if match:
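For context, format_string walks a small template language: a leading double quote starts a literal, braces wrap an optional group whose lookup failures (ValueError/KeyError) are swallowed, and a bare token is resolved through the getter callback, with an optional <"sep"> joiner for list values. A hedged usage sketch; the entry dict and the rule string are invented for illustration:

# --- illustrative sketch, not part of the commit ---
from feedify import format_string  # assumes the renamed module is importable

entry = {'authors': ['Ann', 'Bob']}

def getter(key):
    return entry[key]  # a KeyError here is what {...} groups swallow

print format_string('"by " authors<", ">', getter)
# prints roughly: by Ann, Bob
# --- end sketch ---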
@@ -84,6 +89,7 @@ def PreWorker(url, cache):
            redirect = 'https://itunes.apple.com/lookup?id={id}'.format(id=iid)
            cache.set('redirect', redirect)
 
+
 class Builder(object):
     def __init__(self, link, data=None, cache=False):
         self.link = link

@@ -93,11 +99,11 @@ class Builder(object):
             data = urllib2.urlopen(link).read()
         self.data = data
 
-        self.rule = getRule(link)
+        self.rule = get_rule(link)
 
         if self.rule['mode'] == 'xpath':
             if not isinstance(self.data, unicode):
-                self.data = self.data.decode(morss.detEncoding(self.data), 'replace')
+                self.data = self.data.decode(morss.detect_encoding(self.data), 'replace')
             self.doc = lxml.html.fromstring(self.data)
         elif self.rule['mode'] == 'json':
             self.doc = json.loads(data)

@@ -106,7 +112,7 @@ class Builder(object):
 
     def raw(self, html, expr):
         if self.rule['mode'] == 'xpath':
-            return html.xpath(toclass(expr))
+            return html.xpath(to_class(expr))
 
         elif self.rule['mode'] == 'json':
             a = [html]

@@ -119,7 +125,7 @@ class Builder(object):
                 if kids is None:
                     pass
                 elif isinstance(kids, list):
-                    [b.append(i) for i in kids]
+                    b += kids
                 elif isinstance(kids, basestring):
                     b.append(kids.replace('\n', '<br/>'))
                 else:

@@ -150,7 +156,7 @@ class Builder(object):
 
     def string(self, html, expr):
         getter = lambda x: self.strings(html, x)
-        return formatString(self.rule[expr], getter)
+        return format_string(self.rule[expr], getter)
 
     def build(self):
         if 'title' in self.rule:
@@ -160,23 +166,22 @@ class Builder(object):
         matches = self.raw(self.doc, self.rule['items'])
         if matches and len(matches):
             for item in matches:
-                feedItem = {}
+                feed_item = {}
 
                 if 'item_title' in self.rule:
-                    feedItem['title'] = self.string(item, 'item_title')
+                    feed_item['title'] = self.string(item, 'item_title')
                 if 'item_link' in self.rule:
                     url = self.string(item, 'item_link')
                     url = urlparse.urljoin(self.link, url)
-                    feedItem['link'] = url
+                    feed_item['link'] = url
                 if 'item_desc' in self.rule:
-                    feedItem['desc'] = self.string(item, 'item_desc')
+                    feed_item['desc'] = self.string(item, 'item_desc')
                 if 'item_content' in self.rule:
-                    feedItem['content'] = self.string(item, 'item_content')
+                    feed_item['content'] = self.string(item, 'item_content')
                 if 'item_time' in self.rule:
-                    feedItem['updated'] = self.string(item, 'item_time')
+                    feed_item['updated'] = self.string(item, 'item_time')
                 if 'item_id' in self.rule:
-                    feedItem['id'] = self.string(item, 'item_id')
-                    feedItem['isPermaLink'] = False
-
-                self.feed.items.append(feedItem)
+                    feed_item['id'] = self.string(item, 'item_id')
+                    feed_item['isPermaLink'] = False
+
+                self.feed.items.append(feed_item)
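For orientation, build() above consumes a rule dict whose keys are exactly the ones tested in the loop, as returned by get_rule() from feedify.ini. A hedged sketch of such a rule; all values are invented:

# --- illustrative sketch, not part of the commit; values are invented ---
rule = {
    'mode': 'xpath',                # or 'json', both handled by Builder.raw()
    'items': '//div[class=story]',  # to_class() expands [class=...] for xpath
    'item_title': 'h1',
    'item_link': 'a/@href',         # made absolute via urlparse.urljoin()
    'item_time': 'span[class=date]',
}
# --- end sketch ---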

morss/feeds.py | 308
@@ -1,14 +1,16 @@
 #!/usr/bin/env python
 
-from lxml import etree
 from datetime import datetime
-import dateutil.parser
-from dateutil import tz
-import re
 
 from StringIO import StringIO
 
+import re
+import json
+import csv
+import urllib2
+
+from lxml import etree
+from dateutil import tz
+import dateutil.parser
 
 try:
     from wheezy.template.engine import Engine
@@ -35,12 +37,13 @@ NSMAP = {'atom': 'http://www.w3.org/2005/Atom',
          'content': 'http://purl.org/rss/1.0/modules/content/',
          'rssfake': 'http://purl.org/rss/1.0/'}
 
+
 def load(url):
-    import urllib2
     d = urllib2.urlopen(url).read()
     return parse(d)
 
-def tagNS(tag, nsmap=NSMAP):
+
+def tag_NS(tag, nsmap=NSMAP):
     match = re.search(r'^\{([^\}]+)\}(.*)$', tag)
     if match:
         match = match.groups()

@@ -55,15 +58,19 @@ def tagNS(tag, nsmap=NSMAP):
         return "{%s}%s" % (nsmap[match[0]], match[1].lower())
     return tag
 
-def innerHTML(xml):
+
+def inner_html(xml):
     return (xml.text or '') + ''.join([etree.tostring(child) for child in xml.iterchildren()])
 
-def cleanNode(xml):
+
+def clean_node(xml):
     [xml.remove(child) for child in xml.iterchildren()]
 
+
 class FeedException(Exception):
     pass
 
+
 def parse(data):
     # encoding
     match = re.search('encoding=["\']?([0-9a-zA-Z-]+)', data[:100])
@@ -80,15 +87,16 @@ def parse(data):
     # rss
     match = doc.xpath("//atom03:feed|//atom:feed|//channel|//rdf:rdf|//rdf:RDF", namespaces=NSMAP)
     if len(match):
-        mtable = { 'rdf:rdf': FeedParserRSS, 'channel': FeedParserRSS,
+        m_table = {'rdf:rdf': FeedParserRSS, 'channel': FeedParserRSS,
                    'atom03:feed': FeedParserAtom, 'atom:feed': FeedParserAtom}
         match = match[0]
-        tag = tagNS(match.tag)
-        if tag in mtable:
-            return mtable[tag](doc, tag)
+        tag = tag_NS(match.tag)
+        if tag in m_table:
+            return m_table[tag](doc, tag)
 
     raise FeedException('unknown feed type')
 
+
 class FeedBase(object):
     """
     Base for xml-related classes, which provides simple wrappers around xpath

@@ -135,7 +143,7 @@ class FeedBase(object):
         else:
             return ""
 
-    def xgetCreate(self, table):
+    def xget_create(self, table):
         """ Returns an element, and creates it when not present """
         value = table[self.tag]
         if not isinstance(value, tuple):

@@ -145,7 +153,7 @@ class FeedBase(object):
         if match is not None:
             return match
         else:
-            element = etree.Element(tagNS(new))
+            element = etree.Element(tag_NS(new))
             self.root.append(element)
             return element
@@ -158,58 +166,62 @@ class FeedBase(object):
         """ Returns string using lxml. Arguments passed to tostring """
         return etree.tostring(self.xml, pretty_print=True, **k)
 
 
 class FeedDescriptor(object):
     """
     Descriptor which gives off elements based on "self.getName" and
     "self.setName" as getter/setters. Looks far better, and avoids duplicates
     """
+
     def __init__(self, name):
         self.name = name
-        self.nname = name[0].upper() + name[1:]
 
     def __get__(self, instance, owner):
-        getter = getattr(instance, 'get%s' % self.nname)
+        getter = getattr(instance, 'get_%s' % self.name)
         return getter()
 
     def __set__(self, instance, value):
-        setter = getattr(instance, 'set%s' % self.nname)
+        setter = getattr(instance, 'set_%s' % self.name)
         return setter(value)
 
     def __delete__(self, instance):
-        deleter = getattr(instance, 'del%s' % self.nname)
+        deleter = getattr(instance, 'del_%s' % self.name)
         return deleter()
 
+
 class FeedTime(FeedDescriptor):
     def __get__(self, instance, owner):
-        getter = getattr(instance, 'get%s' % self.nname)
+        getter = getattr(instance, 'get_%s' % self.name)
         raw = getter()
         try:
-            time = parseTime(raw)
+            time = parse_time(raw)
             return time
         except ValueError:
             return None
 
     def __set__(self, instance, value):
         try:
-            time = parseTime(value)
+            time = parse_time(value)
             raw = time.strftime(instance.timeFormat)
-            setter = getattr(instance, 'set%s' % self.nname)
+            setter = getattr(instance, 'set_%s' % self.name)
             return setter(raw)
         except ValueError:
             pass
 
+
 class FeedBool(FeedDescriptor):
     def __get__(self, instance, owner):
-        getter = getattr(instance, 'get%s' % self.nname)
+        getter = getattr(instance, 'get_%s' % self.name)
         raw = getter()
         return (raw or '').lower() != 'false'
 
     def __set__(self, instance, value):
         raw = 'true' if value else 'false'
-        setter = getattr(instance, 'set%s' % self.nname)
+        setter = getattr(instance, 'set_%s' % self.name)
         return setter(raw)
 
-def parseTime(value):
+
+def parse_time(value):
     if isinstance(value, basestring):
         if re.match(r'^[0-9]+$', value):
             return datetime.fromtimestamp(int(value), tz.tzutc())
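The descriptor change above is the heart of the rename: FeedDescriptor no longer capitalizes the attribute name to find getFoo/setFoo, it dispatches straight to get_foo/set_foo. A self-contained sketch of the same pattern, with invented names:

# --- illustrative sketch, not part of the commit ---
class Descriptor(object):
    def __init__(self, name):
        self.name = name

    def __get__(self, instance, owner):
        # dispatches to the PEP 8-style accessor, e.g. get_title
        return getattr(instance, 'get_%s' % self.name)()

    def __set__(self, instance, value):
        getattr(instance, 'set_%s' % self.name)(value)

class Item(object):
    title = Descriptor('title')

    def __init__(self):
        self._title = ''

    def get_title(self):
        return self._title

    def set_title(self, value):
        self._title = value.strip()

item = Item()
item.title = '  Hello  '
print item.title  # -> 'Hello'
# --- end sketch ---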
@@ -222,6 +234,7 @@ def parseTime(value):
     else:
         return False
 
+
 class FeedList(object):
     """
     Class to map a list of xml elements against a list of matching objects,
@@ -231,14 +244,15 @@ class FeedList(object):
 
     Comes with its very own descriptor.
     """
-    def __init__(self, parent, getter, tag, childClass):
+
+    def __init__(self, parent, getter, tag, child_class):
         self.parent = parent
         self.getter = getter
-        self.childClass = childClass
+        self.childClass = child_class
         self.tag = tag
         self._children = {}  # id(xml) => FeedItem
 
-    def getChildren(self):
+    def get_children(self):
         children = self.getter()
         out = []
         for child in children:

@@ -269,7 +283,7 @@ class FeedList(object):
         return new
 
     def __getitem__(self, key):
-        return self.getChildren()[key]
+        return self.get_children()[key]
 
     def __delitem__(self, key):
         child = self.getter()[key]

@@ -282,10 +296,12 @@ class FeedList(object):
     def __len__(self):
         return len(self.getter())
 
+
 class FeedListDescriptor(object):
     """
     Descriptor for FeedList
     """
+
     def __init__(self, name):
         self.name = name
         self.items = {}  # id(instance) => FeedList

@@ -295,9 +311,9 @@ class FeedListDescriptor(object):
         if key in self.items:
             return self.items[key]
         else:
-            getter = getattr(instance, 'get%s' % self.name.title())
-            className = globals()[getattr(instance, '%sClass' % self.name)]
-            self.items[key] = FeedList(instance, getter, instance.tag, className)
+            getter = getattr(instance, 'get_%s' % self.name)
+            class_name = globals()[getattr(instance, '%sClass' % self.name)]
+            self.items[key] = FeedList(instance, getter, instance.tag, class_name)
             return self.items[key]
 
     def __set__(self, instance, value):

@@ -305,6 +321,7 @@ class FeedListDescriptor(object):
         [x.remove() for x in [x for x in f.items]]
         [feedlist.append(x) for x in value]
 
+
 class FeedParser(FeedBase):
     itemsClass = 'FeedItem'
     mimetype = 'application/xml'

@@ -318,27 +335,25 @@ class FeedParser(FeedBase):
         self.root = self.xml.xpath("//atom03:feed|//atom:feed|//channel|//rssfake:channel", namespaces=NSMAP)[0]
         self.tag = tag
 
-    def getTitle(self):
+    def get_title(self):
         return ""
 
-    def setTitle(self, value):
+    def set_title(self, value):
         pass
 
-    def delTitle(self):
+    def del_title(self):
         self.title = ""
 
-
-    def getDesc(self):
+    def get_desc(self):
         pass
 
-    def setDesc(self, value):
+    def set_desc(self, value):
         pass
 
-    def delDesc(self):
+    def del_desc(self):
         self.desc = ""
 
-
-    def getItems(self):
+    def get_items(self):
         return []
 
     title = FeedDescriptor('title')

@@ -355,7 +370,8 @@ class FeedParser(FeedBase):
         out = StringIO()
         c = csv.writer(out, dialect=csv.excel)
         for item in self.items:
-            row = [x[1].encode('utf-8') if isinstance(x[1], unicode) else x[1] for x in item if isinstance(x[1], basestring)]
+            row = [x[1].encode('utf-8') if isinstance(x[1], unicode) else x[1] for x in item if
+                   isinstance(x[1], basestring)]
             c.writerow(row)
         out.seek(0)
         return out.read()

@@ -369,44 +385,45 @@ class FeedParser(FeedBase):
         template = engine.get_template('reader')
         return template.render({'feed': self}).encode('utf-8')
 
 
 class FeedParserRSS(FeedParser):
     """
     RSS Parser
     """
     itemsClass = 'FeedItemRSS'
     mimetype = 'application/rss+xml'
-    base = { 'rdf:rdf': '<?xml version="1.0" encoding="utf-8"?><rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns="http://purl.org/rss/1.0/"><channel rdf:about="http://example.org/rss.rdf"></channel></rdf:RDF>',
+    base = {
+        'rdf:rdf': '<?xml version="1.0" encoding="utf-8"?><rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns="http://purl.org/rss/1.0/"><channel rdf:about="http://example.org/rss.rdf"></channel></rdf:RDF>',
         'channel': '<?xml version="1.0" encoding="utf-8"?><rss version="2.0"><channel></channel></rss>'}
 
-    def getTitle(self):
+    def get_title(self):
         return self.xval('rssfake:title|title')
 
-    def setTitle(self, value):
+    def set_title(self, value):
         if not value:
             return self.xdel('rssfake:title|title')
 
         table = {'rdf:rdf': 'rssfake:title',
                  'channel': 'title'}
-        element = self.xgetCreate(table)
+        element = self.xget_create(table)
         element.text = value
 
-
-    def getDesc(self):
+    def get_desc(self):
         return self.xval('rssfake:description|description')
 
-    def setDesc(self, value):
+    def set_desc(self, value):
         if not value:
             return self.xdel('rssfake:description|description')
 
         table = {'rdf:rdf': 'rssfake:description',
                  'channel': 'description'}
-        element = self.xgetCreate(table)
+        element = self.xget_create(table)
         element.text = value
 
-
-    def getItems(self):
+    def get_items(self):
         return self.xpath('rssfake:item|item')
 
 
 class FeedParserAtom(FeedParser):
     """
     Atom Parser

@@ -416,120 +433,112 @@ class FeedParserAtom(FeedParser):
     base = {'atom:feed': '<?xml version="1.0" encoding="utf-8"?><feed xmlns="http://www.w3.org/2005/Atom"></feed>',
             'atom03:feed': '<?xml version="1.0" encoding="utf-8"?><feed version="0.3" xmlns="http://purl.org/atom/ns#"></feed>'}
 
-    def getTitle(self):
+    def get_title(self):
         return self.xval('atom:title|atom03:title')
 
-    def setTitle(self, value):
+    def set_title(self, value):
         if not value:
             return self.xval('atom:title|atom03:title')
 
         table = {'atom:feed': 'atom:title',
                  'atom03:feed': 'atom03:title'}
-        element = self.xgetCreate(table)
+        element = self.xget_create(table)
         element.text = value
 
-
-    def getDesc(self):
+    def get_desc(self):
         return self.xval('atom:subtitle|atom03:subtitle')
 
-    def setDesc(self, value):
+    def set_desc(self, value):
         if not value:
             return self.xdel('atom:subtitle|atom03:subtitle')
 
         table = {'atom:feed': 'atom:subtitle',
                  'atom03:feed': 'atom03:subtitle'}
-        element = self.xgetCreate(table)
+        element = self.xget_create(table)
         element.text = value
 
-
-    def getItems(self):
+    def get_items(self):
         return self.xpath('atom:entry|atom03:entry')
 
 
 class FeedItem(FeedBase):
     timeFormat = ''
-    dic = ('title', 'link', 'desc', 'content', 'id', 'isPermaLink', 'time', 'updated')
+    dic = ('title', 'link', 'desc', 'content', 'id', 'is_permalink', 'time', 'updated')
 
     def __init__(self, xml=None, tag='atom:feed'):
         if xml is None:
-            xml = Element(tagNS(self.base[tag]))
+            xml = Element(tag_NS(self.base[tag]))
 
         self.root = self.xml = xml
         self.tag = tag
 
-    def getTitle(self):
+    def get_title(self):
         return ""
 
-    def setTitle(self):
+    def set_title(self, value):
         pass
 
-    def delTitle(self):
+    def del_title(self):
         self.title = ""
 
-
-    def getLink(self):
+    def get_link(self):
         return ""
 
-    def setLink(self, value):
+    def set_link(self, value):
         pass
 
-    def delLink(self):
+    def del_link(self):
         self.link = ""
 
-
-    def getIsPermaLink(self):
+    def get_is_permalink(self):
         return ""
 
-    def setIsPermaLink(self, value):
+    def set_is_permalink(self, value):
         pass
 
-
-    def getDesc(self):
+    def get_desc(self):
         return ""
 
-    def setDesc(self, value):
+    def set_desc(self, value):
         pass
 
-    def delDesc(self):
+    def del_desc(self):
         self.desc = ""
 
-
-    def getContent(self):
+    def get_content(self):
         return ""
 
-    def setContent(self, value):
+    def set_content(self, value):
         pass
 
-    def delContent(self):
+    def del_content(self):
         self.content = ""
 
-
-    def getId(self):
+    def get_id(self):
         return ""
 
-    def setId(self, value):
+    def set_id(self, value):
         pass
 
-    def delId(self):
+    def del_id(self):
         self.id = ""
 
-
-    def getTime(self):
+    def get_time(self):
         return None
 
-    def setTime(self, value):
+    def set_time(self, value):
         pass
 
     def delTime(self):
         self.time = None
 
-
-    def getUpdated(self):
+    def get_updated(self):
         return None
 
-    def setUpdated(self, value):
+    def set_updated(self, value):
         pass
 
-    def delUpdated(self):
+    def del_updated(self):
         self.updated = None
 
     title = FeedDescriptor('title')

@@ -537,11 +546,11 @@ class FeedItem(FeedBase):
     description = desc = FeedDescriptor('desc')
     content = FeedDescriptor('content')
     id = FeedDescriptor('id')
-    isPermaLink = FeedBool('isPermaLink')
+    is_permalink = FeedBool('is_permalink')
     time = FeedTime('time')
     updated = FeedTime('updated')
 
-    def pushContent(self, value):
+    def push_content(self, value):
         if not self.desc and self.content:
             self.desc = self.content

@@ -550,201 +559,192 @@ class FeedItem(FeedBase):
     def remove(self):
         self.xml.getparent().remove(self.xml)
 
 
 class FeedItemRSS(FeedItem):
     timeFormat = '%a, %d %b %Y %H:%M:%S %Z'
     base = {'rdf:rdf': 'rssfake:item',
             'channel': 'item'}
 
-    def getTitle(self):
+    def get_title(self):
         return self.xval('rssfake:title|title')
 
-    def setTitle(self, value):
+    def set_title(self, value):
         if not value:
             return self.xdel('rssfake:title|title')
 
         table = {'rdf:rdf': 'rssfake:title',
                  'channel': 'title'}
-        element = self.xgetCreate(table)
+        element = self.xget_create(table)
         element.text = value
 
-
-    def getLink(self):
+    def get_link(self):
         return self.xval('rssfake:link|link')
 
-    def setLink(self, value):
-        if self.isPermaLink and self.id == self.link != value:
-            self.isPermaLink = False
+    def set_link(self, value):
+        if self.is_permalink and self.id == self.link != value:
+            self.is_permalink = False
 
         table = {'rdf:rdf': 'rssfake:link',
                  'channel': 'link'}
-        element = self.xgetCreate(table)
+        element = self.xget_create(table)
         element.text = value
 
-
-    def getDesc(self):
+    def get_desc(self):
         return self.xval('rssfake:description|description')
 
-    def setDesc(self, value):
+    def set_desc(self, value):
         if not value:
             return self.xdel('rssfake:description|description')
 
         table = {'rdf:rdf': 'rssfake:description',
                  'channel': 'description'}
-        element = self.xgetCreate(table)
+        element = self.xget_create(table)
         element.text = value
 
-
-    def getContent(self):
+    def get_content(self):
         return self.xval('content:encoded')
 
-    def setContent(self, value):
+    def set_content(self, value):
         if not value:
             return self.xdel('content:encoded')
 
         table = {'rdf:rdf': 'content:encoded',
                  'channel': 'content:encoded'}
-        element = self.xgetCreate(table)
+        element = self.xget_create(table)
         element.text = value
 
-
-    def getId(self):
+    def get_id(self):
        return self.xval('rssfake:guid|guid')
 
-    def setId(self, value):
+    def set_id(self, value):
         if not value:
             return self.xdel('rssfake:guid|guid')
 
         table = {'rdf:rdf': 'rssfake:guid',
                  'channel': 'guid'}
-        element = self.xgetCreate(table)
+        element = self.xget_create(table)
         element.text = value
 
-
-    def getIsPermaLink(self):
+    def get_is_permalink(self):
         return self.xget('rssfake:guid/@isPermaLink|guid/@isPermaLink')
 
-    def setIsPermaLink(self, value):
+    def set_is_permalink(self, value):
         table = {'rdf:rdf': 'rssfake:guid',
                  'channel': 'guid'}
-        element = self.xgetCreate(table)
+        element = self.xget_create(table)
         element.attrib['isPermaLink'] = value
 
-
-    def getTime(self):
+    def get_time(self):
         return self.xval('rssfake:pubDate|pubDate')
 
-    def setTime(self, value):
+    def set_time(self, value):
         if not value:
             return self.xdel('rssfake:pubDate|pubDate')
 
         table = {'rdf:rdf': 'rssfake:pubDate',
                  'channel': 'pubDate'}
-        element = self.xgetCreate(table)
+        element = self.xget_create(table)
         element.text = value
 
 
 class FeedItemAtom(FeedItem):
     timeFormat = '%Y-%m-%dT%H:%M:%SZ'
     base = {'atom:feed': 'atom:entry',
             'atom03:feed': 'atom03:entry'}
 
-    def getTitle(self):
+    def get_title(self):
         return self.xval('atom:title|atom03:title')
 
-    def setTitle(self, value):
+    def set_title(self, value):
         if not value:
             return self.xdel('atom:title|atom03:title')
 
         table = {'atom:feed': 'atom:title',
                  'atom03:feed': 'atom03:title'}
-        element = self.xgetCreate(table)
+        element = self.xget_create(table)
         element.text = value
 
-
-    def getLink(self):
+    def get_link(self):
         return self.xget('(atom:link|atom03:link)[@rel="alternate" or not(@rel)]/@href')
 
-    def setLink(self, value):
+    def set_link(self, value):
         table = {'atom:feed': ('atom:link', 'atom:link[@rel="alternate" or not(@rel)]'),
                  'atom03:feed': ('atom03:link', 'atom03:link[@rel="alternate" or not(@rel)]')}
-        element = self.xgetCreate(table)
+        element = self.xget_create(table)
         element.attrib['href'] = value
 
-
-    def getDesc(self):
+    def get_desc(self):
         # default "type" is "text"
         element = self.xget('atom:summary|atom03:summary')
         if element is not None:
-            return innerHTML(element)
+            return inner_html(element)
         else:
             return ""
 
-    def setDesc(self, value):
+    def set_desc(self, value):
         if not value:
             return self.xdel('atom:summary|atom03:summary')
 
         table = {'atom:feed': 'atom:summary',
                  'atom03:feed': 'atom03:summary'}
-        element = self.xgetCreate(table)
+        element = self.xget_create(table)
         if element.attrib.get('type', '') == 'xhtml':
-            cleanNode(element)
+            clean_node(element)
         element.attrib['type'] = 'html'
         element.text = value
 
-
-    def getContent(self):
+    def get_content(self):
         element = self.xget('atom:content|atom03:content')
         if element is not None:
-            return innerHTML(element)
+            return inner_html(element)
         else:
             return ""
 
-    def setContent(self, value):
+    def set_content(self, value):
         if not value:
             return self.xdel('atom:content|atom03:content')
 
         table = {'atom:feed': 'atom:content',
                  'atom03:feed': 'atom03:content'}
-        element = self.xgetCreate(table)
+        element = self.xget_create(table)
         if element.attrib.get('type', '') == 'xhtml':
-            cleanNode(element)
+            clean_node(element)
         element.attrib['type'] = 'html'
         element.text = value
 
-
-    def getId(self):
+    def get_id(self):
         return self.xval('atom:id|atom03:id')
 
-    def setId(self, value):
+    def set_id(self, value):
         if not value:
             return self.xdel('atom:id|atom03:id')
 
         table = {'atom:feed': 'atom:id',
                  'atom03:feed': 'atom03:id'}
-        element = self.xgetCreate(table)
+        element = self.xget_create(table)
         element.text = value
 
-
-    def getTime(self):
+    def get_time(self):
         return self.xval('atom:published|atom03:published')
 
-    def setTime(self, value):
+    def set_time(self, value):
         if not value:
             return self.xdel('atom:published|atom03:published')
 
         table = {'atom:feed': 'atom:published',
                  'atom03:feed': 'atom03:published'}
-        element = self.xgetCreate(table)
+        element = self.xget_create(table)
         element.text = value
 
-
-    def getUpdated(self):
+    def get_updated(self):
         return self.xval('atom:updated|atom03:updated')
 
-    def setUpdated(self, value):
+    def set_updated(self, value):
         if not value:
             return self.xdel('atom:updated|atom03:updated')
 
         table = {'atom:feed': 'atom:updated',
                  'atom03:feed': 'atom03:updated'}
-        element = self.xgetCreate(table)
+        element = self.xget_create(table)
         element.text = value
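The parse_time helper renamed above only shows its unix-timestamp branch in the captured hunk; the import list (dateutil.parser, dateutil.tz) suggests other strings go through dateutil. A minimal sketch under that assumption:

# --- illustrative sketch, not part of the commit ---
import dateutil.parser
from dateutil import tz
from datetime import datetime

def parse_time_sketch(value):
    # all-digit strings are unix timestamps (matches the branch shown above);
    # anything else is assumed to go through dateutil.parser (elided in the hunk)
    if value.isdigit():
        return datetime.fromtimestamp(int(value), tz.tzutc())
    return dateutil.parser.parse(value)

print parse_time_sketch('1380994920')
print parse_time_sketch('5 Oct 2013 22:42')
# --- end sketch ---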

morss/morss.py | 116
@@ -44,7 +44,8 @@ DEBUG = False
 UA_RSS = 'Liferea/1.8.12 (Linux; fr_FR.utf8; http://liferea.sf.net/)'
 UA_HTML = 'Mozilla/5.0 (X11; Linux x86_64; rv:25.0) Gecko/20100101 Firefox/25.0'
 
-MIMETYPE = { 'xml': ['text/xml', 'application/xml', 'application/rss+xml', 'application/rdf+xml', 'application/atom+xml'],
+MIMETYPE = {
+    'xml': ['text/xml', 'application/xml', 'application/rss+xml', 'application/rdf+xml', 'application/atom+xml'],
     'html': ['text/html', 'application/xhtml+xml', 'application/xml']}
 
 FBAPPID = "<insert yours>"

@@ -57,11 +58,14 @@ if 'SCRIPT_NAME' in os.environ:
     httplib.HTTPConnection.debuglevel = 1
 
     import cgitb
+
     cgitb.enable()
 
+
 class MorssException(Exception):
     pass
 
+
 def log(txt, force=False):
     if DEBUG or force:
         if 'REQUEST_URI' in os.environ:

@@ -70,18 +74,19 @@ def log(txt, force=False):
             print repr(txt)
 
 
-def lenHTML(txt):
+def len_html(txt):
     if len(txt):
         return len(lxml.html.fromstring(txt).text_content())
     else:
         return 0
 
-def countWord(txt):
+
+def count_words(txt):
     if len(txt):
         return len(lxml.html.fromstring(txt).text_content().split())
     else:
         return 0
 
 
 class Options:
     def __init__(self, options=None):
         self.options = options or []

@@ -95,8 +100,10 @@ class Options:
     def __contains__(self, key):
         return key in self.options
 
+
 class Cache:
     """ Light, error-prone caching system. """
+
     def __init__(self, folder=None, key='cache', lifespan=10 * 24 * 3600):
         self._key = key
         self._dir = folder

@@ -178,13 +185,16 @@ class Cache:
         else:
             return self
 
+
 class SimpleDownload(urllib2.HTTPCookieProcessor):
     """
     Custom urllib2 handler to download a page, using etag/last-modified headers,
     to save bandwidth. The given headers are added back into the header on error
     304 for easier use.
     """
-    def __init__(self, cache="", etag=None, lastmodified=None, useragent=UA_HTML, decode=True, cookiejar=None, accept=None, strict=False):
+
+    def __init__(self, cache="", etag=None, lastmodified=None, useragent=UA_HTML, decode=True, cookiejar=None,
+                 accept=None, strict=False):
         urllib2.HTTPCookieProcessor.__init__(self, cookiejar)
         self.cache = cache
         self.etag = etag

@@ -214,7 +224,7 @@ class SimpleDownload(urllib2.HTTPCookieProcessor):
         out = {}
         rank = 1.1
         for group in self.accept:
-            rank = rank - 0.1
+            rank -= 0.1
 
             if isinstance(group, basestring):
                 if group in MIMETYPE:

@@ -259,20 +269,20 @@ class SimpleDownload(urllib2.HTTPCookieProcessor):
         if resp.info().type in MIMETYPE['html']:
             match = re.search(r'(?i)<meta http-equiv=.refresh[^>]*?url=(http.*?)["\']', data)
             if match:
-                newurl = match.groups()[0]
-                log('redirect: %s' % newurl)
+                new_url = match.groups()[0]
+                log('redirect: %s' % new_url)
 
-                newheaders = dict((k,v) for k,v in req.headers.items()
+                new_headers = dict((k, v) for k, v in req.headers.items()
                                   if k.lower() not in ('content-length', 'content-type'))
-                new = urllib2.Request(newurl,
-                                      headers=newheaders,
+                new = urllib2.Request(new_url,
+                                      headers=new_headers,
                                       origin_req_host=req.get_origin_req_host(),
                                       unverifiable=True)
 
                 return self.parent.open(new, timeout=req.timeout)
 
         # encoding
-        enc = detEncoding(data, resp)
+        enc = detect_encoding(data, resp)
 
         if enc:
             data = data.decode(enc, 'replace')

@@ -290,7 +300,8 @@ class SimpleDownload(urllib2.HTTPCookieProcessor):
     https_response = http_response
     https_request = http_request
 
-def detEncoding(data, con=None):
+
+def detect_encoding(data, con=None):
     if con is not None and con.headers.getparam('charset'):
         log('header')
         return con.headers.getparam('charset')

@@ -306,6 +317,7 @@ def detEncoding(data, con=None):
 
     return None
 
+
 def Fix(item, feedurl='/'):
     """ Improves feed items (absolute links, resolve feedburner links, etc) """
 

@@ -358,7 +370,8 @@ def Fix(item, feedurl='/'):
         match = re.search('/([0-9a-zA-Z]{20,})/story01.htm$', item.link)
         if match:
             url = match.groups()[0].split('0')
-            t = {'A':'0', 'B':'.', 'C':'/', 'D':'?', 'E':'-', 'H':',', 'I':'_', 'L':'http://', 'S':'www.', 'N':'.com', 'O':'.co.uk'}
+            t = {'A': '0', 'B': '.', 'C': '/', 'D': '?', 'E': '-', 'H': ',', 'I': '_', 'L': 'http://', 'S': 'www.',
+                 'N': '.com', 'O': '.co.uk'}
             item.link = ''.join([(t[s[0]] if s[0] in t else '=') + s[1:] for s in url[1:]])
             log(item.link)

@@ -371,6 +384,7 @@ def Fix(item, feedurl='/'):
 
     return item
 
+
 def Fill(item, cache, feedurl='/', fast=False):
     """ Returns True when it has done its best """
 

@@ -381,8 +395,8 @@ def Fill(item, cache, feedurl='/', fast=False):
     log(item.link)
 
     # content already provided?
-    count_content = countWord(item.content)
-    count_desc = countWord(item.desc)
+    count_content = count_words(item.content)
+    count_desc = count_words(item.desc)
 
     if max(count_content, count_desc) > 500:
         if count_desc > count_content:

@@ -432,7 +446,7 @@ def Fill(item, cache, feedurl='/', fast=False):
             log('old error')
         else:
             log('cached')
-            item.pushContent(cache.get(link))
+            item.push_content(cache.get(link))
             return True
 
     # super-fast mode

@@ -457,8 +471,8 @@ def Fill(item, cache, feedurl='/', fast=False):
 
     out = readability.Document(data, url=con.url).summary(True)
 
-    if countWord(out) > max(count_content, count_desc) > 0:
-        item.pushContent(out)
+    if count_words(out) > max(count_content, count_desc) > 0:
+        item.push_content(out)
         cache.set(link, out)
     else:
         log('not bigger enough')

@@ -467,7 +481,8 @@ def Fill(item, cache, feedurl='/', fast=False):
 
     return True
 
-def Init(url, cachePath, options):
+
+def Init(url, cache_path, options):
     # url clean up
     log(url)
 

@@ -481,14 +496,15 @@ def Init(url, cachePath, options):
     url = url.replace(' ', '%20')
 
     # cache
-    cache = Cache(cachePath, url)
+    cache = Cache(cache_path, url)
     log(cache._hash)
 
     return (url, cache)
 
+
 def Fetch(url, cache, options):
     # do some useful facebook work
-    feedify.PreWorker(url, cache)
+    feedify.pre_worker(url, cache)
 
     if 'redirect' in cache:
         url = cache.get('redirect')

@@ -502,7 +518,8 @@ def Fetch(url, cache, options):
         style = cache.get('style')
     else:
         try:
-            opener = SimpleDownload(cache.get(url), cache.get('etag'), cache.get('lastmodified'), accept=('xml','html'))
+            opener = SimpleDownload(cache.get(url), cache.get('etag'), cache.get('lastmodified'),
+                                    accept=('xml', 'html'))
             con = urllib2.build_opener(opener).open(url, timeout=TIMEOUT * 2)
             xml = con.read()
         except (IOError, httplib.HTTPException):

@@ -540,7 +557,8 @@ def Fetch(url, cache, options):
         feed.build()
         rss = feed.feed
     elif style == 'html':
-        match = lxml.html.fromstring(xml).xpath("//link[@rel='alternate'][@type='application/rss+xml' or @type='application/atom+xml']/@href")
+        match = lxml.html.fromstring(xml).xpath(
+            "//link[@rel='alternate'][@type='application/rss+xml' or @type='application/atom+xml']/@href")
         if len(match):
             link = urlparse.urljoin(url, match[0])
             log('rss redirect: %s' % link)

@@ -552,13 +570,13 @@ def Fetch(url, cache, options):
         log('random page')
         raise MorssException('Link provided is not a valid feed')
 
 
     cache.save()
     return rss
 
 
 def Gather(rss, url, cache, options):
     size = len(rss.items)
-    startTime = time.time()
+    start_time = time.time()
 
     # custom settings
     lim_item = LIM_ITEM

@@ -580,14 +598,14 @@ def Gather(rss, url, cache, options):
         queue.task_done()
 
     def worker(i, item):
-        if time.time() - startTime > lim_time >= 0 or i+1 > lim_item >= 0:
+        if time.time() - start_time > lim_time >= 0 or i + 1 > lim_item >= 0:
             log('dropped')
             item.remove()
             return
 
         item = Fix(item, url)
 
-        if time.time() - startTime > max_time >= 0 or i+1 > max_item >= 0:
+        if time.time() - start_time > max_time >= 0 or i + 1 > max_item >= 0:
             if not options.proxy:
                 if Fill(item, cache, url, True) is False:
                     item.remove()

@@ -617,10 +635,11 @@ def Gather(rss, url, cache, options):
         new.time = "5 Oct 2013 22:42"
 
     log(len(rss.items))
-    log(time.time() - startTime)
+    log(time.time() - start_time)
 
     return rss
 
+
 def After(rss, options):
     for i, item in enumerate(rss.items):
 

@@ -662,8 +681,9 @@ def After(rss, options):
     else:
         return rss.tostring(xml_declaration=True, encoding='UTF-8')
 
+
 def process(url, cache=None, options=None):
-    if options == None:
+    if not options:
         options = []
 
     options = Options(options)

@@ -673,6 +693,7 @@ def process(url, cache=None, options=None):
 
     return After(rss, options)
 
+
 def cgi_app(environ, start_response):
     # get options
     if 'REQUEST_URI' in environ:

@@ -696,7 +717,8 @@ def cgi_app(environ, start_response):
     DEBUG = options.debug
 
     if 'HTTP_IF_NONE_MATCH' in environ:
-        if not options.force and not options.facebook and time.time() - int(environ['HTTP_IF_NONE_MATCH'][1:-1]) < DELAY:
+        if not options.force and not options.facebook and time.time() - int(
+                environ['HTTP_IF_NONE_MATCH'][1:-1]) < DELAY:
             headers['status'] = '304 Not Modified'
             start_response(headers['status'], headers.items())
             log(url)

@@ -722,25 +744,26 @@ def cgi_app(environ, start_response):
     url, cache = Init(url, os.getcwd() + '/cache', options)
 
     if options.facebook:
-        doFacebook(url, environ, headers, options, cache)
+        do_facebook(url, environ, headers, options, cache)
         start_response(headers['status'], headers.items())
         return
 
     # get the work done
-    RSS = Fetch(url, cache, options)
+    rss = Fetch(url, cache, options)
 
     if headers['content-type'] == 'text/xml':
-        headers['content-type'] = RSS.mimetype
+        headers['content-type'] = rss.mimetype
 
     start_response(headers['status'], headers.items())
 
-    RSS = Gather(RSS, url, cache, options)
+    rss = Gather(rss, url, cache, options)
 
     if not DEBUG and not options.silent:
-        return After(RSS, options)
+        return After(rss, options)
 
     log('done')
 
+
 def cgi_wrapper(environ, start_response):
     # simple http server for html and css
     files = {

@@ -774,13 +797,12 @@ def cgi_wrapper(environ, start_response):
     except (KeyboardInterrupt, SystemExit):
         raise
     except Exception as e:
-        headers = {}
-        headers['status'] = '500 Oops'
-        headers['content-type'] = 'text/plain'
+        headers = {'status': '500 Oops', 'content-type': 'text/plain'}
         start_response(headers['status'], headers.items(), sys.exc_info())
         log('ERROR: %s' % e.message, force=True)
         return 'An error happened'
 
+
 def cli_app():
     options = Options(sys.argv[1:-1])
     url = sys.argv[-1]

@@ -789,15 +811,16 @@ def cli_app():
     DEBUG = options.debug
 
     url, cache = Init(url, os.path.expanduser('~/.cache/morss'), options)
-    RSS = Fetch(url, cache, options)
-    RSS = Gather(RSS, url, cache, options)
+    rss = Fetch(url, cache, options)
+    rss = Gather(rss, url, cache, options)
 
     if not DEBUG and not options.silent:
-        print After(RSS, options)
+        print After(rss, options)
 
     log('done')
 
-def doFacebook(url, environ, headers, options, cache):
+
+def do_facebook(url, environ, headers, options, cache):
     log('fb stuff')
 
     query = urlparse.urlparse(url).query

@@ -805,11 +828,13 @@ def doFacebook(url, environ, headers, options, cache):
     if 'code' in query:
         # get real token from code
         code = urlparse.parse_qs(query)['code'][0]
-        eurl = "https://graph.facebook.com/oauth/access_token?client_id={app_id}&redirect_uri={redirect_uri}&client_secret={app_secret}&code={code_parameter}".format(app_id=FBAPPID, app_secret=FBSECRET, code_parameter=code, redirect_uri=environ['SCRIPT_URI'])
+        eurl = "https://graph.facebook.com/oauth/access_token?client_id={app_id}&redirect_uri={redirect_uri}&client_secret={app_secret}&code={code_parameter}".format(
+            app_id=FBAPPID, app_secret=FBSECRET, code_parameter=code, redirect_uri=environ['SCRIPT_URI'])
         token = urlparse.parse_qs(urllib2.urlopen(eurl).read().strip())['access_token'][0]
 
         # get long-lived access token
-        eurl = "https://graph.facebook.com/oauth/access_token?grant_type=fb_exchange_token&client_id={app_id}&client_secret={app_secret}&fb_exchange_token={short_lived_token}".format(app_id=FBAPPID, app_secret=FBSECRET, short_lived_token=token)
+        eurl = "https://graph.facebook.com/oauth/access_token?grant_type=fb_exchange_token&client_id={app_id}&client_secret={app_secret}&fb_exchange_token={short_lived_token}".format(
+            app_id=FBAPPID, app_secret=FBSECRET, short_lived_token=token)
         values = urlparse.parse_qs(urllib2.urlopen(eurl).read().strip())
 
         ltoken = values['access_token'][0]

@@ -824,6 +849,7 @@ def doFacebook(url, environ, headers, options, cache):
     log('fb done')
     return
 
+
 def main():
     if 'REQUEST_URI' in os.environ:
         wsgiref.handlers.CGIHandler().run(cgi_wrapper)
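One detail worth noting in the Gather worker hunks above: expressions like time.time() - start_time > lim_time >= 0 are Python chained comparisons, so the limit is only enforced when it is non-negative, and a negative limit disables the check entirely. A small demonstration:

# --- illustrative sketch, not part of the commit ---
elapsed = 12.0
lim_time = 10                   # positive limit: enforced
print elapsed > lim_time >= 0   # True -> the item would be dropped
lim_time = -1                   # negative limit: check disabled
print elapsed > lim_time >= 0   # False, because -1 >= 0 fails
# --- end sketch ---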