Added cache, easier debug

master
pictuga 2013-02-25 18:01:59 +01:00
parent 7dfe92de63
commit b63f91a151
1 changed files with 30 additions and 24 deletions

54
morss
View File

@ -1,51 +1,57 @@
#! /usr/bin/env python2.7 #! /usr/bin/env python2.7
import sys import sys
import os
from lxml import etree from lxml import etree
import urllib2 import urllib2
from os.path import expanduser
def log(str):
if (len(sys.argv) == 3):
print str
if len(sys.argv) < 2: if len(sys.argv) < 2:
print "argument please" print "argument please"
sys.exit(1) sys.exit(1)
debug = (len(sys.argv) == 3)
node = sys.argv[1] node = sys.argv[1]
xml = sys.stdin.read() xml = sys.stdin.read()
rss = etree.fromstring(xml) rss = etree.fromstring(xml)
items = rss.xpath('//item') items = rss.xpath('//item')
#comment below to keep too-long feeds (like BBC) cache = expanduser("~") + "/.cache/morss"
del items[30:] if not os.path.exists(cache):
os.makedirs(cache)
for item in items: for item in items:
title = item.findtext('title') title = item.findtext('title')
link = item.findtext('link') link = item.findtext('link')
desc = item.xpath('description')[0] desc = item.xpath('description')[0]
if debug: log(title)
print title log(link)
print link
try: cached = cache + "/" + str(hash(link))
data = urllib2.urlopen(link).read()
html = etree.HTML(data) if os.path.exists(cached):
match = html.xpath(node) log("cached")
desc.text = open(cached, 'r').read()
else:
try:
data = urllib2.urlopen(link).read()
html = etree.HTML(data)
match = html.xpath(node)
if len(match): if len(match):
text = etree.tostring(match[0]) text = etree.tostring(match[0])
log("ok txt")
desc.text = text
open(cached, 'w').write(text)
else:
log("no match")
if debug: except urllib2.HTTPError, error:
print text log("error")
desc.text = text
else:
if debug:
print "no match"
except urllib2.HTTPError, error:
if debug:
print "error"
if len(sys.argv) == 2: if len(sys.argv) == 2:
print etree.tostring(rss) print etree.tostring(rss)