morss/morss

70 lines
1.4 KiB
Python

#! /usr/bin/env python2.7
import sys
import os
from os.path import expanduser
from lxml import etree
import string
import urllib2
import urllib
from cookielib import CookieJar
def log(txt):
    """Print ``txt`` to stdout when debugging is enabled.

    Debugging is enabled when the ``DEBUG`` environment variable is set to
    a non-empty value; otherwise the call does nothing.
    """
    debug_enabled = os.getenv('DEBUG', False)
    if not debug_enabled:
        return
    # Parenthesised single-argument form: behaves the same as the Python 2
    # print statement for one expression.
    print(txt)
def xmlclean(xml):
    """Return ``xml`` with the control characters XML 1.0 forbids removed.

    Only the characters 0x00-0x08, 0x0B, 0x0C and 0x0E-0x1F are dropped.
    Tab, line feed and carriage return are legal in XML and are kept:
    removing them would merge words in text content and could glue a tag
    name to an attribute that starts on the following line.

    :param xml: the raw document text.
    :returns: the same text without forbidden control characters.
    """
    # Local import so this function does not depend on the file's import list.
    import re
    return re.sub(r'[\x00-\x08\x0b\x0c\x0e-\x1f]', '', xml)
# XPath expression selecting, in each linked page, the element that replaces
# the item's description. Taken from the first command-line argument; the
# default picks the parent of an <h1>.
node = sys.argv[1] if len(sys.argv) > 1 else "//h1/.."

# Read the feed from stdin, clean it and parse it.
xml = xmlclean(sys.stdin.read())
rss = etree.XML(xml)
items = rss.xpath('//item')

# Cache directory under the user's home: one file per article link.
cache = os.path.join(expanduser("~"), ".cache", "morss")
if not os.path.exists(cache):
    os.makedirs(cache)

for item in items:
    # An item without a link or a description cannot be processed; skip it
    # instead of aborting the whole feed. Surrounding whitespace in the link
    # is dropped so it is not passed on to urllib2.
    link = (item.findtext('link') or '').strip().encode('utf-8')
    descs = item.xpath('description')
    if not link or not descs:
        log("incomplete item")
        continue
    desc = descs[0]
    log(link)

    # NOTE(review): the cache file name relies on hash() of the link being
    # stable between runs -- confirm for the interpreter in use.
    cached = os.path.join(cache, str(hash(link)))
    log(cached)

    if os.path.exists(cached):
        log("cached")
        with open(cached, 'r') as cache_file:
            desc.text = cache_file.read()
        continue

    # Download the page; only the network calls sit inside the try block.
    try:
        cj = CookieJar()
        opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
        response = opener.open(link)
        try:
            data = response.read()
        finally:
            response.close()
    except (urllib2.HTTPError, urllib2.URLError) as error:
        log(error)
        log("http error")
        continue

    # An empty or unparseable page is treated like a page without a match.
    try:
        html = etree.HTML(data)
    except etree.XMLSyntaxError:
        html = None
    match = html.xpath(node) if html is not None else []
    if not match:
        log("no match")
        continue

    try:
        text = etree.tostring(match[0])
        log("ok txt")
    except etree.SerialisationError:
        log('serialisation')
        continue

    # Only cache text that was accepted as the description's content.
    try:
        desc.text = text
    except ValueError:
        log('xml error')
        continue
    with open(cached, 'w') as cache_file:
        cache_file.write(text)

# With DEBUG set, stdout carries the log lines, so the feed is not printed.
if not os.getenv('DEBUG', False):
    print(etree.tostring(rss))