67 lines
1.3 KiB
Python
67 lines
1.3 KiB
Python
#! /usr/bin/env python2.7
|
|
import sys
|
|
import os
|
|
from os.path import expanduser
|
|
from lxml import etree
|
|
import urllib2
|
|
import urllib
|
|
from cookielib import CookieJar
|
|
|
|
def log(txt):
    """Print *txt* to stdout, but only when debugging is enabled.

    Debugging is enabled by setting the DEBUG environment variable to any
    non-empty value; otherwise the call does nothing.
    """
    debug_enabled = os.environ.get('DEBUG', False)
    if not debug_enabled:
        return
    print(txt)
|
|
|
|
# XPath expression evaluated against every fetched article page; it can be
# given as the first command-line argument and otherwise defaults to the
# parent element of an <h1>.
if len(sys.argv) > 1:
    node = sys.argv[1]
else:
    node = "//h1/.."

# The feed arrives on standard input; collect all of its <item> elements.
xml = sys.stdin.read()
rss = etree.fromstring(xml)
items = rss.xpath('//item')

# Directory holding one cache file per article; created on first use.
cache = "".join([expanduser("~"), "/.cache/morss"])
cache_is_missing = not os.path.exists(cache)
if cache_is_missing:
    os.makedirs(cache)
|
|
|
|
# Replace the text of each item's <description> with the serialised markup
# of the first element matching `node` on the page the item links to.
# Results are kept in one file per link inside the `cache` directory so a
# page is only downloaded once. Any item that cannot be processed is logged
# and left unchanged; a single bad item never aborts the whole feed.
for item in items:
    title = item.findtext('title')
    link = item.findtext('link')
    descriptions = item.xpath('description')

    log(title)
    log(link)

    # Without a link there is nothing to fetch, and without a <description>
    # there is nowhere to store the result.
    if not link or not descriptions:
        log("incomplete item")
        continue
    desc = descriptions[0]

    # NOTE(review): hash() of a str is only stable across runs while hash
    # randomisation is off (the Python 2.7 default) and differs between
    # 32- and 64-bit builds, so cache files are not portable.
    cached = cache + "/" + str(hash(link))
    log(cached)

    if os.path.exists(cached):
        log("cached")
        with open(cached, 'r') as cache_file:
            desc.text = cache_file.read()
        continue

    try:
        # A fresh cookie jar per request: cookies set while fetching this
        # page are honoured but never shared between articles.
        cj = CookieJar()
        opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
        response = opener.open(link)
        try:
            data = response.read()
        finally:
            response.close()
    except urllib2.HTTPError as error:
        log(error)
        log("http error")
        continue
    except urllib2.URLError as error:
        # Failures below the HTTP layer (DNS, refused connection, ...).
        log(error)
        log("url error")
        continue

    # Depending on the lxml version an empty or unparseable body either
    # raises or yields None; treat both as "nothing to extract".
    try:
        html = etree.HTML(data)
    except etree.XMLSyntaxError:
        html = None
    if html is None:
        log("no html")
        continue

    match = html.xpath(node)
    if not match:
        log("no match")
        continue

    try:
        text = etree.tostring(match[0])
        log("ok txt")
    except etree.SerialisationError:
        log('serialisation')
        continue

    try:
        # Assign first: lxml rejects text that is not XML-compatible with
        # ValueError, and such text must not reach the cache either.
        desc.text = text
        with open(cached, 'w') as cache_file:
            cache_file.write(text)
    except ValueError:
        log('xml error')
|
|
|
|
# With DEBUG set, stdout carries the log() trace instead, so the rewritten
# feed is only emitted when DEBUG is unset or empty.
if not os.environ.get('DEBUG', False):
    print(etree.tostring(rss))
|