Default to "//h1/.." since most websites use it

because it is said to be good for SEO. Debug now requires the environment variable "DEBUG" to be set to something other than "".
master
pictuga 2013-02-25 21:36:02 +01:00
parent 253bc27f17
commit ed8a45875c
2 changed files with 5 additions and 21 deletions

14
morss
View File

@ -7,15 +7,11 @@ import urllib2
import urllib import urllib
from cookielib import CookieJar from cookielib import CookieJar
def log(str): def log(txt):
if (len(sys.argv) == 3): if os.getenv('DEBUG', False):
print str print txt
if len(sys.argv) < 2: node = sys.argv[1] if len(sys.argv) > 1 else "//h1/.."
print "argument please"
sys.exit(1)
node = sys.argv[1]
xml = sys.stdin.read() xml = sys.stdin.read()
rss = etree.fromstring(xml) rss = etree.fromstring(xml)
@ -66,5 +62,5 @@ for item in items:
log(error) log(error)
log("http error") log("http error")
if len(sys.argv) == 2: if not os.getenv('DEBUG', False):
print etree.tostring(rss) print etree.tostring(rss)

12
rules
View File

@ -1,23 +1,11 @@
LeMonde
http://www.lemonde.fr/rss/une.xml
//*[contains(@class,'article') or contains(@id,'content')]
TehranTimes TehranTimes
http://www.tehrantimes.com/component/ninjarsssyndicator/?feed_id=1&format=raw http://www.tehrantimes.com/component/ninjarsssyndicator/?feed_id=1&format=raw
//div[@class='article-indent'] //div[@class='article-indent']
BBC
http://feeds.bbci.co.uk/news/rss.xml
//h1/..
FranceInfo FranceInfo
http://www.franceinfo.fr/rss.xml http://www.franceinfo.fr/rss.xml
//h2[@class='chapo']/.. //h2[@class='chapo']/..
Courrier International
http://www.courrierinternational.com/rss/all/rss.xml
//div[@class='story-content']
Spiegel Spiegel
http://www.spiegel.de/schlagzeilen/tops/index.rss http://www.spiegel.de/schlagzeilen/tops/index.rss
//div[@id='spArticleSection'] //div[@id='spArticleSection']