Default to "//h1/.." since most websites use it
because it is said to be good for SEO. Debug now requires the environment variable "DEBUG" to be set to something other than "".
master
parent
253bc27f17
commit
ed8a45875c
14
morss
14
morss
|
@ -7,15 +7,11 @@ import urllib2
|
|||
import urllib
|
||||
from cookielib import CookieJar
|
||||
|
||||
def log(str):
|
||||
if (len(sys.argv) == 3):
|
||||
print str
|
||||
def log(txt):
|
||||
if os.getenv('DEBUG', False):
|
||||
print txt
|
||||
|
||||
if len(sys.argv) < 2:
|
||||
print "argument please"
|
||||
sys.exit(1)
|
||||
|
||||
node = sys.argv[1]
|
||||
node = sys.argv[1] if len(sys.argv) > 1 else "//h1/.."
|
||||
|
||||
xml = sys.stdin.read()
|
||||
rss = etree.fromstring(xml)
|
||||
|
@ -66,5 +62,5 @@ for item in items:
|
|||
log(error)
|
||||
log("http error")
|
||||
|
||||
if len(sys.argv) == 2:
|
||||
if not os.getenv('DEBUG', False):
|
||||
print etree.tostring(rss)
|
||||
|
|
12
rules
12
rules
|
@ -1,23 +1,11 @@
|
|||
LeMonde
|
||||
http://www.lemonde.fr/rss/une.xml
|
||||
//*[contains(@class,'article') or contains(@id,'content')]
|
||||
|
||||
TehranTimes
|
||||
http://www.tehrantimes.com/component/ninjarsssyndicator/?feed_id=1&format=raw
|
||||
//div[@class='article-indent']
|
||||
|
||||
BBC
|
||||
http://feeds.bbci.co.uk/news/rss.xml
|
||||
//h1/..
|
||||
|
||||
FranceInfo
|
||||
http://www.franceinfo.fr/rss.xml
|
||||
//h2[@class='chapo']/..
|
||||
|
||||
Courrier International
|
||||
http://www.courrierinternational.com/rss/all/rss.xml
|
||||
//div[@class='story-content']
|
||||
|
||||
Spiegel
|
||||
http://www.spiegel.de/schlagzeilen/tops/index.rss
|
||||
//div[@id='spArticleSection']
|
||||
|
|
Loading…
Reference in New Issue