Make it possible to directly run sub-libs (feeds, crawler, readabilite)
Run `python -im morss.feeds http://website.sample/rss.xml` and so on.
master
parent
d39d7bb19d
commit
2806c64326
|
@ -573,3 +573,7 @@ class MySQLCacheHandler(BaseCache):
|
|||
else:
|
||||
with self.cursor() as cursor:
|
||||
cursor.execute('INSERT INTO data VALUES (%s,%s,%s,%s,%s,%s)', (url,) + value)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
data = get(sys.argv[1] if len(sys.argv) > 1 else 'https://morss.it')
|
||||
|
|
|
@ -754,3 +754,10 @@ class ItemJSON(Item, ParserJSON):
|
|||
return
|
||||
|
||||
cur = cur[node]
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
from . import crawler
|
||||
|
||||
data, con, contenttype, encoding = crawler.adv_get(sys.argv[1] if len(sys.argv) > 1 else 'https://www.nytimes.com/', follow='rss')
|
||||
feed = parse(data, url=con.geturl(), mimetype=contenttype, encoding=encoding)
|
||||
|
|
|
@ -342,3 +342,11 @@ def get_article(data, url=None, encoding=None, debug=False, threshold=5):
|
|||
best.make_links_absolute(url)
|
||||
|
||||
return lxml.etree.tostring(best if not debug else html, pretty_print=True)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
import sys
|
||||
from . import crawler
|
||||
|
||||
data, con, contenttype, encoding = crawler.adv_get(sys.argv[1] if len(sys.argv) > 1 else 'https://morss.it')
|
||||
article = get_article(data, url=con.geturl(), encoding=encoding)
|
||||
|
|
Loading…
Reference in New Issue