Make it possible to directly run sub-libs (feeds, crawler, readabilite)
Run `python -im morss.feeds http://website.sample/rss.xml` and so on
master
parent
d39d7bb19d
commit
2806c64326
|
@ -573,3 +573,7 @@ class MySQLCacheHandler(BaseCache):
|
||||||
else:
|
else:
|
||||||
with self.cursor() as cursor:
|
with self.cursor() as cursor:
|
||||||
cursor.execute('INSERT INTO data VALUES (%s,%s,%s,%s,%s,%s)', (url,) + value)
|
cursor.execute('INSERT INTO data VALUES (%s,%s,%s,%s,%s,%s)', (url,) + value)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
data = get(sys.argv[1] if len(sys.argv) > 1 else 'https://morss.it')
|
||||||
|
|
|
@ -754,3 +754,10 @@ class ItemJSON(Item, ParserJSON):
|
||||||
return
|
return
|
||||||
|
|
||||||
cur = cur[node]
|
cur = cur[node]
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
from . import crawler
|
||||||
|
|
||||||
|
data, con, contenttype, encoding = crawler.adv_get(sys.argv[1] if len(sys.argv) > 1 else 'https://www.nytimes.com/', follow='rss')
|
||||||
|
feed = parse(data, url=con.geturl(), mimetype=contenttype, encoding=encoding)
|
||||||
|
|
|
@ -342,3 +342,11 @@ def get_article(data, url=None, encoding=None, debug=False, threshold=5):
|
||||||
best.make_links_absolute(url)
|
best.make_links_absolute(url)
|
||||||
|
|
||||||
return lxml.etree.tostring(best if not debug else html, pretty_print=True)
|
return lxml.etree.tostring(best if not debug else html, pretty_print=True)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
import sys
|
||||||
|
from . import crawler
|
||||||
|
|
||||||
|
data, con, contenttype, encoding = crawler.adv_get(sys.argv[1] if len(sys.argv) > 1 else 'https://morss.it')
|
||||||
|
article = get_article(data, url=con.geturl(), encoding=encoding)
|
||||||
|
|
Loading…
Reference in New Issue