From 2806c64326e3c9b3c9b853c539f75067eaebd3f3 Mon Sep 17 00:00:00 2001
From: pictuga
Date: Mon, 27 Apr 2020 17:19:31 +0200
Subject: [PATCH] Make it possible to directly run sub-libs (feeds, crawler, readabilite)

Run `python -im morss.feeds http://website.sample/rss.xml` and so on
---
 morss/crawler.py     | 4 ++++
 morss/feeds.py       | 7 +++++++
 morss/readabilite.py | 8 ++++++++
 3 files changed, 19 insertions(+)

diff --git a/morss/crawler.py b/morss/crawler.py
index 208a417..cb081b1 100644
--- a/morss/crawler.py
+++ b/morss/crawler.py
@@ -573,3 +573,7 @@ class MySQLCacheHandler(BaseCache):
         else:
             with self.cursor() as cursor:
                 cursor.execute('INSERT INTO data VALUES (%s,%s,%s,%s,%s,%s)', (url,) + value)
+
+
+if __name__ == '__main__':
+    data = get(sys.argv[1] if len(sys.argv) > 1 else 'https://morss.it')
diff --git a/morss/feeds.py b/morss/feeds.py
index 7bdd3d7..6ec72d5 100644
--- a/morss/feeds.py
+++ b/morss/feeds.py
@@ -754,3 +754,10 @@ class ItemJSON(Item, ParserJSON):
                 return
 
             cur = cur[node]
+
+
+if __name__ == '__main__':
+    from . import crawler
+
+    data, con, contenttype, encoding = crawler.adv_get(sys.argv[1] if len(sys.argv) > 1 else 'https://www.nytimes.com/', follow='rss')
+    feed = parse(data, url=con.geturl(), mimetype=contenttype, encoding=encoding)
diff --git a/morss/readabilite.py b/morss/readabilite.py
index 1cef84f..9c15c1a 100644
--- a/morss/readabilite.py
+++ b/morss/readabilite.py
@@ -342,3 +342,11 @@ def get_article(data, url=None, encoding=None, debug=False, threshold=5):
         best.make_links_absolute(url)
 
     return lxml.etree.tostring(best if not debug else html, pretty_print=True)
+
+
+if __name__ == '__main__':
+    import sys
+    from . import crawler
+
+    data, con, contenttype, encoding = crawler.adv_get(sys.argv[1] if len(sys.argv) > 1 else 'https://morss.it')
+    article = get_article(data, url=con.geturl(), encoding=encoding)