diff --git a/morss/crawler.py b/morss/crawler.py
index 208a417..cb081b1 100644
--- a/morss/crawler.py
+++ b/morss/crawler.py
@@ -573,3 +573,9 @@ class MySQLCacheHandler(BaseCache):
         else:
             with self.cursor() as cursor:
                 cursor.execute('INSERT INTO data VALUES (%s,%s,%s,%s,%s,%s)', (url,) + value)
+
+
+if __name__ == '__main__':
+    import sys
+
+    data = get(sys.argv[1] if len(sys.argv) > 1 else 'https://morss.it')
diff --git a/morss/feeds.py b/morss/feeds.py
index 7bdd3d7..6ec72d5 100644
--- a/morss/feeds.py
+++ b/morss/feeds.py
@@ -754,3 +754,11 @@ class ItemJSON(Item, ParserJSON):
                 return
 
             cur = cur[node]
+
+
+if __name__ == '__main__':
+    import sys
+    from . import crawler
+
+    data, con, contenttype, encoding = crawler.adv_get(sys.argv[1] if len(sys.argv) > 1 else 'https://www.nytimes.com/', follow='rss')
+    feed = parse(data, url=con.geturl(), mimetype=contenttype, encoding=encoding)
diff --git a/morss/readabilite.py b/morss/readabilite.py
index 1cef84f..9c15c1a 100644
--- a/morss/readabilite.py
+++ b/morss/readabilite.py
@@ -342,3 +342,11 @@ def get_article(data, url=None, encoding=None, debug=False, threshold=5):
         best.make_links_absolute(url)
 
     return lxml.etree.tostring(best if not debug else html, pretty_print=True)
+
+
+if __name__ == '__main__':
+    import sys
+    from . import crawler
+
+    data, con, contenttype, encoding = crawler.adv_get(sys.argv[1] if len(sys.argv) > 1 else 'https://morss.it')
+    article = get_article(data, url=con.geturl(), encoding=encoding)