diff --git a/.drone.yml b/.drone.yml
index 2e36dd2..77eee5d 100644
--- a/.drone.yml
+++ b/.drone.yml
@@ -10,6 +10,7 @@ steps:
     - pip3 install --no-cache-dir .[full] .[dev]
     - isort --check-only --diff .
     - pylint morss --rcfile=.pylintrc --disable=C,R,W --fail-under=8
+    - pytest --cov=morss tests
 
 ---
 kind: pipeline
diff --git a/morss/feedify.ini b/morss/feedify.ini
index aaab600..151974b 100644
--- a/morss/feedify.ini
+++ b/morss/feedify.ini
@@ -90,9 +90,6 @@ item_updated = updated
 [html]
 mode = html
 
-path =
-	http://localhost/
-
 title = //div[@id='header']/h1
 desc = //div[@id='header']/p
 items = //div[@id='content']/div
diff --git a/morss/feeds.py b/morss/feeds.py
index a229521..fd5d749 100644
--- a/morss/feeds.py
+++ b/morss/feeds.py
@@ -699,7 +699,7 @@ class Feed(object):
                 try:
                     setattr(item, attr, new[attr])
 
-                except (IndexError, TypeError):
+                except (KeyError, IndexError, TypeError):
                     pass
 
         return item
diff --git a/setup.py b/setup.py
index c153033..1c5370e 100644
--- a/setup.py
+++ b/setup.py
@@ -23,7 +23,7 @@ setup(
     install_requires = ['lxml', 'bs4', 'python-dateutil', 'chardet'],
     extras_require = {
         'full': ['pymysql', 'redis', 'diskcache', 'gunicorn', 'setproctitle'],
-        'dev': ['pylint']
+        'dev': ['pylint', 'pytest', 'pytest-cov'],
         },
     python_requires = '>=2.7',
     package_data = {package_name: ['feedify.ini']},
diff --git a/tests/conftest.py b/tests/conftest.py
new file mode 100644
index 0000000..f7cb869
--- /dev/null
+++ b/tests/conftest.py
@@ -0,0 +1,60 @@
+import os
+import os.path
+import threading
+
+import pytest
+
+try:
+    # python2
+    from BaseHTTPServer import BaseHTTPRequestHandler, HTTPServer
+    from SimpleHTTPServer import SimpleHTTPRequestHandler
+except:
+    # python3
+    from http.server import (BaseHTTPRequestHandler, HTTPServer,
+                             SimpleHTTPRequestHandler)
+
+class HTTPReplayHandler(SimpleHTTPRequestHandler):
+    " Serves pages saved alongside with headers. See `curl --http1.1 -is http://...` "
+
+    directory = os.path.join(os.path.dirname(__file__), './samples/')
+
+    __init__ = BaseHTTPRequestHandler.__init__
+
+    def do_GET(self):
+        path = self.translate_path(self.path)
+
+        if os.path.isdir(path):
+            f = self.list_directory(path)
+
+        else:
+            f = open(path, 'rb')
+
+        try:
+            self.copyfile(f, self.wfile)
+
+        finally:
+            f.close()
+
+class MuteHTTPServer(HTTPServer):
+    def handle_error(self, request, client_address):
+        # mute errors
+        pass
+
+def make_server(port=8888):
+    print('Serving http://localhost:%s/' % port)
+    return MuteHTTPServer(('', port), RequestHandlerClass=HTTPReplayHandler)
+
+@pytest.fixture
+def replay_server():
+    httpd = make_server()
+    thread = threading.Thread(target=httpd.serve_forever)
+    thread.start()
+
+    yield
+
+    httpd.shutdown()
+    thread.join()
+
+if __name__ == '__main__':
+    httpd = make_server()
+    httpd.serve_forever()
diff --git a/tests/samples/feed-atom-utf-8.txt b/tests/samples/feed-atom-utf-8.txt
new file mode 100644
index 0000000..e02c0a9
--- /dev/null
+++ b/tests/samples/feed-atom-utf-8.txt
@@ -0,0 +1,16 @@
+HTTP/1.1 200 OK
+Content-Type: text/xml; charset=utf-8
+
+<?xml version="1.0" encoding="utf-8"?>
+<feed xmlns="http://www.w3.org/2005/Atom">
+	<title>!TITLE!</title>
+	<subtitle>!DESC!</subtitle>
+	<entry>
+		<title>!ITEM_TITLE!</title>
+		<summary>!ITEM_DESC!</summary>
+		<content>!ITEM_CONTENT!</content>
+		<link href="!ITEM_LINK!"/>
+		<published>2022-01-01T00:00:01+01:00</published>
+		<updated>2022-01-01T00:00:02+01:00</updated>
+	</entry>
+</feed>
diff --git a/tests/samples/feed-atom03-utf-8.txt b/tests/samples/feed-atom03-utf-8.txt
new file mode 100644
index 0000000..2697126
--- /dev/null
+++ b/tests/samples/feed-atom03-utf-8.txt
@@ -0,0 +1,15 @@
+HTTP/1.1 200 OK
+content-type: application/xml
+
+<?xml version="1.0" encoding="utf-8"?>
+<feed version="0.3" xmlns="http://purl.org/atom/ns#">
+	<title>!TITLE!</title>
+	<subtitle>!DESC!</subtitle>
+	<entry>
+		<title>!ITEM_TITLE!</title>
+		<link href="!ITEM_LINK!"/>
+		<summary>!ITEM_DESC!</summary>
+		<content>!ITEM_CONTENT!</content>
+		<issued>2022-01-01T00:00:01+01:00</issued>
+	</entry>
+</feed>
diff --git a/tests/samples/feed-html-utf-8.txt b/tests/samples/feed-html-utf-8.txt
new file mode 100644
index 0000000..7f307de
--- /dev/null
+++ b/tests/samples/feed-html-utf-8.txt
@@ -0,0 +1,22 @@
+HTTP/1.1 200 OK
+Content-Type: text/html; charset=utf-8
+
+<!DOCTYPE html>
+<html>
+<head></head>
+<body>
+	<div id="header">
+		<h1>!TITLE!</h1>
+		<p>!DESC!</p>
+	</div>
+
+	<div id="content">
+		<div>
+			<a href="!ITEM_LINK!">!ITEM_TITLE!</a>
+			<div class="desc">!ITEM_DESC!</div>
+			<div class="content">!ITEM_CONTENT!</div>
+		</div>
+	</div>
+
+</body>
+</html>
diff --git a/tests/samples/feed-json-utf-8.txt b/tests/samples/feed-json-utf-8.txt
new file mode 100644
index 0000000..8df8669
--- /dev/null
+++ b/tests/samples/feed-json-utf-8.txt
@@ -0,0 +1,16 @@
+HTTP/1.1 200 OK
+Content-Type: application/json; charset=utf-8
+
+{
+	"title": "!TITLE!",
+	"desc": "!DESC!",
+	"items": [
+		{
+			"title": "!ITEM_TITLE!",
+			"time": "2022-01-01T00:00:01+0100",
+			"url": "!ITEM_LINK!",
+			"desc": "!ITEM_DESC!",
+			"content": "!ITEM_CONTENT!"
+		}
+	]
+}
diff --git a/tests/samples/feed-rss-channel-utf-8.txt b/tests/samples/feed-rss-channel-utf-8.txt
new file mode 100644
index 0000000..ca9e5b5
--- /dev/null
+++ b/tests/samples/feed-rss-channel-utf-8.txt
@@ -0,0 +1,17 @@
+HTTP/1.1 200 OK
+Content-Type: text/xml; charset=utf-8
+
+<?xml version="1.0" encoding="utf-8"?>
+<rss version="2.0" xmlns:content="http://purl.org/rss/1.0/modules/content/">
+	<channel>
+		<title>!TITLE!</title>
+		<description>!DESC!</description>
+		<item>
+			<title>!ITEM_TITLE!</title>
+			<pubDate>Mon, 01 Jan 2022 00:00:01 +0100</pubDate>
+			<link>!ITEM_LINK!</link>
+			<description>!ITEM_DESC!</description>
+			<content:encoded>!ITEM_CONTENT!</content:encoded>
+		</item>
+	</channel>
+</rss>
diff --git a/tests/test_feeds.py b/tests/test_feeds.py
new file mode 100644
index 0000000..3e6d668
--- /dev/null
+++ b/tests/test_feeds.py
@@ -0,0 +1,108 @@
+import pytest
+
+from morss.crawler import adv_get
+from morss.feeds import *
+
+
+def get_feed(url):
+    url = 'http://localhost:8888/%s' % url
+    out = adv_get(url)
+    feed = parse(out['data'], url=url, encoding=out['encoding'])
+    return feed
+
+def check_feed(feed):
+    # NB. time and updated not covered
+    assert feed.title == '!TITLE!'
+    assert feed.desc == '!DESC!'
+    assert feed[0] == feed.items[0]
+    assert feed[0].title == '!ITEM_TITLE!'
+    assert feed[0].link == '!ITEM_LINK!'
+    assert '!ITEM_DESC!' in feed[0].desc # broader test due to possible inclusion of surrounding <div> in xml
+    assert '!ITEM_CONTENT!' in feed[0].content
+
+def check_output(feed):
+    output = feed.tostring()
+    assert '!TITLE!' in output
+    assert '!DESC!' in output
+    assert '!ITEM_TITLE!' in output
+    assert '!ITEM_LINK!' in output
+    assert '!ITEM_DESC!' in output
+    assert '!ITEM_CONTENT!' in output
+
+def check_change(feed):
+    feed.title = '!TITLE2!'
+    feed.desc = '!DESC2!'
+    feed[0].title = '!ITEM_TITLE2!'
+    feed[0].link = '!ITEM_LINK2!'
+    feed[0].desc = '!ITEM_DESC2!'
+    feed[0].content = '!ITEM_CONTENT2!'
+
+    assert feed.title == '!TITLE2!'
+    assert feed.desc == '!DESC2!'
+    assert feed[0].title == '!ITEM_TITLE2!'
+    assert feed[0].link == '!ITEM_LINK2!'
+    assert '!ITEM_DESC2!' in feed[0].desc
+    assert '!ITEM_CONTENT2!' in feed[0].content
+
+def check_add(feed):
+    feed.append({
+        'title': '!ITEM_TITLE3!',
+        'link': '!ITEM_LINK3!',
+        'desc': '!ITEM_DESC3!',
+        'content': '!ITEM_CONTENT3!',
+        })
+
+    assert feed[1].title == '!ITEM_TITLE3!'
+    assert feed[1].link == '!ITEM_LINK3!'
+    assert '!ITEM_DESC3!' in feed[1].desc
+    assert '!ITEM_CONTENT3!' in feed[1].content
+
+each_format = pytest.mark.parametrize('url', [
+    'feed-rss-channel-utf-8.txt', 'feed-atom-utf-8.txt',
+    'feed-atom03-utf-8.txt', 'feed-json-utf-8.txt', 'feed-html-utf-8.txt',
+    ])
+
+each_check = pytest.mark.parametrize('check', [
+    check_feed, check_output, check_change, check_add,
+    ])
+
+@each_format
+@each_check
+def test_parse(replay_server, url, check):
+    feed = get_feed(url)
+    check(feed)
+
+@each_format
+@each_check
+def test_convert_rss(replay_server, url, check):
+    feed = get_feed(url)
+    feed = feed.convert(FeedXML)
+    check(feed)
+
+@each_format
+@each_check
+def test_convert_json(replay_server, url, check):
+    feed = get_feed(url)
+    feed = feed.convert(FeedJSON)
+    check(feed)
+
+@each_format
+@each_check
+def test_convert_html(replay_server, url, check):
+    feed = get_feed(url)
+    feed = feed.convert(FeedHTML)
+    if len(feed) > 1:
+        # remove the 'blank' default html item
+        del feed[0]
+    check(feed)
+
+@each_format
+def test_convert_csv(replay_server, url):
+    # only csv output, not csv feed, check therefore different
+    feed = get_feed(url)
+    output = feed.tocsv()
+
+    assert '!ITEM_TITLE!' in output
+    assert '!ITEM_LINK!' in output
+    assert '!ITEM_DESC!' in output
+    assert '!ITEM_CONTENT!' in output