From 9dbe061fd6e3b555c2c7a4e13a4f7d21920b413b Mon Sep 17 00:00:00 2001 From: pictuga Date: Wed, 18 Mar 2020 16:47:00 +0100 Subject: [PATCH] Remove markdown-related code Time to clean up the code and stop with those non-core features They just make the code harder to maintain --- README.md | 6 ++---- morss/morss.py | 10 ---------- requirements.txt | 1 - 3 files changed, 2 insertions(+), 15 deletions(-) diff --git a/README.md b/README.md index 6a8972a..0850b2f 100644 --- a/README.md +++ b/README.md @@ -45,7 +45,6 @@ You do need: - [python](http://www.python.org/) >= 2.6 (python 3 is supported) - [lxml](http://lxml.de/) for xml parsing - [dateutil](http://labix.org/python-dateutil) to parse feed dates -- [html2text](http://www.aaronsw.com/2002/html2text/) - [OrderedDict](https://pypi.python.org/pypi/ordereddict) if using python < 2.7 - [wheezy.template](https://pypi.python.org/pypi/wheezy.template) to generate HTML pages - [chardet](https://pypi.python.org/pypi/chardet) @@ -77,7 +76,6 @@ The arguments are: - `search=STRING`: does a basic case-sensitive search in the feed - Advanced - `csv`: export to csv - - `md`: convert articles to Markdown - `indent`: returns indented XML or JSON, takes more place, but human-readable - `nolink`: drop links, but keeps links' inner text - `noref`: drop items' link @@ -199,7 +197,7 @@ Using cache and passing arguments: >>> import morss >>> url = 'http://feeds.bbci.co.uk/news/rss.xml' >>> cache = '/tmp/morss-cache.db' # sqlite cache location ->>> options = {'csv':True, 'md':True} +>>> options = {'csv':True} >>> xml_string = morss.process(url, cache, options) >>> xml_string[:50] '{"title": "BBC News - Home", "desc": "The latest s' @@ -214,7 +212,7 @@ Doing it step-by-step: import morss, morss.crawler url = 'http://newspaper.example/feed.xml' -options = morss.Options(csv=True, md=True) # arguments +options = morss.Options(csv=True) # arguments morss.crawler.sqlite_default = '/tmp/morss-cache.db' # sqlite cache location rss = morss.FeedFetch(url, options) # this only grabs the RSS feed diff --git a/morss/morss.py b/morss/morss.py index 4719f50..02389e0 100644 --- a/morss/morss.py +++ b/morss/morss.py @@ -19,7 +19,6 @@ from . import readabilite import wsgiref.simple_server import wsgiref.handlers -from html2text import HTML2Text try: # python 2 @@ -290,15 +289,6 @@ def ItemAfter(item, options): if options.noref: item.link = '' - if options.md: - conv = HTML2Text(baseurl=item.link) - conv.unicode_snob = True - - if item.desc: - item.desc = conv.handle(item.desc) - if item.content: - item.content = conv.handle(item.content) - return item diff --git a/requirements.txt b/requirements.txt index 220bed6..f569843 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,4 @@ lxml python-dateutil <= 1.5 -html2text chardet pymysql