Move cli code to argParse

Related code changes (incl. :format=xyz)
master
pictuga 2020-08-21 23:52:56 +02:00
parent c7c2c5d749
commit bd182bcb85
4 changed files with 98 additions and 39 deletions

View File

@ -73,35 +73,56 @@ morss accepts some arguments, to lightly alter the output of morss. Arguments
may need to have a value (usually a string or a number). In the different "Use may need to have a value (usually a string or a number). In the different "Use
cases" below is detailed how to pass those arguments to morss. cases" below is detailed how to pass those arguments to morss.
The arguments are: The list of arguments can be obtained by running `morss --help`
```
usage: morss [-h] [--format {rss,json,html,csv}] [--search STRING] [--clip] [--indent] [--cache] [--force] [--proxy] [--newest] [--firstlink] [--items XPATH] [--item_link XPATH]
[--item_title XPATH] [--item_content XPATH] [--item_time XPATH] [--nolink] [--noref] [--debug]
url
Get full-text RSS feeds
positional arguments:
url feed url
optional arguments:
-h, --help show this help message and exit
output:
--format {rss,json,html,csv}
output format
--search STRING does a basic case-sensitive search in the feed
--clip stick the full article content under the original feed content (useful for twitter)
--indent returns indented XML or JSON, takes more place, but human-readable
action:
--cache only take articles from the cache (ie. don't grab new articles' content), so as to save time
--force force refetch the rss feed and articles
--proxy doesn't fill the articles
--newest return the feed items in chronological order (morss otherwise shows the items by appearing order)
--firstlink pull the first article mentioned in the description instead of the default link
custom feeds:
--items XPATH (mandatory to activate the custom feeds function) xpath rule to match all the RSS entries
--item_link XPATH xpath rule relative to items to point to the entry's link
--item_title XPATH entry's title
--item_content XPATH entry's content
--item_time XPATH entry's date & time (accepts a wide range of time formats)
misc:
--nolink drop links, but keeps links' inner text
--noref drop items' link
--debug to have some feedback from the script execution. Useful for debugging
GNU AGPLv3 code
```
Further options:
- Change what morss does - Change what morss does
- `json`: output as JSON - `silent`: don't output the final RSS (useless on its own, but can be nice when debugging)
- `html`: output as HTML - `callback=NAME`: for JSONP calls
- `csv`: output as CSV - `cors`: allow Cross-origin resource sharing (allows XHR calls from other servers)
- `proxy`: doesn't fill the articles - `txt`: changes the http content-type to txt (for faster "`view-source:`")
- `clip`: stick the full article content under the original feed content (useful for twitter)
- `search=STRING`: does a basic case-sensitive search in the feed
- Advanced
- `csv`: export to csv
- `indent`: returns indented XML or JSON, takes more place, but human-readable
- `nolink`: drop links, but keeps links' inner text
- `noref`: drop items' link
- `cache`: only take articles from the cache (ie. don't grab new articles' content), so as to save time
- `debug`: to have some feedback from the script execution. Useful for debugging
- `force`: force refetch the rss feed and articles
- `silent`: don't output the final RSS (useless on its own, but can be nice when debugging)
- `newest`: return the feed items in chronological order (morss otherwise shows the items by appearing order)
- http server only
- `callback=NAME`: for JSONP calls
- `cors`: allow Cross-origin resource sharing (allows XHR calls from other servers)
- `txt`: changes the http content-type to txt (for faster "`view-source:`")
- Custom feeds: you can turn any HTML page into a RSS feed using morss, using xpath rules. The article content will be fetched as usual (with readabilite). Please note that you will have to **replace** any `/` in your rule with a `|` when using morss as a webserver
- `items`: (**mandatory** to activate the custom feeds function) xpath rule to match all the RSS entries
- `item_link`: xpath rule relative to `items` to point to the entry's link
- `item_title`: entry's title
- `item_content`: entry's description
- `item_time`: entry's date & time (accepts a wide range of time formats)
## Use cases ## Use cases
@ -213,9 +234,9 @@ Works like a charm with [Tiny Tiny RSS](http://tt-rss.org/redmine/projects/tt-rs
Run: Run:
``` ```
morss [argwithoutvalue] [argwithvalue=value] [...] FEEDURL morss [--argwithoutvalue] [--argwithvalue=value] [...] FEEDURL
``` ```
For example: `morss debug http://feeds.bbci.co.uk/news/rss.xml` For example: `morss --debug http://feeds.bbci.co.uk/news/rss.xml`
*(Brackets indicate optional text)* *(Brackets indicate optional text)*

View File

@ -68,15 +68,15 @@ def cgi_app(environ, start_response):
if options.cors: if options.cors:
headers['access-control-allow-origin'] = '*' headers['access-control-allow-origin'] = '*'
if options.html: if options.format == 'html':
headers['content-type'] = 'text/html' headers['content-type'] = 'text/html'
elif options.txt or options.silent: elif options.txt or options.silent:
headers['content-type'] = 'text/plain' headers['content-type'] = 'text/plain'
elif options.json: elif options.format == 'json':
headers['content-type'] = 'application/json' headers['content-type'] = 'application/json'
elif options.callback: elif options.callback:
headers['content-type'] = 'application/javascript' headers['content-type'] = 'application/javascript'
elif options.csv: elif options.format == 'csv':
headers['content-type'] = 'text/csv' headers['content-type'] = 'text/csv'
headers['content-disposition'] = 'attachment; filename="feed.csv"' headers['content-disposition'] = 'attachment; filename="feed.csv"'
else: else:

View File

@ -1,15 +1,53 @@
import sys import sys
import os.path import os.path
import argparse
from . import crawler from . import crawler
from .morss import FeedFetch, FeedGather, FeedFormat from .morss import FeedFetch, FeedGather, FeedFormat
from .morss import Options, parseOptions from .morss import Options
from .morss import log, DEBUG from .morss import log, DEBUG
#args = parser.parse_args()
def cli_app(): def cli_app():
options = Options(parseOptions(sys.argv[1:-1])) parser = argparse.ArgumentParser(
url = sys.argv[-1] prog='morss',
description='Get full-text RSS feeds',
epilog='GNU AGPLv3 code'
)
parser.add_argument('url', help='feed url')
group = parser.add_argument_group('output')
group.add_argument('--format', default='rss', choices=('rss', 'json', 'html', 'csv'), help='output format')
group.add_argument('--search', action='store', type=str, metavar='STRING', help='does a basic case-sensitive search in the feed')
group.add_argument('--clip', action='store_true', help='stick the full article content under the original feed content (useful for twitter)')
group.add_argument('--indent', action='store_true', help='returns indented XML or JSON, takes more place, but human-readable')
group = parser.add_argument_group('action')
group.add_argument('--cache', action='store_true', help='only take articles from the cache (ie. don\'t grab new articles\' content), so as to save time')
group.add_argument('--force', action='store_true', help='force refetch the rss feed and articles')
group.add_argument('--proxy', action='store_true', help='doesn\'t fill the articles')
group.add_argument('--newest', action='store_true', help='return the feed items in chronological order (morss ohterwise shows the items by appearing order)')
group.add_argument('--firstlink', action='store_true', help='pull the first article mentioned in the description instead of the default link')
group = parser.add_argument_group('custom feeds')
group.add_argument('--items', action='store', type=str, metavar='XPATH', help='(mandatory to activate the custom feeds function) xpath rule to match all the RSS entries')
group.add_argument('--item_link', action='store', type=str, metavar='XPATH', help='xpath rule relative to items to point to the entry\'s link')
group.add_argument('--item_title', action='store', type=str, metavar='XPATH', help='entry\'s title')
group.add_argument('--item_content', action='store', type=str, metavar='XPATH', help='entry\'s content')
group.add_argument('--item_time', action='store', type=str, metavar='XPATH', help='entry\'s date & time (accepts a wide range of time formats)')
group = parser.add_argument_group('misc')
group.add_argument('--nolink', action='store_true', help='drop links, but keeps links\' inner text')
group.add_argument('--noref', action='store_true', help='drop items\' link')
group.add_argument('--debug', action='store_true', help='to have some feedback from the script execution. Useful for debugging')
options = Options(parser.parse_args())
url = options.url
global DEBUG global DEBUG
DEBUG = options.debug DEBUG = options.debug

View File

@ -379,24 +379,24 @@ def FeedFormat(rss, options, encoding='utf-8'):
else: else:
raise MorssException('Invalid callback var name') raise MorssException('Invalid callback var name')
elif options.json: elif options.format == 'json':
if options.indent: if options.indent:
return rss.tojson(encoding=encoding, indent=4) return rss.tojson(encoding=encoding, indent=4)
else: else:
return rss.tojson(encoding=encoding) return rss.tojson(encoding=encoding)
elif options.csv: elif options.format == 'csv':
return rss.tocsv(encoding=encoding) return rss.tocsv(encoding=encoding)
elif options.html: elif options.format == 'html':
if options.indent: if options.indent:
return rss.tohtml(encoding=encoding, pretty_print=True) return rss.tohtml(encoding=encoding, pretty_print=True)
else: else:
return rss.tohtml(encoding=encoding) return rss.tohtml(encoding=encoding)
else: else: # i.e. format == 'rss'
if options.indent: if options.indent:
return rss.torss(xml_declaration=(not encoding == 'unicode'), encoding=encoding, pretty_print=True) return rss.torss(xml_declaration=(not encoding == 'unicode'), encoding=encoding, pretty_print=True)