Move cli code to argParse

Related code changes (incl. :format=xyz)
master
pictuga 2020-08-21 23:52:56 +02:00
parent c7c2c5d749
commit bd182bcb85
4 changed files with 98 additions and 39 deletions

View File

@ -73,35 +73,56 @@ morss accepts some arguments, to lightly alter the output of morss. Arguments
may need to have a value (usually a string or a number). In the different "Use
cases" below is detailed how to pass those arguments to morss.
The arguments are:
The list of arguments can be obtained by running `morss --help`
```
usage: morss [-h] [--format {rss,json,html,csv}] [--search STRING] [--clip] [--indent] [--cache] [--force] [--proxy] [--newest] [--firstlink] [--items XPATH] [--item_link XPATH]
[--item_title XPATH] [--item_content XPATH] [--item_time XPATH] [--nolink] [--noref] [--debug]
url
Get full-text RSS feeds
positional arguments:
url feed url
optional arguments:
-h, --help show this help message and exit
output:
--format {rss,json,html,csv}
output format
--search STRING does a basic case-sensitive search in the feed
--clip stick the full article content under the original feed content (useful for twitter)
--indent returns indented XML or JSON, takes more place, but human-readable
action:
--cache only take articles from the cache (ie. don't grab new articles' content), so as to save time
--force force refetch the rss feed and articles
--proxy doesn't fill the articles
--newest return the feed items in chronological order (morss ohterwise shows the items by appearing order)
--firstlink pull the first article mentioned in the description instead of the default link
custom feeds:
--items XPATH (mandatory to activate the custom feeds function) xpath rule to match all the RSS entries
--item_link XPATH xpath rule relative to items to point to the entry's link
--item_title XPATH entry's title
--item_content XPATH entry's content
--item_time XPATH entry's date & time (accepts a wide range of time formats)
misc:
--nolink drop links, but keeps links' inner text
--noref drop items' link
--debug to have some feedback from the script execution. Useful for debugging
GNU AGPLv3 code
```
Further options:
- Change what morss does
- `json`: output as JSON
- `html`: outpout as HTML
- `csv`: outpout as CSV
- `proxy`: doesn't fill the articles
- `clip`: stick the full article content under the original feed content (useful for twitter)
- `search=STRING`: does a basic case-sensitive search in the feed
- Advanced
- `csv`: export to csv
- `indent`: returns indented XML or JSON, takes more place, but human-readable
- `nolink`: drop links, but keeps links' inner text
- `noref`: drop items' link
- `cache`: only take articles from the cache (ie. don't grab new articles' content), so as to save time
- `debug`: to have some feedback from the script execution. Useful for debugging
- `force`: force refetch the rss feed and articles
- `silent`: don't output the final RSS (useless on its own, but can be nice when debugging)
- `newest`: return the feed items in chronological order (morss ohterwise shows the items by appearing order)
- http server only
- `callback=NAME`: for JSONP calls
- `cors`: allow Cross-origin resource sharing (allows XHR calls from other servers)
- `txt`: changes the http content-type to txt (for faster "`view-source:`")
- Custom feeds: you can turn any HTML page into a RSS feed using morss, using xpath rules. The article content will be fetched as usual (with readabilite). Please note that you will have to **replace** any `/` in your rule with a `|` when using morss as a webserver
- `items`: (**mandatory** to activate the custom feeds function) xpath rule to match all the RSS entries
- `item_link`: xpath rule relative to `items` to point to the entry's link
- `item_title`: entry's title
- `item_content`: entry's description
- `item_time`: entry's date & time (accepts a wide range of time formats)
- `silent`: don't output the final RSS (useless on its own, but can be nice when debugging)
- `callback=NAME`: for JSONP calls
- `cors`: allow Cross-origin resource sharing (allows XHR calls from other servers)
- `txt`: changes the http content-type to txt (for faster "`view-source:`")
## Use cases
@ -213,9 +234,9 @@ Works like a charm with [Tiny Tiny RSS](http://tt-rss.org/redmine/projects/tt-rs
Run:
```
morss [argwithoutvalue] [argwithvalue=value] [...] FEEDURL
morss [--argwithoutvalue] [--argwithvalue=value] [...] FEEDURL
```
For example: `morss debug http://feeds.bbci.co.uk/news/rss.xml`
For example: `morss --debug http://feeds.bbci.co.uk/news/rss.xml`
*(Brackets indicate optional text)*

View File

@ -68,15 +68,15 @@ def cgi_app(environ, start_response):
if options.cors:
headers['access-control-allow-origin'] = '*'
if options.html:
if options.format == 'html':
headers['content-type'] = 'text/html'
elif options.txt or options.silent:
headers['content-type'] = 'text/plain'
elif options.json:
elif options.format == 'json':
headers['content-type'] = 'application/json'
elif options.callback:
headers['content-type'] = 'application/javascript'
elif options.csv:
elif options.format == 'csv':
headers['content-type'] = 'text/csv'
headers['content-disposition'] = 'attachment; filename="feed.csv"'
else:

View File

@ -1,15 +1,53 @@
import sys
import os.path
import argparse
from . import crawler
from .morss import FeedFetch, FeedGather, FeedFormat
from .morss import Options, parseOptions
from .morss import Options
from .morss import log, DEBUG
#args = parser.parse_args()
def cli_app():
options = Options(parseOptions(sys.argv[1:-1]))
url = sys.argv[-1]
parser = argparse.ArgumentParser(
prog='morss',
description='Get full-text RSS feeds',
epilog='GNU AGPLv3 code'
)
parser.add_argument('url', help='feed url')
group = parser.add_argument_group('output')
group.add_argument('--format', default='rss', choices=('rss', 'json', 'html', 'csv'), help='output format')
group.add_argument('--search', action='store', type=str, metavar='STRING', help='does a basic case-sensitive search in the feed')
group.add_argument('--clip', action='store_true', help='stick the full article content under the original feed content (useful for twitter)')
group.add_argument('--indent', action='store_true', help='returns indented XML or JSON, takes more place, but human-readable')
group = parser.add_argument_group('action')
group.add_argument('--cache', action='store_true', help='only take articles from the cache (ie. don\'t grab new articles\' content), so as to save time')
group.add_argument('--force', action='store_true', help='force refetch the rss feed and articles')
group.add_argument('--proxy', action='store_true', help='doesn\'t fill the articles')
group.add_argument('--newest', action='store_true', help='return the feed items in chronological order (morss ohterwise shows the items by appearing order)')
group.add_argument('--firstlink', action='store_true', help='pull the first article mentioned in the description instead of the default link')
group = parser.add_argument_group('custom feeds')
group.add_argument('--items', action='store', type=str, metavar='XPATH', help='(mandatory to activate the custom feeds function) xpath rule to match all the RSS entries')
group.add_argument('--item_link', action='store', type=str, metavar='XPATH', help='xpath rule relative to items to point to the entry\'s link')
group.add_argument('--item_title', action='store', type=str, metavar='XPATH', help='entry\'s title')
group.add_argument('--item_content', action='store', type=str, metavar='XPATH', help='entry\'s content')
group.add_argument('--item_time', action='store', type=str, metavar='XPATH', help='entry\'s date & time (accepts a wide range of time formats)')
group = parser.add_argument_group('misc')
group.add_argument('--nolink', action='store_true', help='drop links, but keeps links\' inner text')
group.add_argument('--noref', action='store_true', help='drop items\' link')
group.add_argument('--debug', action='store_true', help='to have some feedback from the script execution. Useful for debugging')
options = Options(parser.parse_args())
url = options.url
global DEBUG
DEBUG = options.debug

View File

@ -379,24 +379,24 @@ def FeedFormat(rss, options, encoding='utf-8'):
else:
raise MorssException('Invalid callback var name')
elif options.json:
elif options.format == 'json':
if options.indent:
return rss.tojson(encoding=encoding, indent=4)
else:
return rss.tojson(encoding=encoding)
elif options.csv:
elif options.format == 'csv':
return rss.tocsv(encoding=encoding)
elif options.html:
elif options.format == 'html':
if options.indent:
return rss.tohtml(encoding=encoding, pretty_print=True)
else:
return rss.tohtml(encoding=encoding)
else:
else: # i.e. format == 'rss'
if options.indent:
return rss.torss(xml_declaration=(not encoding == 'unicode'), encoding=encoding, pretty_print=True)