diff --git a/README.md b/README.md index 4fed2e5..925906d 100644 --- a/README.md +++ b/README.md @@ -73,35 +73,56 @@ morss accepts some arguments, to lightly alter the output of morss. Arguments may need to have a value (usually a string or a number). In the different "Use cases" below is detailed how to pass those arguments to morss. -The arguments are: +The list of arguments can be obtained by running `morss --help` +``` +usage: morss [-h] [--format {rss,json,html,csv}] [--search STRING] [--clip] [--indent] [--cache] [--force] [--proxy] [--newest] [--firstlink] [--items XPATH] [--item_link XPATH] + [--item_title XPATH] [--item_content XPATH] [--item_time XPATH] [--nolink] [--noref] [--debug] + url + +Get full-text RSS feeds + +positional arguments: + url feed url + +optional arguments: + -h, --help show this help message and exit + +output: + --format {rss,json,html,csv} + output format + --search STRING does a basic case-sensitive search in the feed + --clip stick the full article content under the original feed content (useful for twitter) + --indent returns indented XML or JSON, takes more place, but human-readable + +action: + --cache only take articles from the cache (ie. 
don't grab new articles' content), so as to save time + --force force refetch the rss feed and articles + --proxy doesn't fill the articles + --newest return the feed items in chronological order (morss otherwise shows the items by appearing order) + --firstlink pull the first article mentioned in the description instead of the default link + +custom feeds: + --items XPATH (mandatory to activate the custom feeds function) xpath rule to match all the RSS entries + --item_link XPATH xpath rule relative to items to point to the entry's link + --item_title XPATH entry's title + --item_content XPATH entry's content + --item_time XPATH entry's date & time (accepts a wide range of time formats) + +misc: + --nolink drop links, but keeps links' inner text + --noref drop items' link + --debug to have some feedback from the script execution. Useful for debugging + +GNU AGPLv3 code +``` + +Further options: - Change what morss does - - `json`: output as JSON - - `html`: outpout as HTML - - `csv`: outpout as CSV - - `proxy`: doesn't fill the articles - - `clip`: stick the full article content under the original feed content (useful for twitter) - - `search=STRING`: does a basic case-sensitive search in the feed -- Advanced - - `csv`: export to csv - - `indent`: returns indented XML or JSON, takes more place, but human-readable - - `nolink`: drop links, but keeps links' inner text - - `noref`: drop items' link - - `cache`: only take articles from the cache (ie. don't grab new articles' content), so as to save time - - `debug`: to have some feedback from the script execution. 
Useful for debugging - - `force`: force refetch the rss feed and articles - - `silent`: don't output the final RSS (useless on its own, but can be nice when debugging) - - `newest`: return the feed items in chronological order (morss ohterwise shows the items by appearing order) -- http server only - - `callback=NAME`: for JSONP calls - - `cors`: allow Cross-origin resource sharing (allows XHR calls from other servers) - - `txt`: changes the http content-type to txt (for faster "`view-source:`") -- Custom feeds: you can turn any HTML page into a RSS feed using morss, using xpath rules. The article content will be fetched as usual (with readabilite). Please note that you will have to **replace** any `/` in your rule with a `|` when using morss as a webserver - - `items`: (**mandatory** to activate the custom feeds function) xpath rule to match all the RSS entries - - `item_link`: xpath rule relative to `items` to point to the entry's link - - `item_title`: entry's title - - `item_content`: entry's description - - `item_time`: entry's date & time (accepts a wide range of time formats) +- `silent`: don't output the final RSS (useless on its own, but can be nice when debugging) +- `callback=NAME`: for JSONP calls +- `cors`: allow Cross-origin resource sharing (allows XHR calls from other servers) +- `txt`: changes the http content-type to txt (for faster "`view-source:`") ## Use cases @@ -213,9 +234,9 @@ Works like a charm with [Tiny Tiny RSS](http://tt-rss.org/redmine/projects/tt-rs Run: ``` -morss [argwithoutvalue] [argwithvalue=value] [...] FEEDURL +morss [--argwithoutvalue] [--argwithvalue=value] [...] 
FEEDURL ``` -For example: `morss debug http://feeds.bbci.co.uk/news/rss.xml` +For example: `morss --debug http://feeds.bbci.co.uk/news/rss.xml` *(Brackets indicate optional text)* diff --git a/morss/cgi.py b/morss/cgi.py index 3f0bdc5..7b4b013 100644 --- a/morss/cgi.py +++ b/morss/cgi.py @@ -68,15 +68,15 @@ def cgi_app(environ, start_response): if options.cors: headers['access-control-allow-origin'] = '*' - if options.html: + if options.format == 'html': headers['content-type'] = 'text/html' elif options.txt or options.silent: headers['content-type'] = 'text/plain' - elif options.json: + elif options.format == 'json': headers['content-type'] = 'application/json' elif options.callback: headers['content-type'] = 'application/javascript' - elif options.csv: + elif options.format == 'csv': headers['content-type'] = 'text/csv' headers['content-disposition'] = 'attachment; filename="feed.csv"' else: diff --git a/morss/cli.py b/morss/cli.py index c256857..acd0c84 100644 --- a/morss/cli.py +++ b/morss/cli.py @@ -1,15 +1,53 @@ import sys import os.path +import argparse from . 
import crawler from .morss import FeedFetch, FeedGather, FeedFormat -from .morss import Options, parseOptions +from .morss import Options from .morss import log, DEBUG + + +#args = parser.parse_args() + def cli_app(): - options = Options(parseOptions(sys.argv[1:-1])) - url = sys.argv[-1] + parser = argparse.ArgumentParser( + prog='morss', + description='Get full-text RSS feeds', + epilog='GNU AGPLv3 code' + ) + + parser.add_argument('url', help='feed url') + + group = parser.add_argument_group('output') + group.add_argument('--format', default='rss', choices=('rss', 'json', 'html', 'csv'), help='output format') + group.add_argument('--search', action='store', type=str, metavar='STRING', help='does a basic case-sensitive search in the feed') + group.add_argument('--clip', action='store_true', help='stick the full article content under the original feed content (useful for twitter)') + group.add_argument('--indent', action='store_true', help='returns indented XML or JSON, takes more place, but human-readable') + + group = parser.add_argument_group('action') + group.add_argument('--cache', action='store_true', help='only take articles from the cache (ie. 
don\'t grab new articles\' content), so as to save time') + group.add_argument('--force', action='store_true', help='force refetch the rss feed and articles') + group.add_argument('--proxy', action='store_true', help='doesn\'t fill the articles') + group.add_argument('--newest', action='store_true', help='return the feed items in chronological order (morss ohterwise shows the items by appearing order)') + group.add_argument('--firstlink', action='store_true', help='pull the first article mentioned in the description instead of the default link') + + group = parser.add_argument_group('custom feeds') + group.add_argument('--items', action='store', type=str, metavar='XPATH', help='(mandatory to activate the custom feeds function) xpath rule to match all the RSS entries') + group.add_argument('--item_link', action='store', type=str, metavar='XPATH', help='xpath rule relative to items to point to the entry\'s link') + group.add_argument('--item_title', action='store', type=str, metavar='XPATH', help='entry\'s title') + group.add_argument('--item_content', action='store', type=str, metavar='XPATH', help='entry\'s content') + group.add_argument('--item_time', action='store', type=str, metavar='XPATH', help='entry\'s date & time (accepts a wide range of time formats)') + + group = parser.add_argument_group('misc') + group.add_argument('--nolink', action='store_true', help='drop links, but keeps links\' inner text') + group.add_argument('--noref', action='store_true', help='drop items\' link') + group.add_argument('--debug', action='store_true', help='to have some feedback from the script execution. 
Useful for debugging') + + options = Options(parser.parse_args()) + url = options.url global DEBUG DEBUG = options.debug diff --git a/morss/morss.py b/morss/morss.py index 4a5c352..bb5b6b0 100644 --- a/morss/morss.py +++ b/morss/morss.py @@ -379,24 +379,24 @@ def FeedFormat(rss, options, encoding='utf-8'): else: raise MorssException('Invalid callback var name') - elif options.json: + elif options.format == 'json': if options.indent: return rss.tojson(encoding=encoding, indent=4) else: return rss.tojson(encoding=encoding) - elif options.csv: + elif options.format == 'csv': return rss.tocsv(encoding=encoding) - elif options.html: + elif options.format == 'html': if options.indent: return rss.tohtml(encoding=encoding, pretty_print=True) else: return rss.tohtml(encoding=encoding) - else: + else: # i.e. format == 'rss' if options.indent: return rss.torss(xml_declaration=(not encoding == 'unicode'), encoding=encoding, pretty_print=True)