parent
c7c2c5d749
commit
bd182bcb85
79
README.md
79
README.md
|
@ -73,35 +73,56 @@ morss accepts some arguments, to lightly alter the output of morss. Arguments
|
|||
may need to have a value (usually a string or a number). The "Use
|
||||
cases" section below details how to pass those arguments to morss.
|
||||
|
||||
The arguments are:
|
||||
The list of arguments can be obtained by running `morss --help`
|
||||
|
||||
```
|
||||
usage: morss [-h] [--format {rss,json,html,csv}] [--search STRING] [--clip] [--indent] [--cache] [--force] [--proxy] [--newest] [--firstlink] [--items XPATH] [--item_link XPATH]
|
||||
[--item_title XPATH] [--item_content XPATH] [--item_time XPATH] [--nolink] [--noref] [--debug]
|
||||
url
|
||||
|
||||
Get full-text RSS feeds
|
||||
|
||||
positional arguments:
|
||||
url feed url
|
||||
|
||||
optional arguments:
|
||||
-h, --help show this help message and exit
|
||||
|
||||
output:
|
||||
--format {rss,json,html,csv}
|
||||
output format
|
||||
--search STRING does a basic case-sensitive search in the feed
|
||||
--clip stick the full article content under the original feed content (useful for twitter)
|
||||
--indent returns indented XML or JSON, takes more place, but human-readable
|
||||
|
||||
action:
|
||||
--cache only take articles from the cache (ie. don't grab new articles' content), so as to save time
|
||||
--force force refetch the rss feed and articles
|
||||
--proxy doesn't fill the articles
|
||||
--newest return the feed items in chronological order (morss ohterwise shows the items by appearing order)
|
||||
--firstlink pull the first article mentioned in the description instead of the default link
|
||||
|
||||
custom feeds:
|
||||
--items XPATH (mandatory to activate the custom feeds function) xpath rule to match all the RSS entries
|
||||
--item_link XPATH xpath rule relative to items to point to the entry's link
|
||||
--item_title XPATH entry's title
|
||||
--item_content XPATH entry's content
|
||||
--item_time XPATH entry's date & time (accepts a wide range of time formats)
|
||||
|
||||
misc:
|
||||
--nolink drop links, but keeps links' inner text
|
||||
--noref drop items' link
|
||||
--debug to have some feedback from the script execution. Useful for debugging
|
||||
|
||||
GNU AGPLv3 code
|
||||
```
|
||||
|
||||
Further options:
|
||||
- Change what morss does
|
||||
- `json`: output as JSON
|
||||
- `html`: output as HTML
|
||||
- `csv`: output as CSV
|
||||
- `proxy`: doesn't fill the articles
|
||||
- `clip`: stick the full article content under the original feed content (useful for twitter)
|
||||
- `search=STRING`: does a basic case-sensitive search in the feed
|
||||
- Advanced
|
||||
- `csv`: export to csv
|
||||
- `indent`: returns indented XML or JSON, takes more place, but human-readable
|
||||
- `nolink`: drop links, but keeps links' inner text
|
||||
- `noref`: drop items' link
|
||||
- `cache`: only take articles from the cache (ie. don't grab new articles' content), so as to save time
|
||||
- `debug`: to have some feedback from the script execution. Useful for debugging
|
||||
- `force`: force refetch the rss feed and articles
|
||||
- `silent`: don't output the final RSS (useless on its own, but can be nice when debugging)
|
||||
- `newest`: return the feed items in chronological order (morss otherwise shows the items by appearing order)
|
||||
- http server only
|
||||
- `callback=NAME`: for JSONP calls
|
||||
- `cors`: allow Cross-origin resource sharing (allows XHR calls from other servers)
|
||||
- `txt`: changes the http content-type to txt (for faster "`view-source:`")
|
||||
- Custom feeds: you can turn any HTML page into an RSS feed using morss, using xpath rules. The article content will be fetched as usual (with readabilite). Please note that you will have to **replace** any `/` in your rule with a `|` when using morss as a webserver
|
||||
- `items`: (**mandatory** to activate the custom feeds function) xpath rule to match all the RSS entries
|
||||
- `item_link`: xpath rule relative to `items` to point to the entry's link
|
||||
- `item_title`: entry's title
|
||||
- `item_content`: entry's description
|
||||
- `item_time`: entry's date & time (accepts a wide range of time formats)
|
||||
- `silent`: don't output the final RSS (useless on its own, but can be nice when debugging)
|
||||
- `callback=NAME`: for JSONP calls
|
||||
- `cors`: allow Cross-origin resource sharing (allows XHR calls from other servers)
|
||||
- `txt`: changes the http content-type to txt (for faster "`view-source:`")
|
||||
|
||||
## Use cases
|
||||
|
||||
|
@ -213,9 +234,9 @@ Works like a charm with [Tiny Tiny RSS](http://tt-rss.org/redmine/projects/tt-rs
|
|||
|
||||
Run:
|
||||
```
|
||||
morss [argwithoutvalue] [argwithvalue=value] [...] FEEDURL
|
||||
morss [--argwithoutvalue] [--argwithvalue=value] [...] FEEDURL
|
||||
```
|
||||
For example: `morss debug http://feeds.bbci.co.uk/news/rss.xml`
|
||||
For example: `morss --debug http://feeds.bbci.co.uk/news/rss.xml`
|
||||
|
||||
*(Brackets indicate optional text)*
|
||||
|
||||
|
|
|
@ -68,15 +68,15 @@ def cgi_app(environ, start_response):
|
|||
if options.cors:
|
||||
headers['access-control-allow-origin'] = '*'
|
||||
|
||||
if options.html:
|
||||
if options.format == 'html':
|
||||
headers['content-type'] = 'text/html'
|
||||
elif options.txt or options.silent:
|
||||
headers['content-type'] = 'text/plain'
|
||||
elif options.json:
|
||||
elif options.format == 'json':
|
||||
headers['content-type'] = 'application/json'
|
||||
elif options.callback:
|
||||
headers['content-type'] = 'application/javascript'
|
||||
elif options.csv:
|
||||
elif options.format == 'csv':
|
||||
headers['content-type'] = 'text/csv'
|
||||
headers['content-disposition'] = 'attachment; filename="feed.csv"'
|
||||
else:
|
||||
|
|
44
morss/cli.py
44
morss/cli.py
|
@ -1,15 +1,53 @@
|
|||
import sys
|
||||
import os.path
|
||||
import argparse
|
||||
|
||||
from . import crawler
|
||||
from .morss import FeedFetch, FeedGather, FeedFormat
|
||||
from .morss import Options, parseOptions
|
||||
from .morss import Options
|
||||
from .morss import log, DEBUG
|
||||
|
||||
|
||||
|
||||
|
||||
#args = parser.parse_args()
|
||||
|
||||
def cli_app():
|
||||
options = Options(parseOptions(sys.argv[1:-1]))
|
||||
url = sys.argv[-1]
|
||||
parser = argparse.ArgumentParser(
|
||||
prog='morss',
|
||||
description='Get full-text RSS feeds',
|
||||
epilog='GNU AGPLv3 code'
|
||||
)
|
||||
|
||||
parser.add_argument('url', help='feed url')
|
||||
|
||||
group = parser.add_argument_group('output')
|
||||
group.add_argument('--format', default='rss', choices=('rss', 'json', 'html', 'csv'), help='output format')
|
||||
group.add_argument('--search', action='store', type=str, metavar='STRING', help='does a basic case-sensitive search in the feed')
|
||||
group.add_argument('--clip', action='store_true', help='stick the full article content under the original feed content (useful for twitter)')
|
||||
group.add_argument('--indent', action='store_true', help='returns indented XML or JSON, takes more place, but human-readable')
|
||||
|
||||
group = parser.add_argument_group('action')
|
||||
group.add_argument('--cache', action='store_true', help='only take articles from the cache (ie. don\'t grab new articles\' content), so as to save time')
|
||||
group.add_argument('--force', action='store_true', help='force refetch the rss feed and articles')
|
||||
group.add_argument('--proxy', action='store_true', help='doesn\'t fill the articles')
|
||||
group.add_argument('--newest', action='store_true', help='return the feed items in chronological order (morss ohterwise shows the items by appearing order)')
|
||||
group.add_argument('--firstlink', action='store_true', help='pull the first article mentioned in the description instead of the default link')
|
||||
|
||||
group = parser.add_argument_group('custom feeds')
|
||||
group.add_argument('--items', action='store', type=str, metavar='XPATH', help='(mandatory to activate the custom feeds function) xpath rule to match all the RSS entries')
|
||||
group.add_argument('--item_link', action='store', type=str, metavar='XPATH', help='xpath rule relative to items to point to the entry\'s link')
|
||||
group.add_argument('--item_title', action='store', type=str, metavar='XPATH', help='entry\'s title')
|
||||
group.add_argument('--item_content', action='store', type=str, metavar='XPATH', help='entry\'s content')
|
||||
group.add_argument('--item_time', action='store', type=str, metavar='XPATH', help='entry\'s date & time (accepts a wide range of time formats)')
|
||||
|
||||
group = parser.add_argument_group('misc')
|
||||
group.add_argument('--nolink', action='store_true', help='drop links, but keeps links\' inner text')
|
||||
group.add_argument('--noref', action='store_true', help='drop items\' link')
|
||||
group.add_argument('--debug', action='store_true', help='to have some feedback from the script execution. Useful for debugging')
|
||||
|
||||
options = Options(parser.parse_args())
|
||||
url = options.url
|
||||
|
||||
global DEBUG
|
||||
DEBUG = options.debug
|
||||
|
|
|
@ -379,24 +379,24 @@ def FeedFormat(rss, options, encoding='utf-8'):
|
|||
else:
|
||||
raise MorssException('Invalid callback var name')
|
||||
|
||||
elif options.json:
|
||||
elif options.format == 'json':
|
||||
if options.indent:
|
||||
return rss.tojson(encoding=encoding, indent=4)
|
||||
|
||||
else:
|
||||
return rss.tojson(encoding=encoding)
|
||||
|
||||
elif options.csv:
|
||||
elif options.format == 'csv':
|
||||
return rss.tocsv(encoding=encoding)
|
||||
|
||||
elif options.html:
|
||||
elif options.format == 'html':
|
||||
if options.indent:
|
||||
return rss.tohtml(encoding=encoding, pretty_print=True)
|
||||
|
||||
else:
|
||||
return rss.tohtml(encoding=encoding)
|
||||
|
||||
else:
|
||||
else: # i.e. format == 'rss'
|
||||
if options.indent:
|
||||
return rss.torss(xml_declaration=(not encoding == 'unicode'), encoding=encoding, pretty_print=True)
|
||||
|
||||
|
|
Loading…
Reference in New Issue