Add :smart :noref modes, update README

master
pictuga 2014-06-16 14:00:02 +02:00
parent f991802d9e
commit 7211093cc5
2 changed files with 13 additions and 0 deletions

View File

@ -40,6 +40,8 @@ The arguments are:
- Advanced
- `csv`: export to csv
- `md`: convert articles to Markdown
- `nolink`: drop links, but keep their inner text
- `noref`: drop items' link
- `cache`: only take articles from the cache (i.e. don't grab new articles' content), to save time
- `debug`: print some feedback about the script's execution. Useful for debugging
- `theforce`: force the RSS feed to be downloaded

View File

@ -11,6 +11,7 @@ from fnmatch import fnmatch
import re
import json
import lxml.etree
import lxml.html
import feeds
@ -628,6 +629,16 @@ def After(rss, options):
if not options.keep:
del item.desc
if options.nolink and item.content:
content = lxml.html.fromstring(item.content)
for link in content.xpath('//a'):
log(link.text_content())
link.drop_tag()
item.content = lxml.etree.tostring(content)
if options.noref:
item.link = ''
if options.md:
conv = HTML2Text(baseurl=item.link)
conv.unicode_snob = True