Compare commits


No commits in common. "62f2346b3f601c774655f33d8b7b1599ebc8b9e3" and "71d9c7a027835da8edc6502391224f909ea65ec5" have entirely different histories.

8 changed files with 61 additions and 69 deletions

.drone.yml

@@ -1,8 +0,0 @@
-kind: pipeline
-name: default
-
-steps:
-- name: lint
-  image: python
-  commands:
-  - isort --diff --color --recursive .
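Note: the deleted pipeline ran isort in check mode on every build. As an illustration only (these import lines are taken from the crawler.py hunks below, not from this file), isort normalizes an import block by alphabetizing it and placing plain `import X` lines before `from X import Y` lines:

```python
# Imports as they appear on the right-hand side of this compare (unsorted):
import zlib
from io import BytesIO, StringIO
import re

# The same block after `isort` -- the style this compare reverts away from:
import re
import zlib
from io import BytesIO, StringIO
```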

README.md

@@ -263,11 +263,11 @@ arguments to morss is explained in Run above.
 The list of arguments can be obtained by running `morss --help`

 ```
-usage: morss [-h] [--post STRING] [--format {rss,json,html,csv}]
-             [--search STRING] [--clip] [--indent] [--cache] [--force]
-             [--proxy] [--newest] [--firstlink] [--resolve] [--items XPATH]
-             [--item_link XPATH] [--item_title XPATH] [--item_content XPATH]
-             [--item_time XPATH] [--nolink] [--noref] [--silent]
+usage: morss [-h] [--format {rss,json,html,csv}] [--search STRING] [--clip]
+             [--indent] [--cache] [--force] [--proxy] [--newest] [--firstlink]
+             [--resolve] [--items XPATH] [--item_link XPATH]
+             [--item_title XPATH] [--item_content XPATH] [--item_time XPATH]
+             [--nolink] [--noref] [--silent]
              url

 Get full-text RSS feeds
@@ -277,7 +277,6 @@ positional arguments:

 optional arguments:
   -h, --help            show this help message and exit
-  --post STRING         POST request

 output:
   --format {rss,json,html,csv}

morss/__main__.py

@@ -20,7 +20,9 @@
 import os
 import sys

-from . import cli, wsgi
+from . import wsgi
+from . import cli
 from .morss import MorssException

morss/cli.py

@@ -32,8 +32,6 @@ def cli_app():
     parser.add_argument('url', help='feed url')

-    parser.add_argument('--post', action='store', type=str, metavar='STRING', help='POST request')
-
     group = parser.add_argument_group('output')
     group.add_argument('--format', default='rss', choices=('rss', 'json', 'html', 'csv'), help='output format')
     group.add_argument('--search', action='store', type=str, metavar='STRING', help='does a basic case-sensitive search in the feed')

morss/crawler.py

@@ -16,33 +16,30 @@
 # with this program. If not, see <https://www.gnu.org/licenses/>.

 import os
-import random
-import re
 import sys
-import threading
-import time
-import zlib
-from cgi import parse_header
-from collections import OrderedDict
-from io import BytesIO, StringIO

+import zlib
+from io import BytesIO, StringIO
+import re
 import chardet
+from cgi import parse_header
+import time
+import threading
+import random
+from collections import OrderedDict

 try:
     # python 2
+    from urllib2 import BaseHandler, HTTPCookieProcessor, Request, addinfourl, parse_keqv_list, parse_http_list, build_opener
     from urllib import quote
-    from urllib2 import (BaseHandler, HTTPCookieProcessor, Request, addinfourl,
-                         build_opener, parse_http_list, parse_keqv_list)
+    import mimetools
     from urlparse import urlparse, urlunparse
-    import mimetools
 except ImportError:
     # python 3
+    from urllib.request import BaseHandler, HTTPCookieProcessor, Request, addinfourl, parse_keqv_list, parse_http_list, build_opener
+    from urllib.parse import quote
+    from urllib.parse import urlparse, urlunparse
     import email
-    from urllib.parse import quote, urlparse, urlunparse
-    from urllib.request import (BaseHandler, HTTPCookieProcessor, Request,
-                                addinfourl, build_opener, parse_http_list,
-                                parse_keqv_list)

 try:
     # python 2
@@ -84,17 +81,14 @@ def get(*args, **kwargs):
     return adv_get(*args, **kwargs)['data']


-def adv_get(url, post=None, timeout=None, *args, **kwargs):
+def adv_get(url, timeout=None, *args, **kwargs):
     url = sanitize_url(url)

-    if post is not None:
-        post = post.encode('utf-8')
-
     if timeout is None:
-        con = custom_opener(*args, **kwargs).open(url, data=post)
+        con = custom_opener(*args, **kwargs).open(url)

     else:
-        con = custom_opener(*args, **kwargs).open(url, data=post, timeout=timeout)
+        con = custom_opener(*args, **kwargs).open(url, timeout=timeout)

     data = con.read()
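Note: this hunk drops POST support from the fetch layer. A minimal, self-contained sketch of the behaviour being removed, assuming only what the deleted lines show (the function name `fetch` is hypothetical):

```python
from urllib.request import build_opener

def fetch(url, post=None, timeout=10):
    # A non-None bytes body makes urllib issue a POST instead of a GET;
    # the deleted code utf-8 encoded the text payload before passing it on.
    data = post.encode('utf-8') if post is not None else None
    con = build_opener().open(url, data=data, timeout=timeout)
    return con.read()
```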
@@ -623,7 +617,7 @@ class BaseCache:
         return True


-import sqlite3 # isort:skip
+import sqlite3


 class SQLiteCache(BaseCache):
@@ -660,7 +654,7 @@ class SQLiteCache(BaseCache):
         self.con.execute('INSERT INTO data VALUES (?,?,?,?,?,?) ON CONFLICT(url) DO UPDATE SET code=?, msg=?, headers=?, data=?, timestamp=?', (url,) + value + value)


-import pymysql.cursors # isort:skip
+import pymysql.cursors


 class MySQLCacheHandler(BaseCache):
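Note: the `# isort:skip` markers removed in these two hunks only made sense while the CI lint step existed; they kept these deliberately deferred mid-file imports next to the cache classes that use them:

```python
# Without the marker, isort would hoist this line into the module's top
# import block; with it, the import stays beside the class that needs it.
import sqlite3  # isort:skip
```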

morss/feeds.py

@@ -15,35 +15,35 @@
 # You should have received a copy of the GNU Affero General Public License along
 # with this program. If not, see <https://www.gnu.org/licenses/>.

-import os.path
 import sys
+import os.path

+sys.path.append('/home/paul/Documents/Code/morss/lib')

-import csv
-import json
-import re
-from copy import deepcopy
 from datetime import datetime
+import re
+import json
+import csv
 from fnmatch import fnmatch

-import dateutil.parser
-import lxml.html
-from dateutil import tz
 from lxml import etree
+from dateutil import tz
+import dateutil.parser
+from copy import deepcopy
+import lxml.html

 from .readabilite import parse as html_parse

 json.encoder.c_make_encoder = None

 try:
     # python 2
-    from ConfigParser import RawConfigParser
     from StringIO import StringIO
+    from ConfigParser import RawConfigParser
 except ImportError:
     # python 3
-    from configparser import RawConfigParser
     from io import StringIO
+    from configparser import RawConfigParser

 try:
     # python 2

morss/morss.py

@@ -16,25 +16,30 @@
 # with this program. If not, see <https://www.gnu.org/licenses/>.

 import os
-import re
 import time
 from datetime import datetime
-from dateutil import tz
 from fnmatch import fnmatch
+import re

 import lxml.etree
 import lxml.html
+from dateutil import tz

-from . import crawler, feeds, readabilite
+from . import feeds
+from . import crawler
+from . import readabilite

 try:
     # python 2
     from httplib import HTTPException
-    from urlparse import parse_qs, urljoin, urlparse
+    from urlparse import urlparse, urljoin, parse_qs
 except ImportError:
     # python 3
     from http.client import HTTPException
-    from urllib.parse import parse_qs, urljoin, urlparse
+    from urllib.parse import urlparse, urljoin, parse_qs

 MAX_ITEM = int(os.getenv('MAX_ITEM', 5)) # cache-only beyond
@@ -271,7 +276,7 @@ def FeedFetch(url, options):
     delay = 0

     try:
-        req = crawler.adv_get(url=url, post=options.post, follow=('rss' if not options.items else None), delay=delay, timeout=TIMEOUT * 2)
+        req = crawler.adv_get(url=url, follow=('rss' if not options.items else None), delay=delay, timeout=TIMEOUT * 2)

     except (IOError, HTTPException):
         raise MorssException('Error downloading feed')
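Note: together with the cli.py and README hunks above, this removes the `--post` feature end to end: the CLI string is no longer parsed, carried through `options`, or forwarded to `crawler.adv_get`. A condensed, self-contained sketch of the wiring being deleted, with argument names taken from the removed lines:

```python
import argparse

parser = argparse.ArgumentParser(description='Get full-text RSS feeds')
parser.add_argument('url', help='feed url')
parser.add_argument('--post', action='store', type=str, metavar='STRING',
                    help='POST request')

# Simulated invocation; the payload string ends up on options.post.
options = parser.parse_args(['http://example.com/feed', '--post', 'a=1&b=2'])
print(options.post)  # 'a=1&b=2'
# It was then forwarded to the fetch layer, roughly:
# crawler.adv_get(url=options.url, post=options.post, timeout=TIMEOUT * 2)
```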

morss/wsgi.py

@@ -15,17 +15,17 @@
 # You should have received a copy of the GNU Affero General Public License along
 # with this program. If not, see <https://www.gnu.org/licenses/>.

-import cgitb
-import mimetypes
+import sys
 import os.path
 import re
-import sys
-import wsgiref.handlers
-import wsgiref.simple_server
-import wsgiref.util

 import lxml.etree
+import cgitb
+import wsgiref.util
+import wsgiref.simple_server
+import wsgiref.handlers
+import mimetypes

 try:
     # python 2
     from urllib import unquote
@@ -33,9 +33,11 @@ except ImportError:
     # python 3
     from urllib.parse import unquote

-from . import crawler, readabilite
-from .morss import (DELAY, TIMEOUT, FeedFetch, FeedFormat, FeedGather,
-                    MorssException, Options, log)
+from . import crawler
+from . import readabilite
+from .morss import FeedFetch, FeedGather, FeedFormat
+from .morss import Options, log, TIMEOUT, DELAY, MorssException

 PORT = int(os.getenv('PORT', 8080))