Compare commits

...

3 Commits

Author   SHA1        Message                Date
pictuga  62f2346b3f  ci/cd attempt (drone)  2021-09-08 21:05:01 +02:00
                     (continuous-integration/drone/push: build is failing)
pictuga  69cdf05341  Apply isort            2021-09-08 20:54:34 +02:00
pictuga  06e0ada95b  Allow POST requests    2021-09-08 20:43:21 +02:00
8 changed files with 69 additions and 61 deletions

.drone.yml (new file, mode 100644, +8)

@@ -0,0 +1,8 @@
+kind: pipeline
+name: default
+
+steps:
+- name: lint
+  image: python
+  commands:
+  - isort --diff --color --recursive .
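
The single lint step only checks import ordering with isort. A rough local equivalent, as a hedged sketch (it assumes the isort CLI is installed; the --recursive flag matches isort 4.x, as isort 5 recurses by default):

    # Hedged sketch: run the same isort check locally from the repository root.
    import subprocess

    subprocess.run(['isort', '--diff', '--color', '--recursive', '.'], check=True)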

README.md

@@ -263,11 +263,11 @@ arguments to morss is explained in Run above.
 The list of arguments can be obtained by running `morss --help`
 ```
-usage: morss [-h] [--format {rss,json,html,csv}] [--search STRING] [--clip]
-             [--indent] [--cache] [--force] [--proxy] [--newest] [--firstlink]
-             [--resolve] [--items XPATH] [--item_link XPATH]
-             [--item_title XPATH] [--item_content XPATH] [--item_time XPATH]
-             [--nolink] [--noref] [--silent]
+usage: morss [-h] [--post STRING] [--format {rss,json,html,csv}]
+             [--search STRING] [--clip] [--indent] [--cache] [--force]
+             [--proxy] [--newest] [--firstlink] [--resolve] [--items XPATH]
+             [--item_link XPATH] [--item_title XPATH] [--item_content XPATH]
+             [--item_time XPATH] [--nolink] [--noref] [--silent]
              url

 Get full-text RSS feeds

@@ -277,6 +277,7 @@ positional arguments:

 optional arguments:
   -h, --help            show this help message and exit
+  --post STRING         POST request

 output:
   --format {rss,json,html,csv}

morss/__main__.py

@@ -20,9 +20,7 @@
 import os
 import sys

-from . import wsgi
-from . import cli
+from . import cli, wsgi
 from .morss import MorssException

morss/cli.py

@@ -32,6 +32,8 @@ def cli_app():
     parser.add_argument('url', help='feed url')
+    parser.add_argument('--post', action='store', type=str, metavar='STRING', help='POST request')
+
     group = parser.add_argument_group('output')
     group.add_argument('--format', default='rss', choices=('rss', 'json', 'html', 'csv'), help='output format')
     group.add_argument('--search', action='store', type=str, metavar='STRING', help='does a basic case-sensitive search in the feed')
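
A hedged sketch of exercising the new flag; the feed URL and POST body are hypothetical, and it assumes the package is importable as morss and that cli_app() reads sys.argv through the argparse parser shown above:

    # Hedged sketch (hypothetical URL and POST body): drive the CLI in-process.
    import sys

    from morss.cli import cli_app

    sys.argv = ['morss', '--post', 'key=value', '--format', 'json',
                'https://example.com/feed']
    cli_app()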

morss/crawler.py

@@ -16,30 +16,33 @@
 # with this program. If not, see <https://www.gnu.org/licenses/>.

 import os
-import sys
-import zlib
-from io import BytesIO, StringIO
-import re
-import chardet
-from cgi import parse_header
-import time
-import threading
 import random
+import re
+import sys
+import threading
+import time
+import zlib
+from cgi import parse_header
 from collections import OrderedDict
+from io import BytesIO, StringIO
+
+import chardet

 try:
     # python 2
-    from urllib2 import BaseHandler, HTTPCookieProcessor, Request, addinfourl, parse_keqv_list, parse_http_list, build_opener
     from urllib import quote
-    from urlparse import urlparse, urlunparse
     import mimetools
+    from urllib2 import (BaseHandler, HTTPCookieProcessor, Request, addinfourl,
+                         build_opener, parse_http_list, parse_keqv_list)
+    from urlparse import urlparse, urlunparse

 except ImportError:
     # python 3
-    from urllib.request import BaseHandler, HTTPCookieProcessor, Request, addinfourl, parse_keqv_list, parse_http_list, build_opener
-    from urllib.parse import quote
-    from urllib.parse import urlparse, urlunparse
     import email
+    from urllib.parse import quote, urlparse, urlunparse
+    from urllib.request import (BaseHandler, HTTPCookieProcessor, Request,
+                                addinfourl, build_opener, parse_http_list,
+                                parse_keqv_list)

 try:
     # python 2
@@ -81,14 +84,17 @@ def get(*args, **kwargs):
     return adv_get(*args, **kwargs)['data']


-def adv_get(url, timeout=None, *args, **kwargs):
+def adv_get(url, post=None, timeout=None, *args, **kwargs):
     url = sanitize_url(url)

+    if post is not None:
+        post = post.encode('utf-8')
+
     if timeout is None:
-        con = custom_opener(*args, **kwargs).open(url)
+        con = custom_opener(*args, **kwargs).open(url, data=post)

     else:
-        con = custom_opener(*args, **kwargs).open(url, timeout=timeout)
+        con = custom_opener(*args, **kwargs).open(url, data=post, timeout=timeout)

     data = con.read()
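
Passing data=post is what actually turns the request into a POST: urllib switches the verb from GET to POST whenever a body is supplied, so adv_get() only needs to encode the string and hand it to the opener. A minimal sketch of that mechanism with plain urllib (hypothetical URL, the module's custom handlers omitted):

    # Hedged sketch: urllib issues a POST as soon as `data` is not None,
    # which is the behaviour adv_get() relies on above.
    from urllib.request import build_opener

    opener = build_opener()
    body = 'key=value'.encode('utf-8')  # same encoding step as in adv_get()
    con = opener.open('https://example.com/api', data=body, timeout=4)
    print(con.getcode(), len(con.read()))
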
@@ -617,7 +623,7 @@ class BaseCache:
         return True


-import sqlite3
+import sqlite3 # isort:skip


 class SQLiteCache(BaseCache):

@@ -654,7 +660,7 @@ class SQLiteCache(BaseCache):
         self.con.execute('INSERT INTO data VALUES (?,?,?,?,?,?) ON CONFLICT(url) DO UPDATE SET code=?, msg=?, headers=?, data=?, timestamp=?', (url,) + value + value)


-import pymysql.cursors
+import pymysql.cursors # isort:skip


 class MySQLCacheHandler(BaseCache):

morss/feeds.py

@@ -15,35 +15,35 @@
 # You should have received a copy of the GNU Affero General Public License along
 # with this program. If not, see <https://www.gnu.org/licenses/>.

-import sys
 import os.path
-from datetime import datetime
-import re
-import json
+import sys
+
+sys.path.append('/home/paul/Documents/Code/morss/lib')
+
 import csv
+import json
+import re
+from copy import deepcopy
+from datetime import datetime
 from fnmatch import fnmatch
-from lxml import etree
-from dateutil import tz
 import dateutil.parser
-from copy import deepcopy
 import lxml.html
+from dateutil import tz
+from lxml import etree

 from .readabilite import parse as html_parse

 json.encoder.c_make_encoder = None

 try:
     # python 2
-    from StringIO import StringIO
     from ConfigParser import RawConfigParser
+    from StringIO import StringIO
 except ImportError:
     # python 3
-    from io import StringIO
     from configparser import RawConfigParser
+    from io import StringIO

 try:
     # python 2

morss/morss.py

@@ -16,30 +16,25 @@
 # with this program. If not, see <https://www.gnu.org/licenses/>.

 import os
+import re
 import time
 from datetime import datetime
-from dateutil import tz
 from fnmatch import fnmatch
-import re

 import lxml.etree
 import lxml.html
+from dateutil import tz

-from . import feeds
-from . import crawler
-from . import readabilite
+from . import crawler, feeds, readabilite

 try:
     # python 2
     from httplib import HTTPException
-    from urlparse import urlparse, urljoin, parse_qs
+    from urlparse import parse_qs, urljoin, urlparse
 except ImportError:
     # python 3
     from http.client import HTTPException
-    from urllib.parse import urlparse, urljoin, parse_qs
+    from urllib.parse import parse_qs, urljoin, urlparse

 MAX_ITEM = int(os.getenv('MAX_ITEM', 5)) # cache-only beyond
@@ -276,7 +271,7 @@ def FeedFetch(url, options):
         delay = 0

     try:
-        req = crawler.adv_get(url=url, follow=('rss' if not options.items else None), delay=delay, timeout=TIMEOUT * 2)
+        req = crawler.adv_get(url=url, post=options.post, follow=('rss' if not options.items else None), delay=delay, timeout=TIMEOUT * 2)

     except (IOError, HTTPException):
         raise MorssException('Error downloading feed')

morss/wsgi.py

@@ -15,16 +15,16 @@
 # You should have received a copy of the GNU Affero General Public License along
 # with this program. If not, see <https://www.gnu.org/licenses/>.

-import sys
+import cgitb
+import mimetypes
 import os.path
 import re
-import lxml.etree
-import cgitb
-import wsgiref.util
-import wsgiref.simple_server
+import sys
 import wsgiref.handlers
-import mimetypes
+import wsgiref.simple_server
+import wsgiref.util
+
+import lxml.etree

 try:
     # python 2
@@ -33,11 +33,9 @@ except ImportError:
     # python 3
     from urllib.parse import unquote

-from . import crawler
-from . import readabilite
-from .morss import FeedFetch, FeedGather, FeedFormat
-from .morss import Options, log, TIMEOUT, DELAY, MorssException
+from . import crawler, readabilite
+from .morss import (DELAY, TIMEOUT, FeedFetch, FeedFormat, FeedGather,
+                    MorssException, Options, log)

 PORT = int(os.getenv('PORT', 8080))