Compare commits
3 Commits
71d9c7a027
...
62f2346b3f
Author | SHA1 | Date |
---|---|---|
pictuga | 62f2346b3f | |
pictuga | 69cdf05341 | |
pictuga | 06e0ada95b | |
|
@@ -0,0 +1,8 @@
|
||||||
|
kind: pipeline
|
||||||
|
name: default
|
||||||
|
|
||||||
|
steps:
|
||||||
|
- name: lint
|
||||||
|
image: python
|
||||||
|
commands:
|
||||||
|
- isort --diff --color --recursive .
|
11
README.md
11
README.md
|
@@ -263,11 +263,11 @@ arguments to morss is explained in Run above.
|
||||||
The list of arguments can be obtained by running `morss --help`
|
The list of arguments can be obtained by running `morss --help`
|
||||||
|
|
||||||
```
|
```
|
||||||
usage: morss [-h] [--format {rss,json,html,csv}] [--search STRING] [--clip]
|
usage: morss [-h] [--post STRING] [--format {rss,json,html,csv}]
|
||||||
[--indent] [--cache] [--force] [--proxy] [--newest] [--firstlink]
|
[--search STRING] [--clip] [--indent] [--cache] [--force]
|
||||||
[--resolve] [--items XPATH] [--item_link XPATH]
|
[--proxy] [--newest] [--firstlink] [--resolve] [--items XPATH]
|
||||||
[--item_title XPATH] [--item_content XPATH] [--item_time XPATH]
|
[--item_link XPATH] [--item_title XPATH] [--item_content XPATH]
|
||||||
[--nolink] [--noref] [--silent]
|
[--item_time XPATH] [--nolink] [--noref] [--silent]
|
||||||
url
|
url
|
||||||
|
|
||||||
Get full-text RSS feeds
|
Get full-text RSS feeds
|
||||||
|
@@ -277,6 +277,7 @@ positional arguments:
|
||||||
|
|
||||||
optional arguments:
|
optional arguments:
|
||||||
-h, --help show this help message and exit
|
-h, --help show this help message and exit
|
||||||
|
--post STRING POST request
|
||||||
|
|
||||||
output:
|
output:
|
||||||
--format {rss,json,html,csv}
|
--format {rss,json,html,csv}
|
||||||
|
|
|
@@ -20,9 +20,7 @@
|
||||||
import os
|
import os
|
||||||
import sys
|
import sys
|
||||||
|
|
||||||
from . import wsgi
|
from . import cli, wsgi
|
||||||
from . import cli
|
|
||||||
|
|
||||||
from .morss import MorssException
|
from .morss import MorssException
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@@ -32,6 +32,8 @@ def cli_app():
|
||||||
|
|
||||||
parser.add_argument('url', help='feed url')
|
parser.add_argument('url', help='feed url')
|
||||||
|
|
||||||
|
parser.add_argument('--post', action='store', type=str, metavar='STRING', help='POST request')
|
||||||
|
|
||||||
group = parser.add_argument_group('output')
|
group = parser.add_argument_group('output')
|
||||||
group.add_argument('--format', default='rss', choices=('rss', 'json', 'html', 'csv'), help='output format')
|
group.add_argument('--format', default='rss', choices=('rss', 'json', 'html', 'csv'), help='output format')
|
||||||
group.add_argument('--search', action='store', type=str, metavar='STRING', help='does a basic case-sensitive search in the feed')
|
group.add_argument('--search', action='store', type=str, metavar='STRING', help='does a basic case-sensitive search in the feed')
|
||||||
|
|
|
@@ -16,30 +16,33 @@
|
||||||
# with this program. If not, see <https://www.gnu.org/licenses/>.
|
# with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
import os
|
import os
|
||||||
import sys
|
|
||||||
|
|
||||||
import zlib
|
|
||||||
from io import BytesIO, StringIO
|
|
||||||
import re
|
|
||||||
import chardet
|
|
||||||
from cgi import parse_header
|
|
||||||
import time
|
|
||||||
import threading
|
|
||||||
import random
|
import random
|
||||||
|
import re
|
||||||
|
import sys
|
||||||
|
import threading
|
||||||
|
import time
|
||||||
|
import zlib
|
||||||
|
from cgi import parse_header
|
||||||
from collections import OrderedDict
|
from collections import OrderedDict
|
||||||
|
from io import BytesIO, StringIO
|
||||||
|
|
||||||
|
import chardet
|
||||||
|
|
||||||
try:
|
try:
|
||||||
# python 2
|
# python 2
|
||||||
from urllib2 import BaseHandler, HTTPCookieProcessor, Request, addinfourl, parse_keqv_list, parse_http_list, build_opener
|
|
||||||
from urllib import quote
|
from urllib import quote
|
||||||
from urlparse import urlparse, urlunparse
|
|
||||||
import mimetools
|
import mimetools
|
||||||
|
from urllib2 import (BaseHandler, HTTPCookieProcessor, Request, addinfourl,
|
||||||
|
build_opener, parse_http_list, parse_keqv_list)
|
||||||
|
from urlparse import urlparse, urlunparse
|
||||||
except ImportError:
|
except ImportError:
|
||||||
# python 3
|
# python 3
|
||||||
from urllib.request import BaseHandler, HTTPCookieProcessor, Request, addinfourl, parse_keqv_list, parse_http_list, build_opener
|
|
||||||
from urllib.parse import quote
|
|
||||||
from urllib.parse import urlparse, urlunparse
|
|
||||||
import email
|
import email
|
||||||
|
from urllib.parse import quote, urlparse, urlunparse
|
||||||
|
from urllib.request import (BaseHandler, HTTPCookieProcessor, Request,
|
||||||
|
addinfourl, build_opener, parse_http_list,
|
||||||
|
parse_keqv_list)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
# python 2
|
# python 2
|
||||||
|
@@ -81,14 +84,17 @@ def get(*args, **kwargs):
|
||||||
return adv_get(*args, **kwargs)['data']
|
return adv_get(*args, **kwargs)['data']
|
||||||
|
|
||||||
|
|
||||||
def adv_get(url, timeout=None, *args, **kwargs):
|
def adv_get(url, post=None, timeout=None, *args, **kwargs):
|
||||||
url = sanitize_url(url)
|
url = sanitize_url(url)
|
||||||
|
|
||||||
|
if post is not None:
|
||||||
|
post = post.encode('utf-8')
|
||||||
|
|
||||||
if timeout is None:
|
if timeout is None:
|
||||||
con = custom_opener(*args, **kwargs).open(url)
|
con = custom_opener(*args, **kwargs).open(url, data=post)
|
||||||
|
|
||||||
else:
|
else:
|
||||||
con = custom_opener(*args, **kwargs).open(url, timeout=timeout)
|
con = custom_opener(*args, **kwargs).open(url, data=post, timeout=timeout)
|
||||||
|
|
||||||
data = con.read()
|
data = con.read()
|
||||||
|
|
||||||
|
@@ -617,7 +623,7 @@ class BaseCache:
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
|
||||||
import sqlite3
|
import sqlite3 # isort:skip
|
||||||
|
|
||||||
|
|
||||||
class SQLiteCache(BaseCache):
|
class SQLiteCache(BaseCache):
|
||||||
|
@@ -654,7 +660,7 @@ class SQLiteCache(BaseCache):
|
||||||
self.con.execute('INSERT INTO data VALUES (?,?,?,?,?,?) ON CONFLICT(url) DO UPDATE SET code=?, msg=?, headers=?, data=?, timestamp=?', (url,) + value + value)
|
self.con.execute('INSERT INTO data VALUES (?,?,?,?,?,?) ON CONFLICT(url) DO UPDATE SET code=?, msg=?, headers=?, data=?, timestamp=?', (url,) + value + value)
|
||||||
|
|
||||||
|
|
||||||
import pymysql.cursors
|
import pymysql.cursors # isort:skip
|
||||||
|
|
||||||
|
|
||||||
class MySQLCacheHandler(BaseCache):
|
class MySQLCacheHandler(BaseCache):
|
||||||
|
|
|
@@ -15,35 +15,35 @@
|
||||||
# You should have received a copy of the GNU Affero General Public License along
|
# You should have received a copy of the GNU Affero General Public License along
|
||||||
# with this program. If not, see <https://www.gnu.org/licenses/>.
|
# with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
import sys
|
|
||||||
import os.path
|
import os.path
|
||||||
|
import sys
|
||||||
|
|
||||||
from datetime import datetime
|
sys.path.append('/home/paul/Documents/Code/morss/lib')
|
||||||
|
|
||||||
import re
|
|
||||||
import json
|
|
||||||
import csv
|
import csv
|
||||||
|
import json
|
||||||
|
import re
|
||||||
|
from copy import deepcopy
|
||||||
|
from datetime import datetime
|
||||||
from fnmatch import fnmatch
|
from fnmatch import fnmatch
|
||||||
|
|
||||||
from lxml import etree
|
|
||||||
from dateutil import tz
|
|
||||||
import dateutil.parser
|
import dateutil.parser
|
||||||
from copy import deepcopy
|
|
||||||
|
|
||||||
import lxml.html
|
import lxml.html
|
||||||
|
from dateutil import tz
|
||||||
|
from lxml import etree
|
||||||
|
|
||||||
from .readabilite import parse as html_parse
|
from .readabilite import parse as html_parse
|
||||||
|
|
||||||
json.encoder.c_make_encoder = None
|
json.encoder.c_make_encoder = None
|
||||||
|
|
||||||
try:
|
try:
|
||||||
# python 2
|
# python 2
|
||||||
from StringIO import StringIO
|
|
||||||
from ConfigParser import RawConfigParser
|
from ConfigParser import RawConfigParser
|
||||||
|
from StringIO import StringIO
|
||||||
except ImportError:
|
except ImportError:
|
||||||
# python 3
|
# python 3
|
||||||
from io import StringIO
|
|
||||||
from configparser import RawConfigParser
|
from configparser import RawConfigParser
|
||||||
|
from io import StringIO
|
||||||
|
|
||||||
try:
|
try:
|
||||||
# python 2
|
# python 2
|
||||||
|
|
|
@@ -16,30 +16,25 @@
|
||||||
# with this program. If not, see <https://www.gnu.org/licenses/>.
|
# with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
import os
|
import os
|
||||||
|
import re
|
||||||
import time
|
import time
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from dateutil import tz
|
|
||||||
|
|
||||||
from fnmatch import fnmatch
|
from fnmatch import fnmatch
|
||||||
import re
|
|
||||||
|
|
||||||
import lxml.etree
|
import lxml.etree
|
||||||
import lxml.html
|
import lxml.html
|
||||||
|
from dateutil import tz
|
||||||
|
|
||||||
from . import feeds
|
from . import crawler, feeds, readabilite
|
||||||
from . import crawler
|
|
||||||
from . import readabilite
|
|
||||||
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
# python 2
|
# python 2
|
||||||
from httplib import HTTPException
|
from httplib import HTTPException
|
||||||
from urlparse import urlparse, urljoin, parse_qs
|
from urlparse import parse_qs, urljoin, urlparse
|
||||||
except ImportError:
|
except ImportError:
|
||||||
# python 3
|
# python 3
|
||||||
from http.client import HTTPException
|
from http.client import HTTPException
|
||||||
from urllib.parse import urlparse, urljoin, parse_qs
|
from urllib.parse import parse_qs, urljoin, urlparse
|
||||||
|
|
||||||
|
|
||||||
MAX_ITEM = int(os.getenv('MAX_ITEM', 5)) # cache-only beyond
|
MAX_ITEM = int(os.getenv('MAX_ITEM', 5)) # cache-only beyond
|
||||||
|
@@ -276,7 +271,7 @@ def FeedFetch(url, options):
|
||||||
delay = 0
|
delay = 0
|
||||||
|
|
||||||
try:
|
try:
|
||||||
req = crawler.adv_get(url=url, follow=('rss' if not options.items else None), delay=delay, timeout=TIMEOUT * 2)
|
req = crawler.adv_get(url=url, post=options.post, follow=('rss' if not options.items else None), delay=delay, timeout=TIMEOUT * 2)
|
||||||
|
|
||||||
except (IOError, HTTPException):
|
except (IOError, HTTPException):
|
||||||
raise MorssException('Error downloading feed')
|
raise MorssException('Error downloading feed')
|
||||||
|
|
|
@@ -15,16 +15,16 @@
|
||||||
# You should have received a copy of the GNU Affero General Public License along
|
# You should have received a copy of the GNU Affero General Public License along
|
||||||
# with this program. If not, see <https://www.gnu.org/licenses/>.
|
# with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
import sys
|
import cgitb
|
||||||
|
import mimetypes
|
||||||
import os.path
|
import os.path
|
||||||
import re
|
import re
|
||||||
import lxml.etree
|
import sys
|
||||||
|
|
||||||
import cgitb
|
|
||||||
import wsgiref.util
|
|
||||||
import wsgiref.simple_server
|
|
||||||
import wsgiref.handlers
|
import wsgiref.handlers
|
||||||
import mimetypes
|
import wsgiref.simple_server
|
||||||
|
import wsgiref.util
|
||||||
|
|
||||||
|
import lxml.etree
|
||||||
|
|
||||||
try:
|
try:
|
||||||
# python 2
|
# python 2
|
||||||
|
@@ -33,11 +33,9 @@ except ImportError:
|
||||||
# python 3
|
# python 3
|
||||||
from urllib.parse import unquote
|
from urllib.parse import unquote
|
||||||
|
|
||||||
from . import crawler
|
from . import crawler, readabilite
|
||||||
from . import readabilite
|
from .morss import (DELAY, TIMEOUT, FeedFetch, FeedFormat, FeedGather,
|
||||||
from .morss import FeedFetch, FeedGather, FeedFormat
|
MorssException, Options, log)
|
||||||
from .morss import Options, log, TIMEOUT, DELAY, MorssException
|
|
||||||
|
|
||||||
|
|
||||||
PORT = int(os.getenv('PORT', 8080))
|
PORT = int(os.getenv('PORT', 8080))
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue