Compare commits
No commits in common. "62f2346b3f601c774655f33d8b7b1599ebc8b9e3" and "71d9c7a027835da8edc6502391224f909ea65ec5" have entirely different histories.
62f2346b3f ... 71d9c7a027
.drone.yml (deleted)
@@ -1,8 +0,0 @@
-kind: pipeline
-name: default
-
-steps:
-- name: lint
-  image: python
-  commands:
-    - isort --diff --color --recursive .
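The deleted pipeline ran a single lint step: isort in diff mode over the whole tree (--recursive is the isort 4.x spelling; isort 5 recurses by default). A minimal sketch of the same check through isort's Python API, assuming isort >= 5, which exposes isort.code:

    import isort

    messy = 'import sys\nimport os\n'
    print(isort.code(messy))           # 'import os\nimport sys\n'
    print(isort.code(messy) == messy)  # False -> this file would fail the lint step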
README.md
@@ -263,11 +263,11 @@ arguments to morss is explained in Run above.
 The list of arguments can be obtained by running `morss --help`
 
 ```
-usage: morss [-h] [--post STRING] [--format {rss,json,html,csv}]
-             [--search STRING] [--clip] [--indent] [--cache] [--force]
-             [--proxy] [--newest] [--firstlink] [--resolve] [--items XPATH]
-             [--item_link XPATH] [--item_title XPATH] [--item_content XPATH]
-             [--item_time XPATH] [--nolink] [--noref] [--silent]
+usage: morss [-h] [--format {rss,json,html,csv}] [--search STRING] [--clip]
+             [--indent] [--cache] [--force] [--proxy] [--newest] [--firstlink]
+             [--resolve] [--items XPATH] [--item_link XPATH]
+             [--item_title XPATH] [--item_content XPATH] [--item_time XPATH]
+             [--nolink] [--noref] [--silent]
              url
 
 Get full-text RSS feeds
@@ -277,7 +277,6 @@ positional arguments:
 
 optional arguments:
   -h, --help            show this help message and exit
-  --post STRING         POST request
 
 output:
   --format {rss,json,html,csv}
morss/__main__.py
@@ -20,7 +20,9 @@
 import os
 import sys
 
-from . import cli, wsgi
+from . import wsgi
+from . import cli
+
 from .morss import MorssException
 
 
morss/cli.py
@@ -32,8 +32,6 @@ def cli_app():
 
     parser.add_argument('url', help='feed url')
 
-    parser.add_argument('--post', action='store', type=str, metavar='STRING', help='POST request')
-
     group = parser.add_argument_group('output')
     group.add_argument('--format', default='rss', choices=('rss', 'json', 'html', 'csv'), help='output format')
     group.add_argument('--search', action='store', type=str, metavar='STRING', help='does a basic case-sensitive search in the feed')
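A note on the removal above: the flag fed options.post, which FeedFetch passed on to the crawler (see the morss.py hunk further down). A self-contained sketch of that wiring, with a placeholder feed URL; only the two add_argument lines are taken verbatim from the diff:

    import argparse

    parser = argparse.ArgumentParser(prog='morss', description='Get full-text RSS feeds')
    parser.add_argument('url', help='feed url')
    parser.add_argument('--post', action='store', type=str, metavar='STRING', help='POST request')

    # argparse stores the flag on the parsed namespace, which the rest of
    # the code reads back as options.post
    options = parser.parse_args(['http://example.com/feed', '--post', 'key=value'])
    print(options.url, options.post)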
morss/crawler.py
@@ -16,33 +16,30 @@
 # with this program. If not, see <https://www.gnu.org/licenses/>.
 
 import os
-import random
-import re
 import sys
-import threading
-import time
-import zlib
-from cgi import parse_header
-from collections import OrderedDict
-from io import BytesIO, StringIO
 
+import zlib
+from io import BytesIO, StringIO
+import re
 import chardet
+from cgi import parse_header
+import time
+import threading
+import random
+from collections import OrderedDict
 
 try:
     # python 2
+    from urllib2 import BaseHandler, HTTPCookieProcessor, Request, addinfourl, parse_keqv_list, parse_http_list, build_opener
     from urllib import quote
-
-    import mimetools
-    from urllib2 import (BaseHandler, HTTPCookieProcessor, Request, addinfourl,
-                         build_opener, parse_http_list, parse_keqv_list)
     from urlparse import urlparse, urlunparse
+    import mimetools
 except ImportError:
     # python 3
+    from urllib.request import BaseHandler, HTTPCookieProcessor, Request, addinfourl, parse_keqv_list, parse_http_list, build_opener
+    from urllib.parse import quote
+    from urllib.parse import urlparse, urlunparse
     import email
-    from urllib.parse import quote, urlparse, urlunparse
-    from urllib.request import (BaseHandler, HTTPCookieProcessor, Request,
-                                addinfourl, build_opener, parse_http_list,
-                                parse_keqv_list)
 
 try:
     # python 2
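A note on cgi.parse_header, which only moves within the block above: it splits a header such as Content-Type into its bare value and a dict of parameters, presumably how this module picks a charset out of a response (a sketch of the stdlib call, not the module's actual call site; the cgi module is deprecated since Python 3.11 but fine for this code's era):

    from cgi import parse_header

    value, params = parse_header('text/html; charset=UTF-8')
    print(value)   # text/html
    print(params)  # {'charset': 'UTF-8'}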
@@ -84,17 +81,14 @@ def get(*args, **kwargs):
     return adv_get(*args, **kwargs)['data']
 
 
-def adv_get(url, post=None, timeout=None, *args, **kwargs):
+def adv_get(url, timeout=None, *args, **kwargs):
     url = sanitize_url(url)
 
-    if post is not None:
-        post = post.encode('utf-8')
-
     if timeout is None:
-        con = custom_opener(*args, **kwargs).open(url, data=post)
+        con = custom_opener(*args, **kwargs).open(url)
 
     else:
-        con = custom_opener(*args, **kwargs).open(url, data=post, timeout=timeout)
+        con = custom_opener(*args, **kwargs).open(url, timeout=timeout)
 
     data = con.read()
 
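The removed lines are what made the POST support work: a non-None body was UTF-8-encoded and handed to the opener as data, and in urllib passing bytes as data turns a GET into a POST. A minimal stdlib sketch of that mechanism, with a placeholder URL:

    try:
        # python 2
        from urllib2 import urlopen
    except ImportError:
        # python 3
        from urllib.request import urlopen

    # bytes passed as `data` switch the request method from GET to POST,
    # which is exactly what the removed `post` parameter relied on
    con = urlopen('http://example.com/feed', data='key=value'.encode('utf-8'))
    print(con.read(100))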
@@ -623,7 +617,7 @@ class BaseCache:
         return True
 
 
-import sqlite3 # isort:skip
+import sqlite3
 
 
 class SQLiteCache(BaseCache):
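A note on the marker dropped above: # isort:skip tells isort to leave the annotated line where it is, and it was needed because sqlite3 is deliberately imported mid-file, next to the class that uses it, rather than in the header block. With the Drone lint step deleted in this compare the marker no longer guards anything, which is consistent with removing it here and on the pymysql.cursors import below.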
@@ -660,7 +654,7 @@ class SQLiteCache(BaseCache):
         self.con.execute('INSERT INTO data VALUES (?,?,?,?,?,?) ON CONFLICT(url) DO UPDATE SET code=?, msg=?, headers=?, data=?, timestamp=?', (url,) + value + value)
 
 
-import pymysql.cursors # isort:skip
+import pymysql.cursors
 
 
 class MySQLCacheHandler(BaseCache):
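The context line above is an SQLite upsert: the same six values are bound twice, once for the INSERT and once for the DO UPDATE branch. A self-contained sketch (the schema is inferred from the SET clause; ON CONFLICT ... DO UPDATE needs SQLite >= 3.24 and a unique constraint on url):

    import sqlite3

    con = sqlite3.connect(':memory:')
    con.execute('CREATE TABLE data (url UNIQUE, code, msg, headers, data, timestamp)')

    url = 'http://example.com/feed'
    value = (200, 'OK', '{}', b'<rss/>', 1234567890)

    # 6 placeholders for the INSERT plus 5 for the UPDATE = (url,) + value + value
    for _ in range(2):
        con.execute('INSERT INTO data VALUES (?,?,?,?,?,?) ON CONFLICT(url) DO UPDATE SET code=?, msg=?, headers=?, data=?, timestamp=?', (url,) + value + value)

    print(con.execute('SELECT COUNT(*) FROM data').fetchone()[0])  # 1 -> updated, not duplicated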
morss/feeds.py
@@ -15,35 +15,35 @@
 # You should have received a copy of the GNU Affero General Public License along
 # with this program. If not, see <https://www.gnu.org/licenses/>.
 
-import os.path
 import sys
+import os.path
 
-sys.path.append('/home/paul/Documents/Code/morss/lib')
-
-import csv
-import json
-import re
-from copy import deepcopy
 from datetime import datetime
+
+import re
+import json
+import csv
+
 from fnmatch import fnmatch
 
-import dateutil.parser
-import lxml.html
-from dateutil import tz
 from lxml import etree
+from dateutil import tz
+import dateutil.parser
+from copy import deepcopy
 
+import lxml.html
 from .readabilite import parse as html_parse
 
 json.encoder.c_make_encoder = None
 
 try:
     # python 2
-    from ConfigParser import RawConfigParser
     from StringIO import StringIO
+    from ConfigParser import RawConfigParser
 except ImportError:
     # python 3
-    from configparser import RawConfigParser
     from io import StringIO
+    from configparser import RawConfigParser
 
 try:
     # python 2
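A note on a context line above: json.encoder.c_make_encoder = None disables the C fast path, so json.dumps falls back to the pure-Python encoder. A hedged sketch of the observable difference; the point, presumably, is that the Python path honours behaviour overridden on dict subclasses, which the C encoder bypasses:

    import json

    class LoudDict(dict):
        def items(self):
            print('items() called')
            return super(LoudDict, self).items()

    json.encoder.c_make_encoder = None  # force the pure-Python encoder
    print(json.dumps(LoudDict(a=1)))    # prints 'items() called', then {"a": 1}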
morss/morss.py
@@ -16,25 +16,30 @@
 # with this program. If not, see <https://www.gnu.org/licenses/>.
 
 import os
-import re
 import time
 from datetime import datetime
+from dateutil import tz
 
 from fnmatch import fnmatch
+import re
 
 import lxml.etree
 import lxml.html
-from dateutil import tz
 
-from . import crawler, feeds, readabilite
+from . import feeds
+from . import crawler
+from . import readabilite
 
 
 try:
     # python 2
     from httplib import HTTPException
-    from urlparse import parse_qs, urljoin, urlparse
+    from urlparse import urlparse, urljoin, parse_qs
 except ImportError:
     # python 3
     from http.client import HTTPException
-    from urllib.parse import parse_qs, urljoin, urlparse
+    from urllib.parse import urlparse, urljoin, parse_qs
 
 
 MAX_ITEM = int(os.getenv('MAX_ITEM', 5)) # cache-only beyond
@@ -271,7 +276,7 @@ def FeedFetch(url, options):
     delay = 0
 
     try:
-        req = crawler.adv_get(url=url, post=options.post, follow=('rss' if not options.items else None), delay=delay, timeout=TIMEOUT * 2)
+        req = crawler.adv_get(url=url, follow=('rss' if not options.items else None), delay=delay, timeout=TIMEOUT * 2)
 
     except (IOError, HTTPException):
         raise MorssException('Error downloading feed')
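With the post keyword gone from both ends of the call, adv_get is GET-only. A hedged sketch of the call as it stands after this change (module path as in this repository, placeholder feed URL; the dict access follows get() above, which returns adv_get(*args, **kwargs)['data']):

    from morss import crawler

    req = crawler.adv_get(url='http://example.com/feed', follow='rss', timeout=4)
    print(req['data'][:100])  # raw body, per get() above; other keys are not shown in this diff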
morss/wsgi.py
@@ -15,17 +15,17 @@
 # You should have received a copy of the GNU Affero General Public License along
 # with this program. If not, see <https://www.gnu.org/licenses/>.
 
-import cgitb
-import mimetypes
+import sys
 import os.path
 import re
-import sys
-import wsgiref.handlers
-import wsgiref.simple_server
-import wsgiref.util
 
 import lxml.etree
 
+import cgitb
+import wsgiref.util
+import wsgiref.simple_server
+import wsgiref.handlers
+import mimetypes
 
 try:
     # python 2
     from urllib import unquote
@@ -33,9 +33,11 @@ except ImportError:
     # python 3
     from urllib.parse import unquote
 
-from . import crawler, readabilite
-from .morss import (DELAY, TIMEOUT, FeedFetch, FeedFormat, FeedGather,
-                    MorssException, Options, log)
+from . import crawler
+from . import readabilite
+from .morss import FeedFetch, FeedGather, FeedFormat
+from .morss import Options, log, TIMEOUT, DELAY, MorssException
 
 
 PORT = int(os.getenv('PORT', 8080))
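The wsgiref imports regrouped in the first hunk of this file are what the built-in server mode builds on. A minimal, self-contained sketch of that serving pattern, with a stand-in app callable rather than morss's actual WSGI entry point (the PORT default is the one visible in the diff):

    import os
    import wsgiref.simple_server

    def app(environ, start_response):
        # stand-in for morss's real WSGI callable
        start_response('200 OK', [('Content-Type', 'text/plain')])
        return [b'ok']

    PORT = int(os.getenv('PORT', 8080))
    httpd = wsgiref.simple_server.make_server('0.0.0.0', PORT, app)
    httpd.serve_forever()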