Compare commits
No commits in common. "62f2346b3f601c774655f33d8b7b1599ebc8b9e3" and "71d9c7a027835da8edc6502391224f909ea65ec5" have entirely different histories.
62f2346b3f
...
71d9c7a027
|
@ -1,8 +0,0 @@
|
|||
kind: pipeline
|
||||
name: default
|
||||
|
||||
steps:
|
||||
- name: lint
|
||||
image: python
|
||||
commands:
|
||||
- isort --diff --color --recursive .
|
11
README.md
11
README.md
|
@ -263,11 +263,11 @@ arguments to morss is explained in Run above.
|
|||
The list of arguments can be obtained by running `morss --help`
|
||||
|
||||
```
|
||||
usage: morss [-h] [--post STRING] [--format {rss,json,html,csv}]
|
||||
[--search STRING] [--clip] [--indent] [--cache] [--force]
|
||||
[--proxy] [--newest] [--firstlink] [--resolve] [--items XPATH]
|
||||
[--item_link XPATH] [--item_title XPATH] [--item_content XPATH]
|
||||
[--item_time XPATH] [--nolink] [--noref] [--silent]
|
||||
usage: morss [-h] [--format {rss,json,html,csv}] [--search STRING] [--clip]
|
||||
[--indent] [--cache] [--force] [--proxy] [--newest] [--firstlink]
|
||||
[--resolve] [--items XPATH] [--item_link XPATH]
|
||||
[--item_title XPATH] [--item_content XPATH] [--item_time XPATH]
|
||||
[--nolink] [--noref] [--silent]
|
||||
url
|
||||
|
||||
Get full-text RSS feeds
|
||||
|
@ -277,7 +277,6 @@ positional arguments:
|
|||
|
||||
optional arguments:
|
||||
-h, --help show this help message and exit
|
||||
--post STRING POST request
|
||||
|
||||
output:
|
||||
--format {rss,json,html,csv}
|
||||
|
|
|
@ -20,7 +20,9 @@
|
|||
import os
|
||||
import sys
|
||||
|
||||
from . import cli, wsgi
|
||||
from . import wsgi
|
||||
from . import cli
|
||||
|
||||
from .morss import MorssException
|
||||
|
||||
|
||||
|
|
|
@ -32,8 +32,6 @@ def cli_app():
|
|||
|
||||
parser.add_argument('url', help='feed url')
|
||||
|
||||
parser.add_argument('--post', action='store', type=str, metavar='STRING', help='POST request')
|
||||
|
||||
group = parser.add_argument_group('output')
|
||||
group.add_argument('--format', default='rss', choices=('rss', 'json', 'html', 'csv'), help='output format')
|
||||
group.add_argument('--search', action='store', type=str, metavar='STRING', help='does a basic case-sensitive search in the feed')
|
||||
|
|
|
@ -16,33 +16,30 @@
|
|||
# with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
|
||||
import os
|
||||
import random
|
||||
import re
|
||||
import sys
|
||||
import threading
|
||||
import time
|
||||
import zlib
|
||||
from cgi import parse_header
|
||||
from collections import OrderedDict
|
||||
from io import BytesIO, StringIO
|
||||
|
||||
import zlib
|
||||
from io import BytesIO, StringIO
|
||||
import re
|
||||
import chardet
|
||||
from cgi import parse_header
|
||||
import time
|
||||
import threading
|
||||
import random
|
||||
from collections import OrderedDict
|
||||
|
||||
try:
|
||||
# python 2
|
||||
from urllib2 import BaseHandler, HTTPCookieProcessor, Request, addinfourl, parse_keqv_list, parse_http_list, build_opener
|
||||
from urllib import quote
|
||||
|
||||
import mimetools
|
||||
from urllib2 import (BaseHandler, HTTPCookieProcessor, Request, addinfourl,
|
||||
build_opener, parse_http_list, parse_keqv_list)
|
||||
from urlparse import urlparse, urlunparse
|
||||
import mimetools
|
||||
except ImportError:
|
||||
# python 3
|
||||
from urllib.request import BaseHandler, HTTPCookieProcessor, Request, addinfourl, parse_keqv_list, parse_http_list, build_opener
|
||||
from urllib.parse import quote
|
||||
from urllib.parse import urlparse, urlunparse
|
||||
import email
|
||||
from urllib.parse import quote, urlparse, urlunparse
|
||||
from urllib.request import (BaseHandler, HTTPCookieProcessor, Request,
|
||||
addinfourl, build_opener, parse_http_list,
|
||||
parse_keqv_list)
|
||||
|
||||
try:
|
||||
# python 2
|
||||
|
@ -84,17 +81,14 @@ def get(*args, **kwargs):
|
|||
return adv_get(*args, **kwargs)['data']
|
||||
|
||||
|
||||
def adv_get(url, post=None, timeout=None, *args, **kwargs):
|
||||
def adv_get(url, timeout=None, *args, **kwargs):
|
||||
url = sanitize_url(url)
|
||||
|
||||
if post is not None:
|
||||
post = post.encode('utf-8')
|
||||
|
||||
if timeout is None:
|
||||
con = custom_opener(*args, **kwargs).open(url, data=post)
|
||||
con = custom_opener(*args, **kwargs).open(url)
|
||||
|
||||
else:
|
||||
con = custom_opener(*args, **kwargs).open(url, data=post, timeout=timeout)
|
||||
con = custom_opener(*args, **kwargs).open(url, timeout=timeout)
|
||||
|
||||
data = con.read()
|
||||
|
||||
|
@ -623,7 +617,7 @@ class BaseCache:
|
|||
return True
|
||||
|
||||
|
||||
import sqlite3 # isort:skip
|
||||
import sqlite3
|
||||
|
||||
|
||||
class SQLiteCache(BaseCache):
|
||||
|
@ -660,7 +654,7 @@ class SQLiteCache(BaseCache):
|
|||
self.con.execute('INSERT INTO data VALUES (?,?,?,?,?,?) ON CONFLICT(url) DO UPDATE SET code=?, msg=?, headers=?, data=?, timestamp=?', (url,) + value + value)
|
||||
|
||||
|
||||
import pymysql.cursors # isort:skip
|
||||
import pymysql.cursors
|
||||
|
||||
|
||||
class MySQLCacheHandler(BaseCache):
|
||||
|
|
|
@ -15,35 +15,35 @@
|
|||
# You should have received a copy of the GNU Affero General Public License along
|
||||
# with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
|
||||
import os.path
|
||||
import sys
|
||||
import os.path
|
||||
|
||||
sys.path.append('/home/paul/Documents/Code/morss/lib')
|
||||
|
||||
import csv
|
||||
import json
|
||||
import re
|
||||
from copy import deepcopy
|
||||
from datetime import datetime
|
||||
|
||||
import re
|
||||
import json
|
||||
import csv
|
||||
|
||||
from fnmatch import fnmatch
|
||||
|
||||
import dateutil.parser
|
||||
import lxml.html
|
||||
from dateutil import tz
|
||||
from lxml import etree
|
||||
from dateutil import tz
|
||||
import dateutil.parser
|
||||
from copy import deepcopy
|
||||
|
||||
import lxml.html
|
||||
from .readabilite import parse as html_parse
|
||||
|
||||
json.encoder.c_make_encoder = None
|
||||
|
||||
try:
|
||||
# python 2
|
||||
from ConfigParser import RawConfigParser
|
||||
from StringIO import StringIO
|
||||
from ConfigParser import RawConfigParser
|
||||
except ImportError:
|
||||
# python 3
|
||||
from configparser import RawConfigParser
|
||||
from io import StringIO
|
||||
from configparser import RawConfigParser
|
||||
|
||||
try:
|
||||
# python 2
|
||||
|
|
|
@ -16,25 +16,30 @@
|
|||
# with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
|
||||
import os
|
||||
import re
|
||||
|
||||
import time
|
||||
from datetime import datetime
|
||||
from dateutil import tz
|
||||
|
||||
from fnmatch import fnmatch
|
||||
import re
|
||||
|
||||
import lxml.etree
|
||||
import lxml.html
|
||||
from dateutil import tz
|
||||
|
||||
from . import crawler, feeds, readabilite
|
||||
from . import feeds
|
||||
from . import crawler
|
||||
from . import readabilite
|
||||
|
||||
|
||||
try:
|
||||
# python 2
|
||||
from httplib import HTTPException
|
||||
from urlparse import parse_qs, urljoin, urlparse
|
||||
from urlparse import urlparse, urljoin, parse_qs
|
||||
except ImportError:
|
||||
# python 3
|
||||
from http.client import HTTPException
|
||||
from urllib.parse import parse_qs, urljoin, urlparse
|
||||
from urllib.parse import urlparse, urljoin, parse_qs
|
||||
|
||||
|
||||
MAX_ITEM = int(os.getenv('MAX_ITEM', 5)) # cache-only beyond
|
||||
|
@ -271,7 +276,7 @@ def FeedFetch(url, options):
|
|||
delay = 0
|
||||
|
||||
try:
|
||||
req = crawler.adv_get(url=url, post=options.post, follow=('rss' if not options.items else None), delay=delay, timeout=TIMEOUT * 2)
|
||||
req = crawler.adv_get(url=url, follow=('rss' if not options.items else None), delay=delay, timeout=TIMEOUT * 2)
|
||||
|
||||
except (IOError, HTTPException):
|
||||
raise MorssException('Error downloading feed')
|
||||
|
|
|
@ -15,17 +15,17 @@
|
|||
# You should have received a copy of the GNU Affero General Public License along
|
||||
# with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
|
||||
import cgitb
|
||||
import mimetypes
|
||||
import sys
|
||||
import os.path
|
||||
import re
|
||||
import sys
|
||||
import wsgiref.handlers
|
||||
import wsgiref.simple_server
|
||||
import wsgiref.util
|
||||
|
||||
import lxml.etree
|
||||
|
||||
import cgitb
|
||||
import wsgiref.util
|
||||
import wsgiref.simple_server
|
||||
import wsgiref.handlers
|
||||
import mimetypes
|
||||
|
||||
try:
|
||||
# python 2
|
||||
from urllib import unquote
|
||||
|
@ -33,9 +33,11 @@ except ImportError:
|
|||
# python 3
|
||||
from urllib.parse import unquote
|
||||
|
||||
from . import crawler, readabilite
|
||||
from .morss import (DELAY, TIMEOUT, FeedFetch, FeedFormat, FeedGather,
|
||||
MorssException, Options, log)
|
||||
from . import crawler
|
||||
from . import readabilite
|
||||
from .morss import FeedFetch, FeedGather, FeedFormat
|
||||
from .morss import Options, log, TIMEOUT, DELAY, MorssException
|
||||
|
||||
|
||||
PORT = int(os.getenv('PORT', 8080))
|
||||
|
||||
|
|
Loading…
Reference in New Issue