Compare commits
6 Commits
bbada0436a
...
9ce6acba20
Author | SHA1 | Date |
---|---|---|
pictuga | 9ce6acba20 | |
pictuga | 6192ff4081 | |
pictuga | 056a1b143f | |
pictuga | eed949736a | |
pictuga | 2fc7cd391c | |
pictuga | d9f46b23a6 |
|
@ -5,4 +5,4 @@ RUN apk add python3 py3-lxml py3-gunicorn py3-pip git
|
||||||
ADD . /app
|
ADD . /app
|
||||||
RUN pip3 install /app
|
RUN pip3 install /app
|
||||||
|
|
||||||
CMD gunicorn --bind 0.0.0.0:8080 -w 4 morss
|
CMD gunicorn --bind 0.0.0.0:8080 -w 4 --preload morss
|
||||||
|
|
|
@ -186,7 +186,7 @@ uwsgi --http :8080 --plugin python --wsgi-file main.py
|
||||||
#### Using Gunicorn
|
#### Using Gunicorn
|
||||||
|
|
||||||
```shell
|
```shell
|
||||||
gunicorn morss
|
gunicorn --preload morss
|
||||||
```
|
```
|
||||||
|
|
||||||
#### Using docker
|
#### Using docker
|
||||||
|
@ -316,6 +316,13 @@ will be cleared every time the program is run). Path can be defined with
|
||||||
- `CACHE=mysql`: MySQL cache. Connection can be defined with the following
|
- `CACHE=mysql`: MySQL cache. Connection can be defined with the following
|
||||||
environment variables: `MYSQL_USER`, `MYSQL_PWD`, `MYSQL_DB`, `MYSQL_HOST`
|
environment variables: `MYSQL_USER`, `MYSQL_PWD`, `MYSQL_DB`, `MYSQL_HOST`
|
||||||
|
|
||||||
|
To limit the size of the cache:
|
||||||
|
- `CACHE_SIZE` sets the target number of items in the cache (further items will
|
||||||
|
be deleted but the cache might be temporarily bigger than that). Defaults to 10k
|
||||||
|
entries.
|
||||||
|
- `CACHE_LIFESPAN` sets how often the cache must be trimmed (i.e. cut down to
|
||||||
|
the number of items set in `CACHE_SIZE`). Defaults to 1hr.
|
||||||
|
|
||||||
## Configuration
|
## Configuration
|
||||||
### Length limitation
|
### Length limitation
|
||||||
|
|
||||||
|
|
|
@ -25,36 +25,15 @@ from . import cli
|
||||||
|
|
||||||
from .morss import MorssException
|
from .morss import MorssException
|
||||||
|
|
||||||
import wsgiref.simple_server
|
|
||||||
import wsgiref.handlers
|
|
||||||
|
|
||||||
|
|
||||||
PORT = int(os.getenv('PORT', 8080))
|
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
if 'REQUEST_URI' in os.environ:
|
if 'REQUEST_URI' in os.environ:
|
||||||
# mod_cgi (w/o file handler)
|
# mod_cgi (w/o file handler)
|
||||||
|
wsgi.cgi_handle_request()
|
||||||
app = wsgi.cgi_app
|
|
||||||
app = wsgi.cgi_dispatcher(app)
|
|
||||||
app = wsgi.cgi_error_handler(app)
|
|
||||||
app = wsgi.cgi_encode(app)
|
|
||||||
|
|
||||||
wsgiref.handlers.CGIHandler().run(app)
|
|
||||||
|
|
||||||
elif len(sys.argv) <= 1:
|
elif len(sys.argv) <= 1:
|
||||||
# start internal (basic) http server (w/ file handler)
|
# start internal (basic) http server (w/ file handler)
|
||||||
|
wsgi.cgi_start_server()
|
||||||
app = wsgi.cgi_app
|
|
||||||
app = wsgi.cgi_file_handler(app)
|
|
||||||
app = wsgi.cgi_dispatcher(app)
|
|
||||||
app = wsgi.cgi_error_handler(app)
|
|
||||||
app = wsgi.cgi_encode(app)
|
|
||||||
|
|
||||||
print('Serving http://localhost:%s/' % PORT)
|
|
||||||
httpd = wsgiref.simple_server.make_server('', PORT, app)
|
|
||||||
httpd.serve_forever()
|
|
||||||
|
|
||||||
else:
|
else:
|
||||||
# as a CLI app
|
# as a CLI app
|
||||||
|
|
|
@ -25,7 +25,9 @@ import chardet
|
||||||
from cgi import parse_header
|
from cgi import parse_header
|
||||||
import lxml.html
|
import lxml.html
|
||||||
import time
|
import time
|
||||||
|
import threading
|
||||||
import random
|
import random
|
||||||
|
from collections import OrderedDict
|
||||||
|
|
||||||
try:
|
try:
|
||||||
# python 2
|
# python 2
|
||||||
|
@ -48,6 +50,10 @@ except NameError:
|
||||||
basestring = unicode = str
|
basestring = unicode = str
|
||||||
|
|
||||||
|
|
||||||
|
CACHE_SIZE = int(os.getenv('CACHE_SIZE', 10000)) # max number of items in cache (default: 10k items)
|
||||||
|
CACHE_LIFESPAN = int(os.getenv('CACHE_LIFESPAN', 60*60)) # how often to auto-clear the cache (default: 1hr)
|
||||||
|
|
||||||
|
|
||||||
# uncomment the lines below to ignore SSL certs
|
# uncomment the lines below to ignore SSL certs
|
||||||
#import ssl
|
#import ssl
|
||||||
#ssl._create_default_https_context = ssl._create_unverified_context
|
#ssl._create_default_https_context = ssl._create_unverified_context
|
||||||
|
@ -605,6 +611,18 @@ class CacheHandler(BaseHandler):
|
||||||
class BaseCache:
|
class BaseCache:
|
||||||
""" Subclasses must behave like a dict """
|
""" Subclasses must behave like a dict """
|
||||||
|
|
||||||
|
def trim(self):
|
||||||
|
pass
|
||||||
|
|
||||||
|
def autotrim(self, delay=CACHE_LIFESPAN):
|
||||||
|
# trim the cache every so often
|
||||||
|
|
||||||
|
self.trim()
|
||||||
|
|
||||||
|
t = threading.Timer(delay, self.autotrim)
|
||||||
|
t.daemon = True
|
||||||
|
t.start()
|
||||||
|
|
||||||
def __contains__(self, url):
|
def __contains__(self, url):
|
||||||
try:
|
try:
|
||||||
self[url]
|
self[url]
|
||||||
|
@ -627,9 +645,15 @@ class SQLiteCache(BaseCache):
|
||||||
self.con.execute('CREATE TABLE IF NOT EXISTS data (url UNICODE PRIMARY KEY, code INT, msg UNICODE, headers UNICODE, data BLOB, timestamp INT)')
|
self.con.execute('CREATE TABLE IF NOT EXISTS data (url UNICODE PRIMARY KEY, code INT, msg UNICODE, headers UNICODE, data BLOB, timestamp INT)')
|
||||||
self.con.execute('pragma journal_mode=WAL')
|
self.con.execute('pragma journal_mode=WAL')
|
||||||
|
|
||||||
|
self.trim()
|
||||||
|
|
||||||
def __del__(self):
|
def __del__(self):
|
||||||
self.con.close()
|
self.con.close()
|
||||||
|
|
||||||
|
def trim(self):
|
||||||
|
with self.con:
|
||||||
|
self.con.execute('DELETE FROM data WHERE timestamp <= ( SELECT timestamp FROM ( SELECT timestamp FROM data ORDER BY timestamp DESC LIMIT 1 OFFSET ? ) foo )', (CACHE_SIZE,))
|
||||||
|
|
||||||
def __getitem__(self, url):
|
def __getitem__(self, url):
|
||||||
row = self.con.execute('SELECT * FROM data WHERE url=?', (url,)).fetchone()
|
row = self.con.execute('SELECT * FROM data WHERE url=?', (url,)).fetchone()
|
||||||
|
|
||||||
|
@ -660,9 +684,15 @@ class MySQLCacheHandler(BaseCache):
|
||||||
with self.cursor() as cursor:
|
with self.cursor() as cursor:
|
||||||
cursor.execute('CREATE TABLE IF NOT EXISTS data (url VARCHAR(255) NOT NULL PRIMARY KEY, code INT, msg TEXT, headers TEXT, data BLOB, timestamp INT)')
|
cursor.execute('CREATE TABLE IF NOT EXISTS data (url VARCHAR(255) NOT NULL PRIMARY KEY, code INT, msg TEXT, headers TEXT, data BLOB, timestamp INT)')
|
||||||
|
|
||||||
|
self.trim()
|
||||||
|
|
||||||
def cursor(self):
|
def cursor(self):
|
||||||
return pymysql.connect(host=self.host, user=self.user, password=self.password, database=self.database, charset='utf8', autocommit=True).cursor()
|
return pymysql.connect(host=self.host, user=self.user, password=self.password, database=self.database, charset='utf8', autocommit=True).cursor()
|
||||||
|
|
||||||
|
def trim(self):
|
||||||
|
with self.cursor() as cursor:
|
||||||
|
cursor.execute('DELETE FROM data WHERE timestamp <= ( SELECT timestamp FROM ( SELECT timestamp FROM data ORDER BY timestamp DESC LIMIT 1 OFFSET %s ) foo )', (CACHE_SIZE,))
|
||||||
|
|
||||||
def __getitem__(self, url):
|
def __getitem__(self, url):
|
||||||
cursor = self.cursor()
|
cursor = self.cursor()
|
||||||
cursor.execute('SELECT * FROM data WHERE url=%s', (url,))
|
cursor.execute('SELECT * FROM data WHERE url=%s', (url,))
|
||||||
|
@ -679,20 +709,39 @@ class MySQLCacheHandler(BaseCache):
|
||||||
(url,) + value + value)
|
(url,) + value + value)
|
||||||
|
|
||||||
|
|
||||||
|
class CappedDict(OrderedDict, BaseCache):
|
||||||
|
def trim(self):
|
||||||
|
if CACHE_SIZE >= 0:
|
||||||
|
for i in range( max( len(self) - CACHE_SIZE , 0 )):
|
||||||
|
self.popitem(False)
|
||||||
|
|
||||||
|
def __setitem__(self, key, value):
|
||||||
|
# https://docs.python.org/2/library/collections.html#ordereddict-examples-and-recipes
|
||||||
|
if key in self:
|
||||||
|
del self[key]
|
||||||
|
OrderedDict.__setitem__(self, key, value)
|
||||||
|
|
||||||
|
|
||||||
if 'CACHE' in os.environ:
|
if 'CACHE' in os.environ:
|
||||||
if os.environ['CACHE'] == 'mysql':
|
if os.environ['CACHE'] == 'mysql':
|
||||||
default_cache = MySQLCacheHandler(
|
default_cache = MySQLCacheHandler(
|
||||||
user = os.getenv('MYSQL_USER'),
|
user = os.getenv('MYSQL_USER'),
|
||||||
password = os.getenv('MYSQL_PWD'),
|
password = os.getenv('MYSQL_PWD'),
|
||||||
database = os.getenv('MYSQL_DB'),
|
database = os.getenv('MYSQL_DB'),
|
||||||
host = os.getenv('MYSQL_HOST')
|
host = os.getenv('MYSQL_HOST', 'localhost')
|
||||||
)
|
)
|
||||||
|
|
||||||
elif os.environ['CACHE'] == 'sqlite':
|
elif os.environ['CACHE'] == 'sqlite':
|
||||||
default_cache = SQLiteCache(os.getenv('SQLITE_PATH', ':memory:'))
|
if 'SQLITE_PATH' in os.environ:
|
||||||
|
path = os.getenv('SQLITE_PATH') + '/morss-cache.db'
|
||||||
|
|
||||||
else:
|
else:
|
||||||
default_cache = {}
|
path = ':memory:'
|
||||||
|
|
||||||
|
default_cache = SQLiteCache(path)
|
||||||
|
|
||||||
|
else:
|
||||||
|
default_cache = CappedDict()
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
|
|
|
@ -22,6 +22,8 @@ import lxml.etree
|
||||||
|
|
||||||
import cgitb
|
import cgitb
|
||||||
import wsgiref.util
|
import wsgiref.util
|
||||||
|
import wsgiref.simple_server
|
||||||
|
import wsgiref.handlers
|
||||||
import mimetypes
|
import mimetypes
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
@ -37,6 +39,9 @@ from .morss import FeedFetch, FeedGather, FeedFormat
|
||||||
from .morss import Options, log, TIMEOUT, DELAY, MorssException
|
from .morss import Options, log, TIMEOUT, DELAY, MorssException
|
||||||
|
|
||||||
|
|
||||||
|
PORT = int(os.getenv('PORT', 8080))
|
||||||
|
|
||||||
|
|
||||||
def parse_options(options):
|
def parse_options(options):
|
||||||
""" Turns ['md=True'] into {'md':True} """
|
""" Turns ['md=True'] into {'md':True} """
|
||||||
out = {}
|
out = {}
|
||||||
|
@ -267,3 +272,24 @@ application = cgi_file_handler(application)
|
||||||
application = cgi_dispatcher(application)
|
application = cgi_dispatcher(application)
|
||||||
application = cgi_error_handler(application)
|
application = cgi_error_handler(application)
|
||||||
application = cgi_encode(application)
|
application = cgi_encode(application)
|
||||||
|
|
||||||
|
|
||||||
|
def cgi_handle_request():
|
||||||
|
app = cgi_app
|
||||||
|
app = cgi_dispatcher(app)
|
||||||
|
app = cgi_error_handler(app)
|
||||||
|
app = cgi_encode(app)
|
||||||
|
|
||||||
|
wsgiref.handlers.CGIHandler().run(app)
|
||||||
|
|
||||||
|
|
||||||
|
def cgi_start_server():
|
||||||
|
crawler.default_cache.autotrim()
|
||||||
|
|
||||||
|
print('Serving http://localhost:%s/' % PORT)
|
||||||
|
httpd = wsgiref.simple_server.make_server('', PORT, application)
|
||||||
|
httpd.serve_forever()
|
||||||
|
|
||||||
|
|
||||||
|
if 'gunicorn' in os.getenv('SERVER_SOFTWARE', ''):
|
||||||
|
crawler.default_cache.autotrim()
|
||||||
|
|
Loading…
Reference in New Issue