README: improve docker instructions

Various small bug fixes
Remove leftover threading var
2020-04-19 13:01:08 +02:00 · 2020-04-19 12:54:02 +02:00 · 2020-04-19 12:51:11 +02:00 · 2020-04-19 12:50:26 +02:00 · 2020-04-19 12:50:05 +02:00
6 changed files with 15 additions and 16 deletions
--- a/README.md
+++ b/README.md
@@ -108,7 +108,6 @@ morss will auto-detect what "mode" to use.
 For this, you'll want to change the architecture of the files a bit, for example
 into something like this.

-
 ```
 /
 ├── cgi
@@ -151,20 +150,19 @@ gunicorn morss:cgi_standalone_app

 #### Using docker

-Build
+Build & run

 ```shell
-docker build https://git.pictuga.com/pictuga/morss.git
+docker build https://git.pictuga.com/pictuga/morss.git -t morss
+docker run -p 8080:8080 morss
 ```

-Run & Build in one go
+In one line

 ```shell
 docker run -p 8080:8080 $(docker build -q https://git.pictuga.com/pictuga/morss.git)
 ```

-It will run on port 8080 by default
-
 #### Using morss' internal HTTP server

 Morss can run its own HTTP server. The latter should start when you run morss
@@ -256,9 +254,10 @@ output = morss.Format(rss, options) # formats final feed

 ## Cache information

-morss uses caching to make loading faster. There are 2 possible cache backends
+morss uses caching to make loading faster. There are 3 possible cache backends
 (visible in `morss/crawler.py`):

+- `{}`: a simple python in-memory dict() object
 - `SQLiteCache`: sqlite3 cache. Default file location is in-memory (i.e. it will
 be cleared every time the program is run)
 - `MySQLCacheHandler`
--- a/morss/crawler.py
+++ b/morss/crawler.py
@@ -72,7 +72,6 @@ def custom_handler(follow=None, delay=None, encoding=None):
    handlers.append(HTTPRefreshHandler())
    handlers.append(UAHandler(DEFAULT_UA))
    handlers.append(BrowserlyHeaderHandler())
-
    handlers.append(EncodingFixHandler(encoding))

    if follow:
@@ -466,6 +465,8 @@ class CacheHandler(BaseHandler):


 class BaseCache:
+    """ Subclasses must behave like a dict """
+
    def __contains__(self, url):
        try:
            self[url]
--- a/morss/feedify.ini
+++ b/morss/feedify.ini
@@ -102,7 +102,7 @@ item_link = ./a/@href
 item_desc = ./div[class=desc]
 item_content = ./div[class=content]

-base = file:www/sheet.xsl
+base = file:sheet.xsl

 [twitter]
 mode = html
--- a/morss/feeds.py
+++ b/morss/feeds.py
@@ -85,7 +85,7 @@ def parse(data, url=None, mimetype=None, encoding=None):
                for path in ruleset['path']:
                    if fnmatch(url, path):
                        parser = [x for x in parsers if x.mode == ruleset['mode']][0]
-                        return parser(data, ruleset, encoding=encoding) 
+                        return parser(data, ruleset, encoding=encoding)

    # 2) Try each and every parser

--- a/morss/morss.py
+++ b/morss/morss.py
@@ -40,7 +40,6 @@ LIM_TIME = 2.5  # deletes what's after

 DELAY = 10 * 60  # xml cache & ETag cache (in sec)
 TIMEOUT = 4  # http timeout (in sec)
-THREADS = MAX_ITEM  # number of threads (1 for single-threaded)

 DEBUG = False
 PORT = 8080
@@ -137,7 +136,7 @@ def ItemFix(item, feedurl='/'):
    """ Improves feed items (absolute links, resolve feedburner links, etc) """

    # check unwanted uppercase title
-    if len(item.title) > 20 and item.title.isupper():
+    if item.title is not None and len(item.title) > 20 and item.title.isupper():
        item.title = item.title.title()

    # check if it includes link
@@ -200,7 +199,7 @@ def ItemFix(item, feedurl='/'):

    # reddit
    if urlparse(feedurl).netloc == 'www.reddit.com':
-        match = lxml.html.fromstring(item.desc).xpath('//a[text()="[link]"]/@href')
+        match = lxml.html.fromstring(item.content).xpath('//a[text()="[link]"]/@href')
        if len(match):
            item.link = match[0]
            log(item.link)
@@ -550,7 +549,7 @@ def cgi_app(environ, start_response):

 def middleware(func):
    " Decorator to turn a function into a wsgi middleware "
-    # This is called when parsing the code
+    # This is called when parsing the "@middleware" code

    def app_builder(app):
        # This is called when doing app = cgi_wrapper(app)
@@ -620,7 +619,7 @@ def cgi_get(environ, start_response):
    if urlparse(url).scheme not in ['http', 'https']:
        url = 'http://' + url

-    data, con, contenttype, encoding = crawler.adv_get(url=url)
+    data, con, contenttype, encoding = crawler.adv_get(url=url, timeout=TIMEOUT)

    if contenttype in ['text/html', 'application/xhtml+xml', 'application/xml']:
        if options.get == 'page':
--- a/morss/readabilite.py
+++ b/morss/readabilite.py
@@ -137,7 +137,7 @@ def score_all(node):

    for child in node:
        score = score_node(child)
-        child.attrib['seen'] = 'yes, ' + str(int(score))
+        child.attrib['morss_own_score'] = str(float(score))

        if score > 0 or len(list(child.iterancestors())) <= 2:
            spread_score(child, score)
Author	SHA1	Message	Date
pictuga	41a63900c2	README: improve docker instructions	2020-04-19 13:01:08 +02:00
pictuga	ec8edb02f1	Various small bug fixes	2020-04-19 12:54:02 +02:00
pictuga	d01b943597	Remove leftover threading var	2020-04-19 12:51:11 +02:00
pictuga	b361aa2867	Add timeout to :get	2020-04-19 12:50:26 +02:00
pictuga	4ce3c7cb32	Small code clean ups	2020-04-19 12:50:05 +02:00