From ec8edb02f11bd5ef2b964541ac8caf752b9cafb3 Mon Sep 17 00:00:00 2001 From: pictuga Date: Sun, 19 Apr 2020 12:54:02 +0200 Subject: [PATCH] Various small bug fixes --- README.md | 3 ++- morss/crawler.py | 2 ++ morss/feedify.ini | 2 +- morss/morss.py | 4 ++-- morss/readabilite.py | 2 +- 5 files changed, 8 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 88ff95b..e776bb3 100644 --- a/README.md +++ b/README.md @@ -255,9 +255,10 @@ output = morss.Format(rss, options) # formats final feed ## Cache information -morss uses caching to make loading faster. There are 2 possible cache backends +morss uses caching to make loading faster. There are 3 possible cache backends (visible in `morss/crawler.py`): +- `{}`: a simple python in-memory dict() object - `SQLiteCache`: sqlite3 cache. Default file location is in-memory (i.e. it will be cleared every time the program is run - `MySQLCacheHandler` diff --git a/morss/crawler.py b/morss/crawler.py index 790c67d..18795f1 100644 --- a/morss/crawler.py +++ b/morss/crawler.py @@ -465,6 +465,8 @@ class CacheHandler(BaseHandler): class BaseCache: + """ Subclasses must behave like a dict """ + def __contains__(self, url): try: self[url] diff --git a/morss/feedify.ini b/morss/feedify.ini index 291f21e..34ae831 100644 --- a/morss/feedify.ini +++ b/morss/feedify.ini @@ -102,7 +102,7 @@ item_link = ./a/@href item_desc = ./div[class=desc] item_content = ./div[class=content] -base = file:www/sheet.xsl +base = file:sheet.xsl [twitter] mode = html diff --git a/morss/morss.py b/morss/morss.py index d49898f..4f4b9af 100644 --- a/morss/morss.py +++ b/morss/morss.py @@ -136,7 +136,7 @@ def ItemFix(item, feedurl='/'): """ Improves feed items (absolute links, resolve feedburner links, etc) """ # check unwanted uppercase title - if len(item.title) > 20 and item.title.isupper(): + if item.title is not None and len(item.title) > 20 and item.title.isupper(): item.title = item.title.title() # check if it includes link @@ -199,7 +199,7 @@ def ItemFix(item, feedurl='/'): # reddit if urlparse(feedurl).netloc == 'www.reddit.com': - match = lxml.html.fromstring(item.desc).xpath('//a[text()="[link]"]/@href') + match = lxml.html.fromstring(item.content).xpath('//a[text()="[link]"]/@href') if len(match): item.link = match[0] log(item.link) diff --git a/morss/readabilite.py b/morss/readabilite.py index ff0e1d1..7cafcea 100644 --- a/morss/readabilite.py +++ b/morss/readabilite.py @@ -137,7 +137,7 @@ def score_all(node): for child in node: score = score_node(child) - child.attrib['seen'] = 'yes, ' + str(int(score)) + child.attrib['morss_own_score'] = str(float(score)) if score > 0 or len(list(child.iterancestors())) <= 2: spread_score(child, score)