crawler: use UPSERT statements

Avoid potential race conditions
morss: separate :clip with <hr> instead of stars
2020-05-03 21:27:45 +02:00 · 2020-05-02 19:19:54 +02:00 · 2020-05-02 19:18:58 +02:00
2 changed files with 7 additions and 18 deletions
--- a/morss/crawler.py
+++ b/morss/crawler.py
@@ -585,14 +585,8 @@ class SQLiteCache(BaseCache):
        value[3] = sqlite3.Binary(value[3]) # data
        value = tuple(value)

-        if url in self:
-            with self.con:
-                self.con.execute('UPDATE data SET code=?, msg=?, headers=?, data=?, timestamp=? WHERE url=?',
-                    value + (url,))
-
-        else:
-            with self.con:
-                self.con.execute('INSERT INTO data VALUES (?,?,?,?,?,?)', (url,) + value)
+        with self.con:
+            self.con.execute('INSERT INTO data VALUES (?,?,?,?,?,?) ON CONFLICT(url) DO UPDATE SET code=?, msg=?, headers=?, data=?, timestamp=?', (url,) + value + value)


 import pymysql.cursors
@@ -622,14 +616,9 @@ class MySQLCacheHandler(BaseCache):
        return row[1:]

    def __setitem__(self, url, value): # (code, msg, headers, data, timestamp)
-        if url in self:
-            with self.cursor() as cursor:
-                cursor.execute('UPDATE data SET code=%s, msg=%s, headers=%s, data=%s, timestamp=%s WHERE url=%s',
-                    value + (url,))
-
-        else:
-            with self.cursor() as cursor:
-                cursor.execute('INSERT INTO data VALUES (%s,%s,%s,%s,%s,%s)', (url,) + value)
+        with self.cursor() as cursor:
+            cursor.execute('INSERT INTO data VALUES (%s,%s,%s,%s,%s,%s) ON DUPLICATE KEY UPDATE code=%s, msg=%s, headers=%s, data=%s, timestamp=%s',
+                (url,) + value + value)


 if __name__ == '__main__':
--- a/morss/morss.py
+++ b/morss/morss.py
@@ -279,7 +279,7 @@ def ItemBefore(item, options):

 def ItemAfter(item, options):
    if options.clip and item.desc and item.content:
-        item.content = item.desc + "<br/><br/><center>* * *</center><br/><br/>" + item.content
+        item.content = item.desc + "<br/><br/><hr/><br/><br/>" + item.content
        del item.desc

    if options.nolink and item.content:
@@ -303,7 +303,7 @@ def FeedFetch(url, options):
        delay = 0

    try:
-        req = crawler.adv_get(url=url, follow='rss', delay=delay, timeout=TIMEOUT * 2)
+        req = crawler.adv_get(url=url, follow=('rss' if not options.items else None), delay=delay, timeout=TIMEOUT * 2)

    except (IOError, HTTPException):
        raise MorssException('Error downloading feed')
Author	SHA1	Message	Date
pictuga	f685139137	crawler: use UPSERT statements Avoid potential race conditions	2020-05-03 21:27:45 +02:00
pictuga	73b477665e	morss: separate :clip with <hr> instead of stars	2020-05-02 19:19:54 +02:00
pictuga	b425992783	morss: don't follow alt=rss with custom feeds To have the same page as with :get=page and to avoid shitty feeds	2020-05-02 19:18:58 +02:00