From f6851391374d1902c41fc10f9ef4428dcc7b9ffb Mon Sep 17 00:00:00 2001
From: pictuga
Date: Sun, 3 May 2020 21:27:45 +0200
Subject: [PATCH] crawler: use UPSERT statements

Avoid potential race conditions
---
 morss/crawler.py | 21 +++++----------------
 1 file changed, 5 insertions(+), 16 deletions(-)

diff --git a/morss/crawler.py b/morss/crawler.py
index 63f6c5d..2ea52cb 100644
--- a/morss/crawler.py
+++ b/morss/crawler.py
@@ -585,14 +585,8 @@ class SQLiteCache(BaseCache):
         value[3] = sqlite3.Binary(value[3]) # data
         value = tuple(value)
 
-        if url in self:
-            with self.con:
-                self.con.execute('UPDATE data SET code=?, msg=?, headers=?, data=?, timestamp=? WHERE url=?',
-                    value + (url,))
-
-        else:
-            with self.con:
-                self.con.execute('INSERT INTO data VALUES (?,?,?,?,?,?)', (url,) + value)
+        with self.con:
+            self.con.execute('INSERT INTO data VALUES (?,?,?,?,?,?) ON CONFLICT(url) DO UPDATE SET code=?, msg=?, headers=?, data=?, timestamp=?', (url,) + value + value)
 
 
 import pymysql.cursors
@@ -622,14 +616,9 @@ class MySQLCacheHandler(BaseCache):
         return row[1:]
 
     def __setitem__(self, url, value): # (code, msg, headers, data, timestamp)
-        if url in self:
-            with self.cursor() as cursor:
-                cursor.execute('UPDATE data SET code=%s, msg=%s, headers=%s, data=%s, timestamp=%s WHERE url=%s',
-                    value + (url,))
-
-        else:
-            with self.cursor() as cursor:
-                cursor.execute('INSERT INTO data VALUES (%s,%s,%s,%s,%s,%s)', (url,) + value)
+        with self.cursor() as cursor:
+            cursor.execute('INSERT INTO data VALUES (%s,%s,%s,%s,%s,%s) ON DUPLICATE KEY UPDATE code=%s, msg=%s, headers=%s, data=%s, timestamp=%s',
+                (url,) + value + value)
 
 
 if __name__ == '__main__':
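
Note (not part of the patch): a minimal standalone sketch of the SQLite UPSERT behaviour the change relies on, using an illustrative in-memory table whose columns mirror the cache schema (url, code, msg, headers, data, timestamp); the exact schema and sample values here are assumptions, and ON CONFLICT ... DO UPDATE needs SQLite 3.24 or newer. Running the single statement repeatedly leaves exactly one row per url, so there is no check-then-insert window for concurrent writers to race in, which is what the old "if url in self" branch had.

import sqlite3

con = sqlite3.connect(':memory:')
con.execute('CREATE TABLE data (url UNICODE PRIMARY KEY, code INT, msg UNICODE, '
            'headers UNICODE, data BLOB, timestamp INT)')

# Sample row matching the (code, msg, headers, data, timestamp) tuple shape.
row = (200, 'OK', 'content-type: text/html', sqlite3.Binary(b'<html/>'), 1588534065)

# Same statement run twice: the first call inserts, the second updates in place.
for _ in range(2):
    with con:
        con.execute('INSERT INTO data VALUES (?,?,?,?,?,?) '
                    'ON CONFLICT(url) DO UPDATE SET code=?, msg=?, headers=?, data=?, timestamp=?',
                    ('http://example.com/',) + row + row)

print(con.execute('SELECT COUNT(*), code FROM data').fetchone())  # -> (1, 200)

The MySQL hunk follows the same pattern with ON DUPLICATE KEY UPDATE, which is MySQL's equivalent upsert clause.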