Compare commits

...

3 Commits

Author SHA1 Message Date
pictuga f685139137 crawler: use UPSERT statements
Avoid potential race conditions
2020-05-03 21:27:45 +02:00
pictuga 73b477665e morss: separate :clip with <hr> instead of stars 2020-05-02 19:19:54 +02:00
pictuga b425992783 morss: don't follow alt=rss with custom feeds
To have the same page as with :get=page and to avoid shitty feeds
2020-05-02 19:18:58 +02:00
2 changed files with 7 additions and 18 deletions

View File

@@ -585,14 +585,8 @@ class SQLiteCache(BaseCache):
value[3] = sqlite3.Binary(value[3]) # data
value = tuple(value)
if url in self:
with self.con:
self.con.execute('UPDATE data SET code=?, msg=?, headers=?, data=?, timestamp=? WHERE url=?',
value + (url,))
else:
with self.con:
self.con.execute('INSERT INTO data VALUES (?,?,?,?,?,?)', (url,) + value)
with self.con:
self.con.execute('INSERT INTO data VALUES (?,?,?,?,?,?) ON CONFLICT(url) DO UPDATE SET code=?, msg=?, headers=?, data=?, timestamp=?', (url,) + value + value)
import pymysql.cursors
@@ -622,14 +616,9 @@ class MySQLCacheHandler(BaseCache):
return row[1:]
def __setitem__(self, url, value): # (code, msg, headers, data, timestamp)
if url in self:
with self.cursor() as cursor:
cursor.execute('UPDATE data SET code=%s, msg=%s, headers=%s, data=%s, timestamp=%s WHERE url=%s',
value + (url,))
else:
with self.cursor() as cursor:
cursor.execute('INSERT INTO data VALUES (%s,%s,%s,%s,%s,%s)', (url,) + value)
with self.cursor() as cursor:
cursor.execute('INSERT INTO data VALUES (%s,%s,%s,%s,%s,%s) ON DUPLICATE KEY UPDATE code=%s, msg=%s, headers=%s, data=%s, timestamp=%s',
(url,) + value + value)
if __name__ == '__main__':

View File

@@ -279,7 +279,7 @@ def ItemBefore(item, options):
def ItemAfter(item, options):
if options.clip and item.desc and item.content:
item.content = item.desc + "<br/><br/><center>* * *</center><br/><br/>" + item.content
item.content = item.desc + "<br/><br/><hr/><br/><br/>" + item.content
del item.desc
if options.nolink and item.content:
@@ -303,7 +303,7 @@ def FeedFetch(url, options):
delay = 0
try:
req = crawler.adv_get(url=url, follow='rss', delay=delay, timeout=TIMEOUT * 2)
req = crawler.adv_get(url=url, follow=('rss' if not options.items else None), delay=delay, timeout=TIMEOUT * 2)
except (IOError, HTTPException):
raise MorssException('Error downloading feed')