crawler: add MySQL backend
With extra dependency
This commit is contained in:
		@@ -36,6 +36,7 @@ You do need:
 | 
			
		||||
- [wheezy.template](https://pypi.python.org/pypi/wheezy.template) to generate HTML pages
 | 
			
		||||
- [chardet](https://pypi.python.org/pypi/chardet)
 | 
			
		||||
- [six](https://pypi.python.org/pypi/six), a dependency of chardet
 | 
			
		||||
- pymysql
 | 
			
		||||
 | 
			
		||||
Simplest way to get these:
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -517,3 +517,36 @@ class SQLiteCache(BaseCache):
 | 
			
		||||
                self.con.execute('INSERT INTO data VALUES (?,?,?,?,?,?)', (url,) + value)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
import pymysql.cursors
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class MySQLCacheHandler(BaseCache):
 | 
			
		||||
    " NB. Requires mono-threading, as pymysql doesn't isn't thread-safe "
 | 
			
		||||
    def __init__(self, user, password, database, host='localhost'):
 | 
			
		||||
        self.con = pymysql.connect(host=host, user=user, password=password, database=database, charset='utf8', autocommit=True)
 | 
			
		||||
 | 
			
		||||
        with self.con.cursor() as cursor:
 | 
			
		||||
            cursor.execute('CREATE TABLE IF NOT EXISTS data (url VARCHAR(255) NOT NULL PRIMARY KEY, code INT, msg TEXT, headers TEXT, data BLOB, timestamp INT)')
 | 
			
		||||
 | 
			
		||||
    def __del__(self):
 | 
			
		||||
        self.con.close()
 | 
			
		||||
 | 
			
		||||
    def __getitem__(self, url):
 | 
			
		||||
        cursor = self.con.cursor()
 | 
			
		||||
        cursor.execute('SELECT * FROM data WHERE url=%s', (url,))
 | 
			
		||||
        row = cursor.fetchone()
 | 
			
		||||
 | 
			
		||||
        if not row:
 | 
			
		||||
            raise KeyError
 | 
			
		||||
 | 
			
		||||
        return row[1:]
 | 
			
		||||
 | 
			
		||||
    def __setitem__(self, url, value): # (code, msg, headers, data, timestamp)
 | 
			
		||||
        if url in self:
 | 
			
		||||
            with self.con.cursor() as cursor:
 | 
			
		||||
                cursor.execute('UPDATE data SET code=%s, msg=%s, headers=%s, data=%s, timestamp=%s WHERE url=%s',
 | 
			
		||||
                    value + (url,))
 | 
			
		||||
 | 
			
		||||
        else:
 | 
			
		||||
            with self.con.cursor() as cursor:
 | 
			
		||||
                cursor.execute('INSERT INTO data VALUES (%s,%s,%s,%s,%s,%s)', (url,) + value)
 | 
			
		||||
 
 | 
			
		||||
@@ -4,3 +4,4 @@ html2text
 | 
			
		||||
ordereddict
 | 
			
		||||
wheezy.template
 | 
			
		||||
chardet
 | 
			
		||||
pymysql
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user