关于Scrapy异步(使用连接池)对数据库插入的代码
程序员文章站
2022-05-10 19:58:02
...
在 settings.py 中设置数据库连接信息：
# Register the pipeline. The dotted path must name the class exactly as it is
# spelled in pipelines.py, otherwise Scrapy cannot import it.
ITEM_PIPELINES = {
    'projectname.pipelines.mysqlTwistedpipline': 300,
}

# MySQL connection parameters; the key names must match what the pipeline's
# from_settings() looks up.
HOST = '127.0.0.1'
MYSQL_USER = 'root'
MYSQL_PASSWORD = 'root'
MYSQL_DBNAME = 'dbname'
MYSQL_CHARSET = 'utf8'
MYSQL_PORT = 3306
在 pipelines.py 中编写代码：
import logging

import MySQLdb
from MySQLdb import cursors
from twisted.enterprise import adbapi  # asynchronous connection-pool API
class mysqlTwistedpipline(object):
    """Scrapy item pipeline that inserts items into MySQL through a pool.

    Each insert is submitted to the pool's ``runInteraction`` so that
    ``process_item`` does not wait for the database before returning.
    """

    def __init__(self, dbpool):
        """Keep a reference to the connection pool.

        Args:
            dbpool: Object exposing ``runInteraction(callable, *args)``;
                ``from_settings`` passes an ``adbapi.ConnectionPool``.
        """
        self.dbpool = dbpool

    @classmethod
    def from_settings(cls, settings):
        """Build the pipeline from the connection values in the settings.

        Args:
            settings: Settings object supporting ``settings[key]`` and
                ``settings.get(key)``.

        Returns:
            A pipeline instance bound to a new MySQLdb connection pool.
        """
        # Accept both spellings of the password key so a settings file that
        # defines MYSQL_PASSWD is not silently ignored.
        password = settings.get('MYSQL_PASSWORD')
        if password is None:
            password = settings.get('MYSQL_PASSWD')

        dbparms = dict(
            host=settings['HOST'],
            user=settings['MYSQL_USER'],
            passwd=password,
            db=settings['MYSQL_DBNAME'],
            charset=settings['MYSQL_CHARSET'],
            port=settings['MYSQL_PORT'],
            cursorclass=cursors.DictCursor,
            use_unicode=True,
        )
        dbpool = adbapi.ConnectionPool('MySQLdb', **dbparms)
        return cls(dbpool)

    def process_item(self, item, spider):
        """Schedule the insert for ``item`` and pass the item on.

        Args:
            item: The scraped item to store.
            spider: The spider that produced the item (unused here).

        Returns:
            The same ``item``, so later pipeline stages still receive it.
        """
        query = self.dbpool.runInteraction(self.do_insert, item)
        # Failures surface on the returned deferred, not as exceptions here.
        query.addErrback(self.handle_error)
        return item

    def handle_error(self, failure):
        """Log a failed insert.

        Args:
            failure: The error object delivered to the errback.
        """
        logging.getLogger(__name__).error('MySQL insert failed: %s', failure)

    def do_insert(self, cursor, item):
        """Run the INSERT statement for one item.

        This is normally the only method that needs adapting: the table name,
        column names and item keys below are placeholders.

        Args:
            cursor: Database cursor supplied by ``runInteraction``.
            item: Mapping holding the values to insert.
        """
        insert_sql = """
            insert into 表名(列名1,列名2) VALUES(%s,%s)
        """
        # Values are passed separately so the driver handles the escaping.
        cursor.execute(insert_sql, (item['列名1'], item['列名2']))


# PEP 8 style alias so a settings entry spelled
# 'projectname.pipelines.MySQLTwistedPipeline' resolves to the same class.
MySQLTwistedPipeline = mysqlTwistedpipline
如果想使用 Django 的 model，可参考 scrapy-djangoitem。