欢迎您访问程序员文章站本站旨在为大家提供分享程序员计算机编程知识!
您现在的位置是: 首页

关于Scrapy异步(使用连接池)对数据库插入的代码

程序员文章站 2022-05-10 19:58:02
...

在 settings.py 中设置数据库连接信息

# Register the asynchronous MySQL pipeline. The dotted path must name the
# class exactly as it is spelled in pipelines.py (mysqlTwistedpipline);
# otherwise Scrapy cannot import it.
ITEM_PIPELINES = {
    'projectname.pipelines.mysqlTwistedpipline': 300,
}

# MySQL connection parameters for the pipeline.
HOST = '127.0.0.1'
MYSQL_USER = 'root'
MYSQL_PASSWD = 'root'
MYSQL_DBNAME = 'dbname'
MYSQL_CHARSET = 'utf8'
MYSQL_PORT = 3306

在 pipelines.py 中编写代码

from twisted.enterprise import adbapi   # 异步的连接池类
import MySQLdb
from MySQLdb import cursors

class mysqlTwistedpipline(object):
    """Scrapy item pipeline that inserts items into MySQL via a connection pool.

    Each item is handed to ``dbpool.runInteraction`` together with
    ``do_insert``; failures reported by the returned deferred are routed to
    ``handle_error``. To adapt the pipeline to another table, only
    ``do_insert`` normally needs to change.
    """

    def __init__(self, dbpool):
        """Store the connection pool used for every insert.

        Args:
            dbpool: Pool object exposing ``runInteraction`` (an
                ``adbapi.ConnectionPool`` when created by ``from_settings``).
        """
        self.dbpool = dbpool

    @classmethod
    def from_settings(cls, settings):
        """Create the pipeline from the connection values in the settings.

        Args:
            settings: Mapping-like settings object providing ``HOST``,
                ``MYSQL_USER``, ``MYSQL_PASSWD``, ``MYSQL_DBNAME``,
                ``MYSQL_CHARSET`` and ``MYSQL_PORT``.

        Returns:
            A pipeline instance wrapping a new ``adbapi.ConnectionPool``.
        """
        dbparms = dict(
            host=settings['HOST'],
            user=settings['MYSQL_USER'],
            # The key defined in settings.py is MYSQL_PASSWD (not
            # MYSQL_PASSWORD), so read it under that exact name.
            passwd=settings['MYSQL_PASSWD'],
            db=settings['MYSQL_DBNAME'],
            charset=settings['MYSQL_CHARSET'],
            port=settings['MYSQL_PORT'],
            cursorclass=MySQLdb.cursors.DictCursor,
            use_unicode=True,
        )
        dbpool = adbapi.ConnectionPool('MySQLdb', **dbparms)
        return cls(dbpool)

    def process_item(self, item, spider):
        """Schedule the insert for ``item`` and pass the item on.

        Args:
            item: The scraped item to store.
            spider: The spider that produced the item (unused here).

        Returns:
            The same ``item``, so that pipelines running after this one
            still receive it.
        """
        # Let the pool run the insert instead of executing it inline.
        query = self.dbpool.runInteraction(self.do_insert, item)
        # Report errors raised by the scheduled insert.
        query.addErrback(self.handle_error)
        return item

    def handle_error(self, failure):
        """Print the failure produced by a scheduled insert.

        Args:
            failure: The failure object passed to the errback.
        """
        print(failure)

    # Normally this is the only method that needs to be adapted per project.
    def do_insert(self, cursor, item):
        """Execute the parameterized INSERT statement for one item.

        Args:
            cursor: Database cursor supplied by ``runInteraction``.
            item: Item providing the values for the inserted columns.
        """
        insert_sql = """
        insert into 表名(列名1,列名2) VALUES(%s,%s)
        """
        cursor.execute(insert_sql, (item['列名1'], item['列名2']))
如果想使用 Django 的 model,可参考 scrapy-djangoitem:

https://github.com/scrapy-plugins/scrapy-djangoitem

相关标签: python