Scrapy MongoDB异步插入
程序员文章站
2022-05-11 08:16:36
...
MongoDB 异步插入写法
由于 Scrapy 基于 Twisted 异步执行,如果在 pipeline 中用传统的同步方式写入数据库,阻塞的写操作会拖慢整个爬虫的速度。
在 settings.py 中添加 MONGO_URI、MONGO_DB、MONGO_COL:
# Connection URI of the MongoDB server.
MONGO_URI = 'mongodb://127.0.0.1:27017/'
# Name of the target database (placeholder -- replace with your own).
MONGO_DB = '数据库名'
# Name of the target collection (placeholder -- replace with your own).
# The pipeline looks this setting up under the key 'MONGO_COL'.
MONGO_COL = '集合名'
在 pipelines.py 中:
import pymongo
from twisted.internet import reactor, defer
class MongoPipline(object):
    """Scrapy item pipeline that writes items to MongoDB off the reactor thread.

    Each item is inserted from a worker thread (``reactor.callInThread``) and
    the result is reported back through a Deferred, so the blocking PyMongo
    call does not run on the reactor thread.
    """

    def __init__(self, mongo_uri, mongo_db, mongo_col):
        """Store the connection parameters; no connection is opened here.

        :param mongo_uri: MongoDB connection URI.
        :param mongo_db: name of the database to write to.
        :param mongo_col: name of the collection to write to.
        """
        self.mongo_uri = mongo_uri
        self.mongo_db = mongo_db
        self.mongo_col = mongo_col

    @classmethod
    def from_crawler(cls, crawler):
        """Build the pipeline from the crawler's settings.

        Reads ``MONGO_URI`` (defaulting to the local server), ``MONGO_DB``
        and ``MONGO_COL`` from ``crawler.settings``.

        :param crawler: object exposing a ``settings`` mapping with ``get``.
        :return: a new pipeline instance.
        """
        settings = crawler.settings
        return cls(
            mongo_uri=settings.get('MONGO_URI', 'mongodb://127.0.0.1:27017/'),
            mongo_db=settings.get('MONGO_DB'),
            # The original passed ``mongo_db=`` twice (a SyntaxError) and
            # never supplied ``mongo_col``.
            mongo_col=settings.get('MONGO_COL'),
        )

    def open_spider(self, spider):
        """Open the MongoDB client and select the database.

        :param spider: the spider being opened (unused).
        """
        self.client = pymongo.MongoClient(self.mongo_uri)
        self.mongodb = self.client[self.mongo_db]

    def close_spider(self, spider):
        """Close the MongoDB client.

        :param spider: the spider being closed (unused).
        """
        self.client.close()

    def process_item(self, item, spider):
        """Schedule the insert in a worker thread and return a Deferred.

        The Deferred fires with ``item`` once the insert succeeded, or with a
        failure if the insert raised.

        :param item: the scraped item.
        :param spider: the spider that produced the item.
        :return: a Deferred whose result is ``item``.
        """
        out = defer.Deferred()
        reactor.callInThread(self._insert, item, out, spider)
        # Returning the Deferred directly gives the caller the same result as
        # the former inlineCallbacks/returnValue wrapper.
        return out

    def _insert(self, item, out, spider):
        """Insert ``item`` into the collection; runs in a worker thread.

        The outcome is handed to ``out`` via ``reactor.callFromThread`` so the
        Deferred is only ever fired from the reactor thread.

        :param item: the scraped item; a ``dict`` copy of it is stored.
        :param out: Deferred to fire with the item or with the error.
        :param spider: the spider that produced the item (unused).
        """
        try:
            # ``insert`` was removed in PyMongo 4; ``insert_one`` replaces it.
            self.mongodb[self.mongo_col].insert_one(dict(item))
        except Exception as exc:
            # Forward the error; otherwise ``out`` would never fire and the
            # item would stay pending forever.
            reactor.callFromThread(out.errback, exc)
        else:
            reactor.callFromThread(out.callback, item)
上一篇: CSS3扁平化Loading动画特效
下一篇: CSS3总结(干货)