scrapy
程序员文章站
2022-03-02 22:29:32
...
在 middlewares.py 文件中编写中间件类(编写后需在 settings.py 的 DOWNLOADER_MIDDLEWARES 中启用)
from fake_useragent import UserAgent
class RandomUserAgentMiddleware(object):
    """Downloader middleware that gives each request a random User-Agent.

    The User-Agent strings come from ``fake_useragent.UserAgent``. The header
    is only filled in when the request does not already carry one, so a
    User-Agent set explicitly on a request is left untouched.

    NOTE(review): Scrapy's built-in ``UserAgentMiddleware`` also fills this
    header with ``setdefault``; presumably this class must run before it (or
    the built-in one must be disabled) in ``DOWNLOADER_MIDDLEWARES`` for the
    random value to take effect -- confirm against the project settings.
    """

    def __init__(self, crawler):
        """Create the random User-Agent source.

        Args:
            crawler: The crawler handed over by ``from_crawler``. It is
                accepted for interface compatibility and not used here.
        """
        super(RandomUserAgentMiddleware, self).__init__()
        self.ua = UserAgent()

    @classmethod
    def from_crawler(cls, crawler):
        """Build the middleware from a crawler (Scrapy's factory hook)."""
        return cls(crawler)

    def process_request(self, request, spider):
        """Set a random ``User-Agent`` header if the request has none.

        Scrapy only invokes the hook named ``process_request``; the earlier
        spelling ``process_requests`` was never called, so no header was set.

        Args:
            request: The outgoing request whose headers are updated in place.
            spider: The spider that issued the request (unused).

        Returns:
            None, so that Scrapy continues processing the request.
        """
        request.headers.setdefault("User-Agent", self.ua.random)

    # Backward-compatible alias for code that called the old, misspelled name.
    process_requests = process_request
为每个 spider 单独配置私有设置(通过 custom_settings 属性)
class MySpider(scrapy.Spider):
    """Example spider that declares its own per-spider settings."""

    name = "myspider"

    # Entries in custom_settings have higher priority than settings.py:
    # anything configured here overrides the same key in settings.py.
    custom_settings = {"SOME_SETTING": "some value"}
转载于:https://www.jianshu.com/p/ce88a24ac053
上一篇: 数据结构思维 第十五章 爬取维基百科