Python scrapy 添加随机请求头 fake_useragent模块
程序员文章站
2022-05-09 10:07:05
...
爬虫文件
拉勾网页面在没有请求头（User-Agent）的情况下无法访问
# -*- coding: utf-8 -*-
import scrapy
class TestSpider(scrapy.Spider):
    """Spider fetching a single Lagou job posting.

    Lagou rejects requests that carry no (or Scrapy's default) User-Agent
    header, so this spider enables the Random_UA downloader middleware
    via per-spider ``custom_settings``.
    """
    name = 'test'
    # FIX: was ['www.baidu.com'], which does not match the start URL's domain.
    # With the wrong domain, every follow-up request to lagou.com would be
    # silently dropped by Scrapy's OffsiteMiddleware (start requests bypass
    # the filter, so the bug only shows up on subsequent requests).
    allowed_domains = ['www.lagou.com']
    start_urls = ['https://www.lagou.com/jobs/5219979.html']

    # Per-spider settings: enable the random User-Agent downloader middleware
    # defined in this project's middlewares module.
    custom_settings = {
        'DOWNLOADER_MIDDLEWARES': {
            'Test_C.middlewares.Random_UA': 1,
        }
    }

    def parse(self, response):
        """Dump the response so the effect of the random UA can be inspected."""
        print('*_' * 20)
        print(response.css('span.c-gap-right::text').extract())
        print(response.status)
        print(response.headers)
        print(response.text)
        print('*_' * 20)
自定义中间件 middleware
from scrapy import signals
from fake_useragent import UserAgent
class Random_UA(object):
    """Downloader middleware that attaches a random User-Agent to each request."""

    def __init__(self):
        # FIX: build the UserAgent source once. The original constructed
        # UserAgent() inside process_request, re-loading the browser-data
        # cache on every single request — needlessly slow.
        self.ua = UserAgent()

    def process_request(self, request, spider):
        # setdefault keeps any User-Agent the request already set explicitly;
        # only requests without one get a random UA. Returning None lets
        # Scrapy continue processing the request normally.
        request.headers.setdefault('User-Agent', self.ua.random)
上一篇: 有关对象处理的文章推荐10篇
下一篇: HTTP中Get和Post的区别