利用Request.meta属性给回调函数传参
程序员文章站
2022-05-11 18:12:37
...
知道这个,但一直不清楚怎么用,这里借鉴了网上朋友的例子
Request.meta语句
#回调函数
def parse(self, response):
    """Yield one follow-up request per list entry, carrying its item along.

    For every node matched by the list XPath, a ``DmozItem`` is created, its
    ``href`` field is filled from the entry's link, and a request for that
    link is yielded with the item stored under ``meta['item']`` so that the
    second-level callback can read it back and finish populating it.
    """
    # Select every list entry on the page and walk through the selectors.
    for sel in response.xpath('//li[@class="clearfix"]/div[@class="list_con"]'):
        hrefs = sel.xpath('h2/a/@href').extract()
        if not hrefs:
            # An entry without a link has nothing to follow; indexing [0]
            # on the empty list would raise IndexError and stop the callback.
            continue
        # One fresh item per entry.
        item = DmozItem()
        item['href'] = hrefs[0]
        # The second-level callback is defined with ``self``, so it has to be
        # referenced as a bound method rather than as a bare name.
        # The item travels in ``meta``; otherwise the second-level callback
        # could not access it (or pass its assignments back out).
        yield scrapy.Request(
            item['href'],
            callback=self.others_parse,
            dont_filter=True,
            meta={'item': item},
        )
#二级回调
def others_parse(self, response):
    """Complete the item carried in ``response.meta`` and emit it.

    The item stored under ``meta['item']`` by the first-level callback is
    read back, its ``other_url`` field is set to the URL of this response,
    and the same item object is yielded.
    """
    # Pick up the item that was attached to the request.
    carried = response.meta['item']
    # Record the URL of the page that was actually fetched.
    carried['other_url'] = response.url
    yield carried
另一种写法:把 meta 直接写入 scrapy.Request 的参数中
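网上还有把 meta 直接写到 Request 构造方法里的写法,但我尝试时不起作用。原因是:如果写成 meta=request.meta['item'],构造 Request 时变量 request 还没有被赋值,无法读取 request.meta;而且 meta 参数需要的是一个字典,正确写法应为 meta={'item': item}。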
def parse(self, response):
    """Yield a request for the ``repodata`` page of every matching link.

    Each ``href`` containing ``kube`` is turned into a remote URL (built
    from ``self.base_url``) and a local directory (built from
    ``self.repo_dir``). Both are stored in a ``YumSpiderItem`` that is
    passed to ``parse_next`` through the request's ``meta`` dict.
    """
    selector = Selector(response)
    hrefs = selector.xpath("//a[contains(@href,'kube')]/@href").extract()
    for urls in hrefs:
        # Drops the first 11 characters of the href; presumably a fixed
        # path prefix -- TODO confirm against the crawled site.
        dir1 = urls[11:]
        next_url = self.base_url + urls + '/repodata'
        next_dir = self.repo_dir + dir1 + '\\repodata'
        # Build a new item per link: a single item shared by all requests
        # would be overwritten on every iteration, so every callback would
        # end up seeing the values of the last link.
        item = YumSpiderItem()
        item["repodirs"] = next_dir
        item["repourls"] = next_url
        # ``meta`` must be a dict, and it is supplied here directly: the
        # request object does not exist yet while its own constructor
        # arguments are being evaluated, so it cannot be read from.
        yield scrapy.Request(
            next_url,
            meta={"item": item},
            callback=self.parse_next,
        )
def parse_next(self, response):
    """Yield one item per file name listed on the page.

    The item attached to the request under ``meta['item']`` supplies the
    base URL (``repourls``). For every text node matched by
    ``//body/a/text()`` a copy of that item is yielded with ``filenames``
    set to the name and ``file_urls`` set to the base URL plus the name.
    """
    selector = Selector(response)
    item = response.meta['item']
    names = selector.xpath("//body/a/text()").extract()
    for filename in names:
        # Yield a separate copy for each file: re-yielding the same object
        # after mutating it would let later iterations overwrite the fields
        # of items that were already handed out.
        file_item = item.copy()
        file_item["filenames"] = filename
        file_item["file_urls"] = item['repourls'] + filename
        yield file_item