房多多scrapy爬虫实例
程序员文章站
2022-05-06 18:47:21
...
# -*- coding: utf-8 -*-
import scrapy
import os
import sys
sys.path.append("C:/Users/***/scrapy/fhdodo")
from fhdodo.items import FhdodoItem
class FhdoSpider(scrapy.Spider):
    """Spider for second-hand housing listings on suzhou.fangdd.com.

    Crawls pages 1-30 of the filtered listing index and yields one
    FhdodoItem per listing card found on each page.
    """

    name = 'fhdo'
    # allowed_domains must hold bare domain names, not URLs: with a full
    # URL here, Scrapy's OffsiteMiddleware considers every request
    # off-site and drops it silently.
    allowed_domains = ['suzhou.fangdd.com']
    # Listing index pages 1..30 (the original looped a counter while < 31).
    start_urls = [
        'https://suzhou.fangdd.com/esf-a0-a150_s1-s2_l70_x0/?pageNo={}'.format(page)
        for page in range(1, 31)
    ]

    @staticmethod
    def _tokens(values, index):
        """Return values[index] whitespace-split, or [] if the index is missing.

        The site occasionally omits fields on a listing card; the original
        unguarded ``extract()[i]`` raised IndexError and aborted the whole
        page. The ``.split()`` mirrors the original post-processing (each
        item field is a list of whitespace-separated tokens).
        """
        return values[index].split() if len(values) > index else []

    def parse(self, response):
        """Parse one listing page and yield an item per listing card."""
        for each in response.xpath("//li[@class='LpList-item']"):
            item = FhdodoItem()
            whvi = each.xpath("./a/@href").extract()
            title = each.xpath("./div[@class='LpList-cont']/h4/a/span/text()").extract()
            info = each.xpath("./div[@class='LpList-cont']/p[@class='LpList-type']/text()").extract()
            addr = each.xpath("./div[@class='LpList-cont']/p[@class='LpList-address ellipsis']/a/text()").extract()
            price = each.xpath("./div[@class='LpList-cont']/div[@class='LpList-pricebox']/p/strong/text()").extract()
            up = each.xpath("./div[@class='LpList-cont']/div[@class='LpList-pricebox']/p/text()").extract()
            item['whvi'] = self._tokens(whvi, 0)
            item['title'] = self._tokens(title, 0)
            item['info0'] = self._tokens(info, 0)
            item['info1'] = self._tokens(info, 1)
            item['addr0'] = self._tokens(addr, 0)
            item['addr1'] = self._tokens(addr, 1)
            item['addr2'] = self._tokens(addr, 2)
            item['price'] = self._tokens(price, 0)
            item['up'] = self._tokens(up, 1)
            yield item
主代码,别的都参考前一个实例
上一篇: scrapy爬虫实例(1)