欢迎您访问程序员文章站,本站旨在为大家提供、分享程序员计算机编程知识!
您现在的位置是: 首页

房多多scrapy爬虫实例

程序员文章站 2022-05-06 18:47:21
...
# -*- coding: utf-8 -*-
import scrapy
import os
import sys
sys.path.append("C:/Users/***/scrapy/fhdodo")
from  fhdodo.items import FhdodoItem


class FhdoSpider(scrapy.Spider):
    """Spider that scrapes the paginated listing pages on suzhou.fangdd.com.

    Pages 1 through 30 of the search URL in ``host`` are requested, and one
    ``FhdodoItem`` is yielded for every ``<li class="LpList-item">`` element
    found on each page.
    """

    name = 'fhdo'
    # Bare host names only: Scrapy's offsite filtering ignores (and warns
    # about) entries that are full URLs such as 'https://host/'.
    allowed_domains = ['suzhou.fangdd.com']
    host = 'https://suzhou.fangdd.com/esf-a0-a150_s1-s2_l70_x0/?pageNo={}'
    # map() instead of a list comprehension: inside a class body, a
    # comprehension's element expression cannot see the class-level `host`.
    start_urls = list(map(host.format, range(1, 31)))

    @staticmethod
    def _tokens(values, index):
        """Return ``values[index]`` split on whitespace, or ``[]`` if absent.

        Args:
            values: list of strings extracted by an XPath query.
            index: position of the wanted string in ``values``.

        Returns:
            The whitespace-separated tokens of the selected string, or an
            empty list when ``values`` has no element at ``index``.
        """
        if index < len(values):
            return values[index].split()
        return []

    def parse(self, response):
        """Yield one item per listing found in ``response``.

        Every item field is a list of whitespace-separated tokens. A field
        whose source node is missing from the page is set to an empty list,
        so one incomplete listing no longer raises ``IndexError`` and aborts
        the remaining listings of the page.

        Args:
            response: the downloaded listing page.

        Yields:
            FhdodoItem: with the fields ``whvi``, ``title``, ``info0``,
            ``info1``, ``addr0``, ``addr1``, ``addr2``, ``price`` and ``up``.
        """
        tokens = self._tokens
        for listing in response.xpath("//li[@class='LpList-item']"):
            # Everything except the link lives under the content <div>.
            content = listing.xpath("./div[@class='LpList-cont']")
            pricebox = content.xpath("./div[@class='LpList-pricebox']")

            whvi = listing.xpath("./a/@href").extract()
            title = content.xpath("./h4/a/span/text()").extract()
            info = content.xpath("./p[@class='LpList-type']/text()").extract()
            addr = content.xpath(
                "./p[@class='LpList-address ellipsis']/a/text()"
            ).extract()
            price = pricebox.xpath("./p/strong/text()").extract()
            up = pricebox.xpath("./p/text()").extract()

            item = FhdodoItem()
            item['whvi'] = tokens(whvi, 0)
            item['title'] = tokens(title, 0)
            item['info0'] = tokens(info, 0)
            item['info1'] = tokens(info, 1)
            item['addr0'] = tokens(addr, 0)
            item['addr1'] = tokens(addr, 1)
            item['addr2'] = tokens(addr, 2)
            item['price'] = tokens(price, 0)
            # The second text node of the price paragraphs is the one used;
            # presumably the unit price -- TODO confirm against the live page.
            item['up'] = tokens(up, 1)
            yield item

以上是主代码,其余部分都参考前一个实例。