【项目实战】数据爬虫＋数据清洗 + 数据可视化+开源代码啦

程序员文章站 2023-12-11 20:34:47

文章目录写在前面：自己已经创建公众号啦~AI算法交流+开源数据汇总+私房数据及标注数据共享+自己实践项目开源欢迎大家关注：DeepAI 视界爬虫：链接网二手房（以贵阳市为例）对应的数据可视化：同时赠送给大家另一个版本的：爬虫：链家网：柳州市数据可视化：（优化版）话不多说，自己上篇爬虫博客写的还行，10000的阅读量以及360多的收藏和100多的点赞评论，自己一个一个发送代码发了一个多月，现在正式开源！写在前面：自己已经创建公众号啦~AI算法交流+开源数据汇总+私房数据及标注数据共享+自己实践项目开源...

文章目录

写在前面：

自己已经创建公众号啦~
AI算法交流+开源数据汇总+私房数据及标注数据共享+自己实践项目开源
欢迎大家关注：DeepAI 视界

爬虫：链接网二手房（以贵阳市为例）
对应的数据可视化：

同时赠送给大家另一个版本的：

爬虫：链家网：柳州市

数据可视化：（优化版）

话不多说，自己上篇爬虫博客写的还行，10000的阅读量以及360多的收藏和100多的点赞评论，自己一个一个发送代码发了一个多月，现在正式开源！
附上原文传送门：https://blog.csdn.net/qq_46098574/article/details/106048756

写在前面：

自己已经创建公众号啦~

AI算法交流+开源数据汇总+私房数据及标注数据共享+自己实践项目开源

欢迎大家关注：DeepAI 视界

【项目实战】数据爬虫＋数据清洗 + 数据可视化+开源代码啦

展示一下：
【项目实战】数据爬虫＋数据清洗 + 数据可视化+开源代码啦

爬虫：链接网二手房（以贵阳市为例）

"""
爬虫
"""
import requests
from lxml import etree
import xlwt
import xlrd
import csv
import pandas as pd
import time
import re
class LanjiaSpider:
    def __init__(self):
        self.url = 'https://wh.lianjia.com/ershoufang/ronganxian/pg{}/'
        self.headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36"}

    def get_response_spider(self, url_str):  # 发送请求
        get_response = requests.get(self.url, headers=self.headers)
        time.sleep(2)
        response = get_response.content.decode()
        html = etree.HTML(response)
        return html
    def get_content_html(self, html):  # 使xpath获取数据
        self.houseInfo = html.xpath('//div[@class="houseInfo"]/text()')
        self.title = html.xpath('//div[@class="title"]/a/text()')
        self.positionInfo = html.xpath('//div[@class="positionInfo"]/a/text()')
        self.totalPrice = html.xpath('//div[@class="totalPrice"]/span/text()')
        self.unitPrice = html.xpath('//div[@class="unitPrice"]/span/text()')
        self.followInfo = html.xpath('//div[@class="followInfo"]/text()')
        self.tag = html.xpath('//div[@class="tag"]/span/text()')
        # print(title)
        # return houseInfo,title,positionInfo,totalPrice,unitPrice,followInfo,tag

    def xpath_houseInfo(self):
        #print(self.houseInfo)
        #print(type(self.houseInfo))
        # df = pd.DataFrame({"houseInfo": self.houseInfo,"tite":self.title,"positionInfo":self.positionInfo,"totaPrice":self.totalPrice,"unitPrice":self.unitPrice,"followInfo":self.followInfo,"tag":self.tag})
        # df=pd.DataFrame({"houseInfo": self.houseInfo,"tite":self.title})
        # df.to_excel(r'C:\Users\wy\Desktop\sublime\链家\pand3.xlsx')
        # a=len(self.houseInfo)
        for i in range(len(self.houseInfo)):
            # print(i)
            # yield i
            # print(type(self.houseInfo))
            yield self.houseInfo[i]

    def qingxi_data_houseInfo(self):  # 清洗数据

        self.xpath_houseInfo()
        self.xpath_title()
        self.xpath_positionInfo()
        self.xpath_totalPrice()
        self.xpath_unitPrice()
        self.xpath_followInfo()
        self.xpath_tag()
        get_houseInfo = self.xpath_houseInfo()
        get_title = self.xpath_title()
        get_positionInfo=self.xpath_positionInfo()
        get_totalPrice = self.xpath_totalPrice()
        get_unitPrice = self.xpath_unitPrice()
        get_followInfo=self.xpath_followInfo()
        get_tag=self.xpath_tag()
        i = 1

        while True:
            data_houseInfo= next(get_houseInfo)
            data_title=next(get_title)
            data_positionInfo=next(get_positionInfo)
            data_totalPrice=next(get_totalPrice)
            data_unitPrice=next(get_unitPrice)
            data_followInfo=next(get_followInfo)
            data_tag=next(get_tag)

            with open("a.csv", "a", newline="", encoding="utf-8-sig") as f:
                # fieldnames = ['houseInfo', 'title', 'positionInfo', 'totalPrice/万元', 'unitPrice', 'followInfo', 'tag']
                # writer = csv.DictWriter(f, fieldnames=fieldnames)  # 写入表头
                # writer.writeheader()
                writer = csv.DictWriter(f, fieldnames=fieldnames)  # 写入表头

                list_1 = ['houseInfo', 'title', 'positionInfo', 'totalPrice/万元', 'unitPrice', 'followInfo', 'tag']
                list_2 = [data_houseInfo,data_title,data_positionInfo,data_totalPrice,data_unitPrice,data_followInfo,data_tag]
                list_3 = dict(zip(list_1, list_2))
                writer.writerow(list_3)
                print("写入第"+str(i)+"行数据")
            i += 1
            if i > len(self.houseInfo):
                break

    def xpath_title(self):
        for i in range(len(self.title)):
            yield self.title[i]

    def xpath_positionInfo(self):
        for i in range(len(self.positionInfo)):
            yield self.positionInfo[i]

    def xpath_totalPrice(self):
        for i in range(len(self.totalPrice)):
            yield self.totalPrice[i]
    def xpath_unitPrice(self):
        for i in range(len(self.unitPrice)):
            yield self.unitPrice[i]
    def xpath_followInfo(self):
        for i in range(len(self.followInfo)):
            yield self.followInfo[i]
    def xpath_tag(self):
        for i in range(len(self.tag)):
            yield self.tag[i]
    def run(self):
        i = 1
        while True:
            url_str = self.url.format(i)  # 构造请求url
            html = self.get_response_spider(url_str)
            self.get_content_html(html)
            self.qingxi_data_houseInfo()

            i += 1
            if i == 1:  # 不包括57页
                break


if __name__ == "__main__":
    with open("a.csv", "a", newline="", encoding="utf-8-sig") as f:
        fieldnames = ['houseInfo', 'title', 'positionInfo', 'totalPrice/万元', 'unitPrice', 'followInfo', 'tag']
        writer = csv.DictWriter(f, fieldnames=fieldnames)  # 写入表头
        writer.writeheader()
    lanjia = LanjiaSpider()
    lanjia.run()

对应的数据可视化：

"""
数据分析及可视化
"""
import pandas as pd
from pyecharts.charts import Line, Bar
import numpy as np
from pyecharts.globals import ThemeType
from pyecharts.charts import Pie
from pyecharts import options as opts


places = ['lianjia_BaiYunQu', 'lianjia_GuanShanHuQu', 'lianjia_HuaXiQu', 'lianjia_NanMingQu', 'lianjia_WuDangQu', 'lianjia_YunYanQu']
place = ['白云区', '观山湖区', '花溪区', '南明区', '乌当区', '云岩区']
avgs = []  # 房价均值
median = []  # 房价中位数
favourate_avg = []  # 房价收藏人数均值
favourate_median = []  # 房价收藏人数中位数
houseidfo = ['2室1厅', '3室1厅', '2室2厅', '3室2厅', '其他']  # 房型定义
houseidfos = ['2.1', '3.1', '2.2', '3.2']
sum_house = [0,  0, 0, 0, 0]  # 各房型数量
price = []  # 房价
fav = []  # 收藏人数
type = []
area = []  # 房间面积
def avg(name):
    df = pd.read_csv(str(name)+'.csv', encoding='utf-8')
    pattern = '\d+'
    df['totalPrice/万元'] = df['totalPrice/万元'].str.findall(pattern)
    df['followInfo'] = df['followInfo'].str.findall(pattern)
    df['houseInfo'] = df['houseInfo'].str.findall(pattern)
    sum_houses = [0, 0, 0, 0, 0]
    # print(sum_house)
    avg_work_year = []
    medians = []
    favourates = []
    k = 0
    k1 = 0
    k3 = 0
    k4 = 0

    for i in range(len(df)):
        if (i + 1) % 2 == 0:
            continue
        else:
            if len(df['totalPrice/万元'][i]) == 2:
                avg_work_year.append(','.join(df['totalPrice/万元'][i]).replace(',', '.'))
                medians.append(float(','.join(df['totalPrice/万元'][i]).replace(',', '.')))
                price.append(','.join(df['totalPrice/万元'][i]).replace(',', '.'))
            if len(df['followInfo'][i]) ==2:
                favourates.append(int(','.join(df['followInfo'][i][:1])))
                fav.append(int(','.join(df['followInfo'][i][:1])))
            if float(','.join(df['houseInfo'][i][:2]).replace(',', '.')) == 2.1:
                k +=1
                sum_houses[0] =k
                type.append(2.1)
            if float(','.join(df['houseInfo'][i][:2]).replace(',', '.')) == 3.1:
                k1 +=1
                sum_houses[1] =k1
                type.append(3.1)
            if float(','.join(df['houseInfo'][i][:2]).replace(',', '.')) == 2.2:
                k3 +=1
                sum_houses[2] =k3
                type.append(2.2)
            if float(','.join(df['houseInfo'][i][:2]).replace(',', '.')) == 3.2:
                k4 +=1
                sum_houses[3] =k4
                type.append(3.2)
            else:
                k4 +=1
                sum_houses[4] = k4
                type.append('other')
            area.append(float(','.join(df['houseInfo'][i][2:4]).replace(',', '.')))
    sum_house[0] =sum_houses[0]
    sum_house[1] = sum_houses[1]
    sum_house[2] = sum_houses[2]
    sum_house[3] = sum_houses[3]
    sum_house[4] = sum_houses[4]

    favourates.sort()
    favourate_median.append(int(np.median(favourates)))
    medians.sort()
    median.append(np.median(medians))
    # price = avg_work_year
    b = len(avg_work_year)
    b1= len(favourates)

    sum = 0
    sum1 = 0
    for i in avg_work_year:
        sum = sum+float(i)
    avgs.append(round(sum/b, 2))
    for i in favourates:
        sum1 = sum1+float(i)
    favourate_avg.append(round(int(sum1/b1), 2))

for i in places:
    avg(i)

print("各区平均房价", avgs)
print('各房型的出售总数:', sum_house)
print("房间面积", area)
"""
[280, 56, 504, 1676, 1680]
[392, 112, 448, 1679, 1680]
[224, 0, 616, 3359, 3360]
[448, 112, 280, 1679, 1680]
[504, 0, 336, 1680, 1679]
[224, 56, 168, 1680, 1670]


[66.17, 65.6, 76.04, 78.94, 62.06, 74.37] 
[68.8, 67.8, 79.8, 70.8, 57.6, 78.8]
[6, 6, 9, 4, 4, 4] [5, 4, 3, 2, 3, 2]
"""
# print(median)
# print(favourate_avg,favourate_median)
line = Line()
line.add_xaxis(place)
line.add_yaxis("贵阳各地房价平均值(万元)", avgs)
line.add_yaxis("贵阳各地房价中位数值（万元）", median)
# line.render("predict_line.html")

def bar() -> Bar:
    c = (
        Bar({"theme": ThemeType.MACARONS})
            .add_xaxis(place)
            .add_yaxis("平均值", avgs)
            .add_yaxis("中位数", median)
            .set_global_opts(
            title_opts={"text": "贵阳各地房价(万元)"}
        )
    )
    return c
bar().render("predict_bar.html")
# print(sum_house)
def bar() -> Bar:
    c = (
        Bar({"theme": ThemeType.MACARONS})
            .add_xaxis(houseidfo)
            .add_yaxis(place[0], [280, 56, 504, 1676, 1680])
            .add_yaxis(place[1], [392, 112, 448, 1679, 1680])
            .add_yaxis(place[2], [224, 0, 616, 3359, 3360])
            .add_yaxis(place[3], [448, 112, 280, 1679, 1680])
            .add_yaxis(place[4], [504, 0, 336, 1680, 1679])
            .add_yaxis(place[-1], sum_house)
            # .add_yaxis("中位数", favourate_median)
            .set_global_opts(
            title_opts={"text": "贵阳各地房型\n数量"}
        )
    )
    return c
# bar().render("house_bar.html")
line = Line()
line.add_xaxis(place)
line.add_yaxis("贵阳各地房子平均面积\n(平米)", area)
line.render("Area_line.html")



list_num = favourate_avg
attr = place
# print(zip(attr, list_num))
s = [list(z) for z in zip(attr, list_num)]
c = (Pie().add("", s).set_global_opts(title_opts=opts.TitleOpts(title="贵阳市各区楼房\n平均收藏人数"))
     .set_series_opts(label_opts=opts.LabelOpts(formatter="{b}: {c}"))
     )
c.render("pie_avg.html")

list_num = favourate_median
attr = place
# print(zip(attr, list_num))
s = [list(z) for z in zip(attr, list_num)]
c = (Pie().add("", s).set_global_opts(title_opts=opts.TitleOpts(title="贵阳市各区楼房\n收藏人数中位数"))
     .set_series_opts(label_opts=opts.LabelOpts(formatter="{b}: {c}"))
     )
c.render("pie_median.html")

from pyecharts import options as  opts
from pyecharts.charts import Scatter3D
from pyecharts.faker import Faker


price=[float(i)/1 for i in price]
# print(price)
# types=list(map(mapfunc,df.house_type.values))
# type = [224, 56, 168, 1680, 1670]
data = []
# print(fav,type)
# for j in range(len(type)):
#     for k in range(len(fav)):
for j in range(100):
    for k in range(100):
        for i in range(500):
            try:
                data.append([type[j], favourate_avg[k],price[i]])
            except:
                continue
# print(data)
scatter = (
     Scatter3D(init_opts=opts.InitOpts(width='900px', height='600px'))  # 初始化
          .add("", data,
               grid3d_opts=opts.Grid3DOpts(
                    width=300, depth=300, rotate_speed=300, is_rotate=True,
               ),)

          # 设置全局配置项
          .set_global_opts(
          title_opts=opts.TitleOpts(title="房型——关注度——价格\n三维关系图"),  # 添加标题
          visualmap_opts=opts.VisualMapOpts(
               max_=100,  # 最大值
               pos_top=200,  # visualMap 组件离容器上侧的距离
               range_color=Faker.visual_color  # 颜色映射
          )
     )
          # .render("3D散点图.html")
)
print('数据分析和可视化结束，左边点开~')

同时赠送给大家另一个版本的：

爬虫：链家网：柳州市

# -*- coding: utf-8 -*-
import scrapy


import requests
from lxml import etree
import xlwt
import xlrd
import csv
import pandas as pd
import time

class LanjiaSpider:
    def __init__(self):
        self.url = 'https://liuzhou.lianjia.com/ershoufang/yufengqu/pg{}/'
        self.headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36"}

    def get_response_spider(self, url_str):  # 发送请求
        get_response = requests.get(self.url, headers=self.headers)
        time.sleep(2)
        response = get_response.content.decode()
        html = etree.HTML(response)
        return html
    def get_content_html(self, html):  # 使xpath获取数据
        self.houseInfo = html.xpath('//div[@class="houseInfo"]/text()')
        self.title = html.xpath('//div[@class="title"]/a/text()')
        self.positionInfo = html.xpath('//div[@class="positionInfo"]/a/text()')
        self.totalPrice = html.xpath('//div[@class="totalPrice"]/span/text()')
        self.unitPrice = html.xpath('//div[@class="unitPrice"]/span/text()')
        self.followInfo = html.xpath('//div[@class="followInfo"]/text()')
        self.tag = html.xpath('//div[@class="tag"]/span/text()')
        # print(title)
        # return houseInfo,title,positionInfo,totalPrice,unitPrice,followInfo,tag

    def xpath_houseInfo(self):
        #print(self.houseInfo)
        #print(type(self.houseInfo))
        # df = pd.DataFrame({"houseInfo": self.houseInfo,"tite":self.title,"positionInfo":self.positionInfo,"totaPrice":self.totalPrice,"unitPrice":self.unitPrice,"followInfo":self.followInfo,"tag":self.tag})
        # df=pd.DataFrame({"houseInfo": self.houseInfo,"tite":self.title})
        # df.to_excel(r'C:\Users\wy\Desktop\sublime\链家\pand3.xlsx')
        # a=len(self.houseInfo)
        for i in range(len(self.houseInfo)):
            # print(i)
            # yield i
            # print(type(self.houseInfo))
            yield self.houseInfo[i]

    def qingxi_data_houseInfo(self):  # 清洗数据

        self.xpath_houseInfo()
        self.xpath_title()
        self.xpath_positionInfo()
        self.xpath_totalPrice()
        self.xpath_unitPrice()
        self.xpath_followInfo()
        self.xpath_tag()
        get_houseInfo = self.xpath_houseInfo()
        get_title = self.xpath_title()
        get_positionInfo=self.xpath_positionInfo()
        get_totalPrice = self.xpath_totalPrice()
        get_unitPrice = self.xpath_unitPrice()
        get_followInfo=self.xpath_followInfo()
        get_tag=self.xpath_tag()
        i = 1
        while True:
            data_houseInfo= next(get_houseInfo)
            data_title=next(get_title)
            data_positionInfo=next(get_positionInfo)
            data_totalPrice=next(get_totalPrice)
            data_unitPrice=next(get_unitPrice)
            data_followInfo=next(get_followInfo)
            data_tag=next(get_tag)

            with open("yufengqu.csv", "a", newline="", encoding="utf-8-sig") as f:
                fieldnames = ['houseInfo', 'title', 'positionInfo', 'totalPrice/万元', 'unitPrice', 'followInfo', 'tag']
                writer = csv.DictWriter(f, fieldnames=fieldnames)  # 写入表头
                writer.writeheader()
                list_1 = ['houseInfo', 'title', 'positionInfo', 'totalPrice/万元', 'unitPrice', 'followInfo', 'tag']
                list_2 = [data_houseInfo,data_title,data_positionInfo,data_totalPrice,data_unitPrice,data_followInfo,data_tag]
                list_3 = dict(zip(list_1, list_2))
                writer.writerow(list_3)
                print("写入第"+str(i)+"行数据")
            i += 1
            if i > len(self.houseInfo):
                break

    def xpath_title(self):
        for i in range(len(self.title)):
            yield self.title[i]

    def xpath_positionInfo(self):
        for i in range(len(self.positionInfo)):
            yield self.positionInfo[i]

    def xpath_totalPrice(self):
        for i in range(len(self.totalPrice)):
            yield self.totalPrice[i]
    def xpath_unitPrice(self):
        for i in range(len(self.unitPrice)):
            yield self.unitPrice[i]
    def xpath_followInfo(self):
        for i in range(len(self.followInfo)):
            yield self.followInfo[i]
    def xpath_tag(self):
        for i in range(len(self.tag)):
            yield self.tag[i]
    def run(self):
        i = 1
        while True:
            url_str = self.url.format(i)  # 构造请求url
            html = self.get_response_spider(url_str)
            self.get_content_html(html)
            self.qingxi_data_houseInfo()

            i += 1
            if i == 100:  # 不包括100页
                break


# if __name__ == "__main__":
#     lanjia = LanjiaSpider()
#     lanjia.run()

class MyspiderSpider(scrapy.Spider):
    name = 'myspider'
    allowed_domains = ['https://wh.lianjia.com/ershoufang/jianghan/']
    start_urls = ['https://wh.lianjia.com/ershoufang/jianghan//']

    def parse(self, response):
        print('爬取ing....')
        lanjia = LanjiaSpider()
        lanjia.run()

数据可视化：（优化版）

"""
数据分析及可视化
auuthor: 周小夏
"""
import pandas as pd
from pyecharts.charts import Line, Bar
import numpy as np
from pyecharts.globals import ThemeType
from pyecharts.charts import Pie
from pyecharts import options as opts


places = ['chengzhongqu', 'liubeiqu', 'liuchengxian', 'liujiangqu', 'liunanqu', 'yufengqu']
place = ['城中区', '柳北区', '柳城县', '柳江区', '柳南区', '鱼峰区']
avgs = []  # 房价均值
median = []  # 房价中位数
favourate_avg = []  # 房价收藏人数均值
favourate_median = []  # 房价收藏人数中位数
houseidfo = ['2室1厅', '3室1厅', '2室2厅', '3室2厅', '其他']  # 房型定义
houseidfos = ['2.1', '3.1', '2.2', '3.2']
sum_house = [0,  0, 0, 0, 0]  # 各房型数量
sum_houses = []
price = []  # 房价均值
unitprice = []  # 单价
fav = []  # 收藏人数
type = []
area = []  # 房间


def avg(name):
    df = pd.read_csv('./spiders/' + str(name)+'.csv', encoding='utf-8')
    pattern = '\d+'
    df['totalPrice/万元'] = df['totalPrice/万元'].str.findall(pattern)
    df['followInfo'] = df['followInfo'].str.findall(pattern)
    df['houseInfo'] = df['houseInfo'].str.findall(pattern)
    df['unitPrice'] = df['unitPrice'].str.findall(pattern)

    sum_houses = [0, 0, 0, 0, 0]
    # print(sum_house)
    avg_work_year = []
    areas = []
    unit_avg = []
    medians = []
    favourates = []
    k = 0
    k1 = 0
    k3 = 0
    k4 = 0

    for i in range(len(df)):
        if (i + 1) % 2 == 0:
            continue
        else:
            if len(df['unitPrice'][i]) >= 0:
                unit_avg.append(','.join(df['unitPrice'][i]).replace(',', '.'))
            if len(df['totalPrice/万元'][i]) >= 0:
                avg_work_year.append(','.join(df['totalPrice/万元'][i]).replace(',', '.'))
                medians.append(float(','.join(df['totalPrice/万元'][i]).replace(',', '.'))*100)
                price.append(','.join(df['totalPrice/万元'][i]).replace(',', '.'))
            if len(df['followInfo'][i]) ==2:
                favourates.append(int(','.join(df['followInfo'][i][:1])))
                fav.append(int(','.join(df['followInfo'][i][:1])))
            if float(','.join(df['houseInfo'][i][:2]).replace(',', '.')) == 2.1:
                k +=1
                sum_houses[0] =k
                type.append(2.1)
            if float(','.join(df['houseInfo'][i][:2]).replace(',', '.')) == 3.1:
                k1 +=1
                sum_houses[1] =k1
                type.append(3.1)
            if float(','.join(df['houseInfo'][i][:2]).replace(',', '.')) == 2.2:
                k3 +=1
                sum_houses[2] =k3
                type.append(2.2)
            if float(','.join(df['houseInfo'][i][:2]).replace(',', '.')) == 3.2:
                k4 +=1
                sum_houses[3] =k4
                type.append(3.2)
            else:
                k4 +=1
                sum_houses[4] = k4
                type.append('other')
            areas.append(float(','.join(df['houseInfo'][i][2:4]).replace(',', '.')))
    sum_house[0] =sum_houses[0]
    sum_house[1] = sum_houses[1]
    sum_house[2] = sum_houses[2]
    sum_house[3] = sum_houses[3]
    sum_house[4] = sum_houses[4]
    sum_house.append(sum_house[0])
    sum_house.append(sum_house[1])
    sum_house.append(sum_house[2])
    sum_house.append(sum_house[3])
    sum_house.append(sum_house[4])
    # print(sum_houses)
    favourates.sort()
    favourate_median.append(int(np.median(favourates)))
    medians.sort()
    median.append(np.median(medians))
    # price = avg_work_year
    b = len(avg_work_year)*100
    b1= len(favourates)
    b2 = len(unit_avg)
    b4 = len(areas)*100
    sum = 0
    sum1 = 0
    for i in unit_avg:
        sum = sum+float(i)
    unitprice.append(round(sum/b2, 2))
    for i in areas:
        sum = sum+float(i)
    area.append(round(sum/b4, 2))
    for i in avg_work_year:
        sum = sum+float(i)
    avgs.append(round(sum/b, 2))
    for i in favourates:
        sum1 = sum1+float(i)
    favourate_avg.append(round(int(sum1/b1), 2))

for i in places:
    avg(i)

print("各区平均房价", avgs)
print('各房型的出售总数:', sum_house)
print("房间面积", area)
print("房价单价", unitprice)

a = []
for i in median:
    a.append(i/100)
# print(median)
# print(favourate_avg,favourate_median)
line = Line()
line.add_xaxis(place)
line.add_yaxis("柳州市各地房价平均值(万元)", avgs)
line.add_yaxis("柳州市各地房价中位数值（万元）", a)
line.render("predict_line.html")

def bar() -> Bar:
    c = (
        Bar({"theme": ThemeType.MACARONS})
            .add_xaxis(place)
            .add_yaxis("平均值", unitprice)
            .set_global_opts(
            title_opts={"text": "柳州市各地房价单价(元)"}
        )
    )
    return c

bar().render("unit_prices.html")
def bar() -> Bar:
    c = (
        Bar({"theme": ThemeType.MACARONS})
            .add_xaxis(place)
            .add_yaxis("平均值", avgs)
            .add_yaxis("中位数", a)
            .set_global_opts(
            title_opts={"text": "柳州市各地房价(万元)"}
        )
    )
    return c
bar().render("predict_bar.html")
# print(sum_house)
def bar() -> Bar:
    c = (
        Bar({"theme": ThemeType.MACARONS})
            .add_xaxis(houseidfo)
            .add_yaxis(place[0], sum_house[0:5])
            .add_yaxis(place[1], sum_house[5:10])
            .add_yaxis(place[2], sum_house[10:15])
            .add_yaxis(place[3], sum_house[15:20])
            .add_yaxis(place[4], sum_house[20:25])
            .add_yaxis(place[-1], sum_house[25:30])
            # .add_yaxis("中位数", favourate_median)
            .set_global_opts(
            title_opts={"text": "柳州市各地房型\n数量"}
        )
    )
    return c
bar().render("house_bar.html")

line = Line()
line.add_xaxis(place)
line.add_yaxis("柳州市各地房子平均面积\n(平米)", area)
line.render("Area_line.html")



list_num = favourate_avg
attr = place
# print(zip(attr, list_num))
s = [list(z) for z in zip(attr, list_num)]
c = (Pie().add("", s).set_global_opts(title_opts=opts.TitleOpts(title="柳州市各区楼房\n平均收藏人数"))
     .set_series_opts(label_opts=opts.LabelOpts(formatter="{b}: {c}"))
     )
c.render("pie_avg.html")

list_num = favourate_median
attr = place
# print(zip(attr, list_num))
s = [list(z) for z in zip(attr, list_num)]
c = (Pie().add("", s).set_global_opts(title_opts=opts.TitleOpts(title="柳州市各区楼房\n收藏人数中位数"))
     .set_series_opts(label_opts=opts.LabelOpts(formatter="{b}: {c}"))
     )
c.render("pie_median.html")

from pyecharts import options as  opts
from pyecharts.charts import Scatter3D
from pyecharts.faker import Faker


line = Line()
line.add_xaxis(place)
line.add_yaxis("房间面积\n(平米)", area)
line.add_yaxis("房价\n(/万元)", avgs)
line.render("price1_line.html")


price=[float(i)/1 for i in price]
# print(price)
# types=list(map(mapfunc,df.house_type.values))
# type = [224, 56, 168, 1680, 1670]
data = []
# print(fav,type)
# for j in range(len(type)):
#     for k in range(len(fav)):
for j in range(100):
    for k in range(100):
        for i in range(500):
            try:
                data.append([type[j], favourate_avg[k],price[i]])
            except:
                continue
# print(data)
scatter = (
     Scatter3D(init_opts=opts.InitOpts(width='900px', height='600px'))  # 初始化
          .add("", data,
               grid3d_opts=opts.Grid3DOpts(
                    width=300, depth=300, rotate_speed=300, is_rotate=True,
               ),)

          # 设置全局配置项
          .set_global_opts(
          title_opts=opts.TitleOpts(title="房型——关注度——价格\n三维关系图"),  # 添加标题
          visualmap_opts=opts.VisualMapOpts(
               max_=300,  # 最大值
               pos_top=200,  # visualMap 组件离容器上侧的距离
               range_color=Faker.visual_color  # 颜色映射
          )
     )
          .render("3D散点图.html")
)
print('数据分析和可视化结束，左边点开~')

最后，别忘记了关注公众号~

分享最新算法！

[外链图片转存失败,源站可能有防盗链机制,建议将图片保存下来直接上传(img-CiQYvfTG-1594979733395)(D:\CSDN\pic\WeChat Image_20200716151357.jpg)]

本文地址：https://blog.csdn.net/qq_46098574/article/details/107414402

相关标签：实战项目数据挖掘可视化数据可视化大数据

【项目实战】数据爬虫 ＋ 数据清洗 + 数据可视化+开源代码啦