欢迎您访问程序员文章站,本站旨在为大家提供和分享程序员计算机编程知识!
您现在的位置是: 首页

国内疫情分析

程序员文章站 2024-03-07 20:32:09
...

利用Python获取国内疫情数据并进行数据可视化

数据收集

  • 利用requests获取国内疫情相关数据
  • 利用json模块解析返回的JSON数据,并保存为json格式文件
import requests
import json
# Endpoint that serves the domestic epidemic data
china_url = 'https://view.inews.qq.com/g2/getOnsInfo?name=disease_h5'
# Each header needs its own key/value pair. Without the comma and the separate
# key, the two adjacent string literals are concatenated into one bogus
# 'referer' value and no 'user-agent' header is sent at all.
headers = {
    'referer': 'https://news.qq.com/zt2020/page/feiyan.htm',
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36',
}
# Fetch the payload; the timeout keeps the script from hanging forever on a
# stalled connection, and raise_for_status() surfaces HTTP errors before we
# try to decode an error page as JSON.
http_reply = requests.get(url=china_url, headers=headers, timeout=10)
http_reply.raise_for_status()
response = http_reply.json()
print(type(response))
# response['data'] is itself a JSON-encoded string, so it is decoded a second
# time to obtain ordinary Python objects
data = json.loads(response['data'])

with open('./国内疫情数据.json', 'w', encoding='utf-8') as f:
    # Serialise the Python objects back to JSON text and store them on disk;
    # indent=2 pretty-prints, ensure_ascii=False keeps Chinese text readable
    f.write(json.dumps(data, ensure_ascii=False, indent=2))
<class 'dict'>

获取我们需要的数据

  • 分别获取国家、省、市名称及其相应的数据
  • 利用pandas转化为DataFrame数据格式
  • 拆解today和total两个字段
  • 保存为Excel格式文件
import pandas as pd
from openpyxl import load_workbook

# Read the saved JSON snapshot from disk and decode it into Python objects
with open('./国内疫情数据.json', 'r', encoding='utf-8') as f:
    data = json.loads(f.read())

# The first entry of "areaTree" is taken as the China node
ChinaArea = data["areaTree"][0]
# Its children are the province-level nodes
provincearea = ChinaArea['children']

# One flat record per city, tagged with the province it belongs to
city_list = []
for province_node in provincearea:
    province = province_node['name']
    # City-level nodes nested under this province
    cityarea = province_node['children']
    city_list.extend(
        {'province': province, 'city': city_node['name'],
         'today': city_node['today'], 'total': city_node['total']}
        for city_node in cityarea
    )

# Turn the list of records into a DataFrame
df = pd.DataFrame(city_list)
# Expand the nested "total" dicts into one flat column per statistic
total_records = df['total'].values.tolist()
# (new column name, key inside each "total" dict), in the column order to add
total_fields = (
    ('confirm', 'confirm'),    # confirmed cases
    ('suspect', 'suspect'),    # suspected cases
    ('dead', 'dead'),          # deaths
    ('deadrate', 'deadRate'),  # death rate
    ('heal', 'heal'),          # recoveries
    ('healrate', 'healRate'),  # recovery rate
)
for column, key in total_fields:
    df[column] = [record[key] for record in total_records]


# Expand the nested "today" dicts into flat columns
today_records = df['today'].values.tolist()
# Cases confirmed today
df['today_confirm'] = [record['confirm'] for record in today_records]
# 'confirmCuts' was labelled "isolated today" by the original author;
# its exact meaning is not visible here -- TODO confirm against the API
df['today_confirmCuts'] = [record['confirmCuts'] for record in today_records]

# The nested source columns are redundant once they have been flattened
df.drop(columns=['total', 'today'], inplace=True)
df

# Save df into the Excel workbook, one sheet per data-update date.
import os

excel_path = '国内疫情.xlsx'

# Name the sheet after the update date so that snapshots collected on
# different days land in separate sheets of the same workbook.
lastUpdateTime = data['lastUpdateTime']  # timestamp of the data update
# Keep only the date part. partition() also copes with a value that has no
# space, where slicing up to find(' ') == -1 would chop off the last character.
sheet_name = lastUpdateTime.partition(' ')[0]

if os.path.exists(excel_path):
    # Append to the existing workbook; a sheet with the same name is replaced,
    # so re-running on the same day does not pile up duplicate sheets.
    writer_options = {'mode': 'a', 'if_sheet_exists': 'replace'}
else:
    # First run: there is no workbook to load yet, so create it.
    writer_options = {'mode': 'w'}

# The context manager saves and closes the workbook on exit. This replaces
# assigning writer.book / writer.sheets and calling writer.save(), which
# current pandas versions no longer allow.
with pd.ExcelWriter(excel_path, engine='openpyxl', **writer_options) as writer:
    df.to_excel(writer, index=False, sheet_name=sheet_name)

数据分析和可视化

import matplotlib.pyplot as plt
import numpy as np
%matplotlib inline
# SimHei is selected so that the Chinese labels below can be rendered
plt.rcParams['font.family'] = 'SimHei'
# Horizontal bar chart: cumulative confirmed and recovered cases per province
plt.figure(figsize=(12, 15), dpi=80)
# Aggregate both metrics together and sort ONCE. Sorting the two series
# independently would put different provinces at the same bar position while
# the tick labels only follow one of the orderings.
province_totals = (
    df.groupby(by='province')[['confirm', 'heal']].sum().sort_values(by='heal')
)
province_confirm = province_totals['confirm']
province_heal = province_totals['heal']
index = np.arange(len(province_totals))
bar_width = 0.4
plt.barh(index, province_confirm.values.tolist(), height=bar_width,
         color='b', label='累计确诊人数')
# The second bar of each pair is shifted up by one bar width plus a small gap
plt.barh(index + bar_width + 0.1, province_heal.values.tolist(),
         height=bar_width, color='r', label='累计治愈人数')
# Centre each tick label between the two bars of its province
plt.yticks(index + (bar_width + 0.1) / 2, province_totals.index)
plt.title('国内疫情累计确诊和治愈人数')
plt.xlabel('人数')
plt.ylabel('省份')
# Fit the axis to the actual number of provinces instead of hard-coding 34,
# and start below zero so the lowest bar is not clipped
plt.ylim(-bar_width, len(index))
plt.legend(loc='best')
plt.show()

国内疫情分析

使用pyecharts数据可视化-map

from pyecharts import options as opts
import pandas as pd
from pyecharts.charts import Map

# Load the sheet identified by sheet_name from the saved workbook
data = pd.read_excel('./国内疫情.xlsx', sheet_name=sheet_name)
# Sum the figures per province; these totals feed the confirmed-cases map
data_groupby = data.groupby(by=['province'], as_index=False).sum()
data_groupby
# Pair every province name with its confirmed total
province_names = data_groupby['province'].values.tolist()
confirmed_counts = data_groupby['confirm'].values.tolist()
data_groupby_list = list(zip(province_names, confirmed_counts))
data_groupby_list
def china_map(data_pair=None):
    """Build a pyecharts map of China coloured by confirmed cases.

    Args:
        data_pair: Optional sequence of (province name, confirmed count)
            pairs. When omitted, the module-level ``data_groupby_list`` is
            used, which matches the original zero-argument behaviour.

    Returns:
        The configured ``Map`` chart; rendering is left to the caller.
    """
    if data_pair is None:
        data_pair = data_groupby_list

    # Colour buckets for the piecewise visual map. The last bucket has no
    # 'max' so it is open-ended, as its '>10000' label says; with a cap of
    # 99999 any larger value would match no bucket at all.
    pieces = [
        {'max': 9, 'min': 0, 'label': '0-9', 'color': '#FFE4E1'},
        {'max': 99, 'min': 10, 'label': '10-99', 'color': '#FF7F50'},
        {'max': 499, 'min': 100, 'label': '100-499', 'color': '#F08080'},
        {'max': 999, 'min': 500, 'label': '500-999', 'color': '#CD5C5C'},
        {'max': 9999, 'min': 1000, 'label': '1000-9999', 'color': '#990000'},
        {'min': 10000, 'label': '>10000', 'color': '#660000'},
    ]
    c = (
        Map()
        .add(series_name='确诊病例', data_pair=data_pair, maptype='china')
        .set_global_opts(
            title_opts=opts.TitleOpts(title='疫情地图'),
            visualmap_opts=opts.VisualMapOpts(is_piecewise=True, pieces=pieces),
        )
    )
    return c
# Build the chart object returned by china_map()
d_map =china_map()
# Write the chart to an HTML file
d_map.render('国内疫情确诊地图.html')
# Call pyecharts' notebook renderer on the same chart
d_map.render_notebook() 
    <div id="99085df893a346e1973efbeca526ce2d" style="width:900px; height:500px;"></div>

国内疫情分析