欢迎您访问程序员文章站，本站旨在为大家提供、分享程序员计算机编程知识！
您现在的位置是: 首页  >  IT编程

爬取天气信息并进行简单的数据可视化

程序员文章站 2022-06-05 18:15:37
```python import requests import pandas as pd from matplotlib import pyplot as plt from lxml import etree url = 'http://www.tianqihoubao.com/lishi/cha... ......
import requests
import pandas as pd

from matplotlib import pyplot as plt
from lxml import etree


# Step 1: download the monthly history page and pull one record per table row.
url = 'http://www.tianqihoubao.com/lishi/changping/month/201911.html'
headers = {
    'user-agent': 'mozilla/5.0 (windows nt 10.0; win64; x64) applewebkit/537.36 (khtml, like gecko) chrome/78.0.3904.97 safari/537.36'
}

# Parallel lists: dates[i], conditions[i] and tem[i] describe the same row.
dates, conditions, tem = [], [], []

# A timeout keeps the script from hanging forever if the server never answers.
response = requests.get(url, headers=headers, timeout=10).text

# lxml exposes the HTML string parser as ``etree.HTML`` (upper case);
# ``etree.html`` does not exist and raises AttributeError.
tree = etree.HTML(response)
tr_list = tree.xpath('//*[@id="content"]/table//tr')

# The first <tr> is skipped (treated as the table header row).
for tr in tr_list[1:]:
    # Each cell's text carries line breaks and indentation padding from the
    # page markup; remove that padding, then trim the ends.
    date = tr.xpath('./td[1]/a/text()')[0].replace('\r\n', '').strip()
    condition = tr.xpath('./td[2]/text()')[0].replace('\r\n                                        ', '').strip()
    temp = tr.xpath('./td[3]/text()')[0].replace('\r\n                                        ', '').strip()
    dates.append(date)
    conditions.append(condition)
    tem.append(temp)

# Step 2: assemble the scraped lists into a table and persist it as CSV.
# The class is ``pd.DataFrame``; ``pd.dataframe`` raises AttributeError.
_date = pd.DataFrame({
    '日期': dates,
    '天气状况': conditions,
    '气温': tem,
})

# Rebuild the index as 0..n-1. ``reset_index`` returns a new frame, so the
# result has to be assigned (the original call discarded it).
data = pd.concat([_date]).reset_index(drop=True)

# index=False keeps the row index out of the file.
data.to_csv('changping.csv', index=False, encoding='utf-8')

# Step 3: data visualisation — plot daily high/low temperatures.

# Use a font that contains CJK glyphs so the Chinese labels render.
# The settings mapping is ``plt.rcParams``; ``plt.rcparams`` does not exist.
plt.rcParams['font.sans-serif'] = ['simhei']

# Render the minus sign with a plain hyphen so negative values display.
plt.rcParams['axes.unicode_minus'] = False

# Read back the file written by the scraping step. The original read
# 'guangzhou.csv', which this script never creates and which does not match
# the Changping chart title below.
df = pd.read_csv('changping.csv')
# print(df.isnull().sum())  # optional: count missing values per column

# The '气温' column holds "<high>/<low>" text; split once and reuse both halves.
temp_parts = df['气温'].str.split('/', expand=True)

# Remove the degree-Celsius sign and convert to integers
# (int() tolerates surrounding whitespace).
df['最高气温'] = temp_parts[0].map(lambda x: int(x.replace('℃', '')))
df['最低气温'] = temp_parts[1].map(lambda x: int(x.replace('℃', '')))

dates = df['日期']
tem_hight = df['最高气温']
tem_low = df['最低气温']

flg = plt.figure(dpi=128, figsize=(10, 6))  # figure resolution and size in inches
plt.plot(dates, tem_hight, c='red', alpha=0.5)  # c: line colour, alpha: opacity
plt.plot(dates, tem_low, c='blue', alpha=0.5)

# Shade the band between the daily high and low curves.
plt.fill_between(dates, tem_hight, tem_low, facecolor='blue', alpha=0.2)

# Chart formatting
plt.title('北京昌平2019年11月天气', fontsize=24)  # title
plt.xlabel('日期', fontsize=6)  # x-axis label and font size
flg.autofmt_xdate()  # slant the x tick labels so they do not overlap
plt.ylabel('气温', fontsize=12)  # y-axis label and font size
plt.tick_params(axis='both', which='major', labelsize=10)

# Only label every 20th date to keep the axis readable.
plt.xticks(dates[::20])
plt.show()

爬取天气信息并进行简单的数据可视化