爬取天气信息并进行简单的数据可视化
程序员文章站
2022-06-05 18:15:37
```python import requests import pandas as pd from matplotlib import pyplot as plt from lxml import etree url = 'http://www.tianqihoubao.com/lishi/cha... ......
# Scrape one month of daily weather history and plot the temperature range.
#
# Steps:
#   1. Download the history page at URL and read the table under #content.
#   2. Save date / weather condition / temperature to CSV_PATH.
#   3. Reload CSV_PATH, split the temperature column into high and low values,
#      and draw both series with the area between them shaded.
import requests
import pandas as pd
from matplotlib import pyplot as plt
from lxml import etree

URL = 'http://www.tianqihoubao.com/lishi/changping/month/201911.html'
HEADERS = {
    'user-agent': (
        'mozilla/5.0 (windows nt 10.0; win64; x64) '
        'applewebkit/537.36 (khtml, like gecko) '
        'chrome/78.0.3904.97 safari/537.36'
    )
}
# A single path is used for both writing and reading so the chart is always
# built from the data that was just scraped.
CSV_PATH = 'changping.csv'
COLUMNS = ['日期', '天气状况', '气温']


def _first_text(row, xpath, junk):
    """Return the first text node matched by *xpath* with *junk* removed.

    Returns None when the row has no matching node, so callers can skip
    malformed rows instead of failing with IndexError.
    """
    nodes = row.xpath(xpath)
    if not nodes:
        return None
    return nodes[0].replace(junk, '').strip()


def _to_degrees(text):
    """Convert a temperature such as '15℃' to an int (int() ignores spaces)."""
    return int(text.replace('℃', ''))


# --- Scraping -------------------------------------------------------------
response = requests.get(URL, headers=HEADERS, timeout=10)
response.raise_for_status()  # fail loudly on HTTP errors instead of parsing an error page
tree = etree.HTML(response.text)

rows = []
# The first <tr> is skipped (presumably the table's header row).
for tr in tree.xpath('//*[@id="content"]/table//tr')[1:]:
    record = (
        _first_text(tr, './td[1]/a/text()', '\r\n'),
        _first_text(tr, './td[2]/text()', '\r\n '),
        _first_text(tr, './td[3]/text()', '\r\n '),
    )
    if None in record:
        # Row does not have the expected three cells; ignore it.
        continue
    rows.append(record)

if not rows:
    raise SystemExit(f'no weather rows found at {URL}')

pd.DataFrame(rows, columns=COLUMNS).to_csv(CSV_PATH, index=False, encoding='utf-8')

# --- Visualisation --------------------------------------------------------
# Use a font that contains Chinese glyphs for titles and labels.
plt.rcParams['font.sans-serif'] = ['simhei']
# Keep the minus sign renderable with that font.
plt.rcParams['axes.unicode_minus'] = False

df = pd.read_csv(CSV_PATH, encoding='utf-8')

# The temperature column holds "<high>/<low>"; split it once and reuse.
temperature_parts = df['气温'].str.split('/', expand=True)
df['最高气温'] = temperature_parts[0].map(_to_degrees)
df['最低气温'] = temperature_parts[1].map(_to_degrees)

dates = df['日期']
tem_high = df['最高气温']
tem_low = df['最低气温']

fig = plt.figure(dpi=128, figsize=(10, 6))  # figure resolution and size
plt.plot(dates, tem_high, c='red', alpha=0.5)  # c: line colour, alpha: opacity
plt.plot(dates, tem_low, c='blue', alpha=0.5)
plt.fill_between(dates, tem_high, tem_low, facecolor='blue', alpha=0.2)

# Chart formatting
plt.title('北京昌平2019年11月天气', fontsize=24)
plt.xlabel('日期', fontsize=6)
fig.autofmt_xdate()  # slant the date labels so they do not overlap
plt.ylabel('气温', fontsize=12)
plt.tick_params(axis='both', which='major', labelsize=10)
plt.xticks(dates[::20])  # only label every 20th date
plt.show()