Python实战---使用BeautifulSoup和pyecharts爬取中国天气网并展示
程序员文章站
2022-07-14 16:51:12
...
使用BeautifulSoup和pyecharts爬取中国天气网并展示
1、使用requests进行页面抓取
2、使用BeautifulSoup进行页面分析
3、使用pyecharts进行数据展示(柱状图)
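爬取的页面为:中国天气网的文本预报页面(http://www.weather.com.cn/textFC/ 下的各区域页面,例如 http://www.weather.com.cn/textFC/hb.shtml,完整列表见下方代码)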
爬取的数据字段:
城市 city
最低温度 min_temperature
实现代码:
'''
@Description: 爬取中国天气网
@Author: sikaozhifu
@Date: 2020-06-08 13:53:15
@LastEditTime: 2020-06-08 16:17:11
@LastEditors: Please set LastEditors
'''
import requests
from bs4 import BeautifulSoup
from pyecharts.charts import Bar
from pyecharts import options
# Accumulates one {'city': ..., 'min_temperature': ...} dict per scraped row;
# filled by parse_weather() and read (and sorted in place) by show_weather().
city_data = []
def parse_weather(url):
    """Scrape one regional forecast page and record each city's low.

    Downloads ``url``, looks up the first ``div`` with class ``conMidtab``
    and, for every data row of every table inside it, appends
    ``{'city': <str>, 'min_temperature': <int>}`` to the module-level
    ``city_data`` list. Rows that are too short, have an empty cell, or
    whose temperature cell is not an integer are skipped.

    Args:
        url: Address of a regional text-forecast page.

    Returns:
        None. Results are accumulated in ``city_data``.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
    """
    headers = {
        'User-Agent':
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.61 Safari/537.36'
    }
    # requests waits indefinitely unless a timeout is given; bound it so one
    # stalled page cannot hang the whole crawl.
    response = requests.get(url, headers=headers, timeout=10)
    # Fail loudly on 4xx/5xx instead of trying to parse an error page.
    response.raise_for_status()
    text = response.content.decode('utf-8')
    # html5lib rather than lxml: per the original author's note, the markup of
    # the Hong Kong/Macau/Taiwan page is irregular.
    soup = BeautifulSoup(text, 'html5lib')
    conMidtab = soup.find('div', class_='conMidtab')
    if conMidtab is None:
        # Unexpected page layout: there is nothing to extract.
        return
    for table in conMidtab.find_all('table'):
        # The first two rows are skipped (presumably table headers -- confirm
        # against the live page).
        rows = table.find_all('tr')[2:]
        for index, tr in enumerate(rows):
            tds = tr.find_all('td')
            if len(tds) < 2:
                # Both tds[1] and tds[-2] need at least two cells.
                continue
            # In the first data row the city name sits in the second cell
            # (presumably the first one holds the province name); every later
            # row starts with the city cell.
            city_td = tds[1] if index == 0 else tds[0]
            city = next(city_td.stripped_strings, None)
            low_text = next(tds[-2].stripped_strings, None)
            if city is None or low_text is None:
                continue
            try:
                min_temperature = int(low_text)
            except ValueError:
                # Non-numeric cell: skip this row rather than abort the crawl.
                continue
            city_data.append({
                'city': city,
                'min_temperature': min_temperature,
            })
def weather_spider_main():
    """Run ``parse_weather`` once for each regional forecast page, in order."""
    region_pages = (
        'http://www.weather.com.cn/textFC/hb.shtml',
        'http://www.weather.com.cn/textFC/db.shtml',
        'http://www.weather.com.cn/textFC/hd.shtml',
        'http://www.weather.com.cn/textFC/hz.shtml',
        'http://www.weather.com.cn/textFC/hn.shtml',
        'http://www.weather.com.cn/textFC/xb.shtml',
        'http://www.weather.com.cn/textFC/xn.shtml',
        'http://www.weather.com.cn/textFC/gat.shtml',
    )
    for page_url in region_pages:
        parse_weather(page_url)
def show_weather():
    """Render a bar chart of the ten lowest minimum temperatures.

    Sorts the module-level ``city_data`` list in place by ascending
    ``min_temperature``, takes its first ten entries, and writes the chart
    to ``bar_min_temperature.html``.
    """
    city_data.sort(key=lambda entry: entry['min_temperature'])
    coldest = city_data[:10]
    names = [entry['city'] for entry in coldest]
    lows = [entry['min_temperature'] for entry in coldest]

    chart = Bar()
    chart.add_xaxis(names)
    chart.add_yaxis('温度', lows)
    chart.set_global_opts(
        title_opts=options.TitleOpts(title='中国温度最低的十大城市')
    )
    chart.render('bar_min_temperature.html')
if __name__ == '__main__':
    # Collect data from every regional page first, then chart the results.
    weather_spider_main()
    show_weather()
效果图展示:运行脚本后会生成 bar_min_temperature.html 文件,用浏览器打开即可查看柱状图。
上一篇: 爬取全国各个城市的最低气温