爬取中国天气网上中国所有城市的最低气温，存入 MongoDB，并用 pyecharts 展示
程序员文章站
2022-03-22 20:48:40
...
网站地址: 中国天气网 (http://www.weather.com.cn/)
from bs4 import BeautifulSoup
import requests
from pyecharts.charts import Bar
from pyecharts import options as opts
from pymongo import MongoClient
class Weather:
    """Scrape per-city low temperatures from weather.com.cn, store and chart them.

    Workflow (see ``run``): download one forecast page per region code, pull
    ``{'city', 'low_temp'}`` records out of the HTML tables, sort them by
    temperature, replace the contents of the ``spider.weather`` MongoDB
    collection with them, and render a bar chart of the ten coldest cities.
    """

    # Region codes substituted into URL_TEMPLATE, one forecast page each.
    REGION_CODES = ('hb', 'db', 'hd', 'hz', 'hn', 'xb', 'xn', 'gat')
    URL_TEMPLATE = "http://www.weather.com.cn/textFC/{}.shtml#"
    # Seconds to wait for the server; without a timeout requests can block forever.
    REQUEST_TIMEOUT = 10

    def __init__(self):
        """Set request headers, the result list, and the MongoDB collection handle."""
        self.headers = {
            "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.87 Safari/537.36"}
        # Accumulates {'city': str, 'low_temp': int} dicts across all pages.
        self.cities_temp = []
        self.client = MongoClient(host="127.0.0.1", port=27017)
        self.weather = self.client['spider']['weather']

    def get_text(self, url):
        """Download ``url`` and return its body decoded with the detected encoding.

        Raises:
            requests.RequestException: on timeout, connection failure, or a
                non-success HTTP status.
        """
        response = requests.get(
            url, headers=self.headers, timeout=self.REQUEST_TIMEOUT)
        # Fail loudly on 4xx/5xx instead of trying to parse an error page.
        response.raise_for_status()
        response.encoding = response.apparent_encoding
        return response.text

    @staticmethod
    def _row_to_record(cells, first_row):
        """Turn one table row into a ``{'city', 'low_temp'}`` dict, or ``None``.

        Args:
            cells: one list of stripped strings per ``<td>`` in the row.
            first_row: True for the first data row of a table, where the city
                name is in the second cell rather than the first (presumably
                because a leading region/province cell spans the rows below).

        Returns:
            The record, or ``None`` when the row is too short or the
            second-to-last cell does not hold an integer.
        """
        city_index = 1 if first_row else 0
        try:
            city = cells[city_index][0]
            # The low temperature is read from the second-to-last cell.
            low_temp = int(cells[-2][0])
        except (IndexError, ValueError):
            return None
        return {'city': city, 'low_temp': low_temp}

    def parse_html(self, text):
        """Extract city/low-temperature records from ``text`` into ``cities_temp``.

        Only the first ``.conMidtab`` element is read. Pages without that
        element, and rows that cannot be decoded, are skipped.
        """
        # html5lib is a lenient parser that automatically completes missing tags.
        soup = BeautifulSoup(text, 'html5lib')
        container = soup.select_one('.conMidtab')
        if container is None:
            return
        for table in container.select('table'):
            # The first two rows of each table are skipped (presumably headers).
            data_rows = table.select('tr')[2:]
            for index, row in enumerate(data_rows):
                cells = [list(td.stripped_strings) for td in row.select('td')]
                record = self._row_to_record(cells, first_row=(index == 0))
                if record is not None:
                    self.cities_temp.append(record)

    def run(self):
        """Scrape every region page, then store the results and draw the chart."""
        for region in self.REGION_CODES:
            page = self.get_text(self.URL_TEMPLATE.format(region))
            self.parse_html(page)
        self.draw_bar_chart()

    def sort_cities_temp(self):
        """Sort ``cities_temp`` in place, coldest first."""
        self.cities_temp.sort(key=lambda item: item['low_temp'])

    def print_cities_temp(self):
        """Print the first ten entries of ``cities_temp`` as ``city low_temp``."""
        for item in self.cities_temp[:10]:
            print(item['city'], item['low_temp'])

    def draw_bar_chart(self):
        """Sort and persist the records, then render a bar chart of the ten coldest."""
        self.sort_cities_temp()
        self.save_to_db(self.cities_temp)
        coldest = self.cities_temp[:10]
        cities = [item['city'] for item in coldest]
        low_temp = [item['low_temp'] for item in coldest]
        bar = Bar()
        bar.add_xaxis(cities)
        bar.add_yaxis("温度", low_temp)
        bar.set_global_opts(title_opts=opts.TitleOpts(title="中国气温最低十大城市"))
        bar.render()

    def save_to_db(self, cities_temp):
        """Replace the collection's contents with ``cities_temp``.

        Does nothing when ``cities_temp`` is empty: ``insert_many`` rejects an
        empty document list, and wiping previously stored data after a scrape
        that produced nothing would only lose information.
        """
        if not cities_temp:
            return
        self.weather.delete_many({})
        self.weather.insert_many(cities_temp)
# Run the scraper only when executed as a script, so that importing this
# module does not trigger network requests or database writes.
if __name__ == "__main__":
    w = Weather()
    w.run()
上一篇: jquery怎么删除select中的选项