欢迎您访问程序员文章站!本站旨在为大家提供、分享程序员计算机编程知识!
您现在的位置是: 首页

爬取中国天气网上中国所有城市最低气温,存入mongodb,并用pyecharts展示

程序员文章站 2022-03-22 20:48:40
...

网站地址: 中国天气网

爬取中国天气网上中国所有城市最低气温,存入mongodb,并用pyecharts展示
爬取中国天气网上中国所有城市最低气温,存入mongodb,并用pyecharts展示
爬取中国天气网上中国所有城市最低气温,存入mongodb,并用pyecharts展示
爬取中国天气网上中国所有城市最低气温,存入mongodb,并用pyecharts展示

from bs4 import BeautifulSoup
import requests
from pyecharts.charts import Bar
from pyecharts import options as opts
from pymongo import MongoClient


class Weather:
    """Scrape per-city low temperatures from weather.com.cn regional pages.

    The collected records are kept in ``self.cities_temp`` as dicts of the
    form ``{'city': str, 'low_temp': int}``, stored in the MongoDB collection
    ``spider.weather`` and rendered as a pyecharts bar chart of the ten
    coldest cities.
    """

    def __init__(self):
        """Set up request headers, the result list and the MongoDB handle."""
        # A browser-like User-Agent is sent with every page request.
        self.headers = {
            "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.87 Safari/537.36"}
        self.cities_temp = []
        self.client = MongoClient(host="127.0.0.1", port=27017)
        self.weather = self.client['spider']['weather']

    def get_text(self, url, timeout=10):
        """Download ``url`` and return the decoded page text.

        Args:
            url: Address of the page to fetch.
            timeout: Seconds to wait for the server before giving up, so a
                stalled connection cannot hang the whole run.

        Returns:
            The response body decoded with the encoding detected from its
            content.

        Raises:
            requests.RequestException: On connection problems, timeouts or a
                non-success HTTP status.
        """
        r = requests.get(url, headers=self.headers, timeout=timeout)
        # Fail loudly on 4xx/5xx instead of trying to parse an error page.
        r.raise_for_status()
        r.encoding = r.apparent_encoding
        return r.text

    def parse_html(self, text):
        """Extract city / low-temperature pairs from one regional page.

        Every usable table row is appended to ``self.cities_temp``. Rows that
        lack the expected cells or whose temperature cell is not an integer
        are skipped; a page without a ``.conMidtab`` container adds nothing.

        Args:
            text: HTML source of a regional forecast page.
        """
        # html5lib is a lenient parser that automatically completes
        # missing tags.
        soup = BeautifulSoup(text, 'html5lib')
        container = soup.select_one('.conMidtab')
        if container is None:
            return
        for tab in container.select('table'):
            # The first two rows of each table are skipped (presumably
            # header rows -- verify against the live page).
            rows = tab.select('tr')[2:]
            for index, tr in enumerate(rows):
                cells = tr.select('td')
                if len(cells) < 2:
                    continue
                # In the first data row the city name sits in the second
                # cell (the first cell presumably holds the province name
                # spanning the table); in later rows it is the first cell.
                city_cell = cells[1] if index == 0 else cells[0]
                city_parts = list(city_cell.stripped_strings)
                temp_parts = list(cells[-2].stripped_strings)
                if not city_parts or not temp_parts:
                    continue
                try:
                    low_temp = int(temp_parts[0])
                except ValueError:
                    # Non-numeric placeholder in the temperature cell.
                    continue
                self.cities_temp.append(
                    {'city': city_parts[0], 'low_temp': low_temp})

    def run(self):
        """Fetch and parse every regional page, then store and chart the data."""
        tplt_url = "http://www.weather.com.cn/textFC/{}.shtml#"
        # Page slugs of the regional forecast pages; named ``regions`` so the
        # builtin ``list`` is not shadowed.
        regions = ['hb', 'db', 'hd', 'hz', 'hn', 'xb', 'xn', 'gat']
        for region in regions:
            url = tplt_url.format(region)
            text = self.get_text(url)
            self.parse_html(text)
        self.draw_bar_chart()

    def sort_cities_temp(self):
        """Sort ``self.cities_temp`` in place, coldest city first."""
        self.cities_temp.sort(key=lambda item: item['low_temp'])

    def print_cities_temp(self):
        """Print the city name and low temperature of the first ten records."""
        for item in self.cities_temp[:10]:
            print(item['city'], item['low_temp'])

    def draw_bar_chart(self):
        """Sort the records, save them to MongoDB and render a bar chart.

        The chart shows the ten records with the lowest temperature and is
        written by ``Bar.render()`` using its default output location.
        """
        self.sort_cities_temp()
        self.save_to_db(self.cities_temp)
        top_ten = self.cities_temp[:10]
        cities = [item['city'] for item in top_ten]
        low_temp = [item['low_temp'] for item in top_ten]
        bar = Bar()
        bar.add_xaxis(cities)
        bar.add_yaxis("温度", low_temp)
        bar.set_global_opts(title_opts=opts.TitleOpts(title="中国气温最低十大城市"))
        bar.render()

    def save_to_db(self, cities_temp):
        """Replace the contents of the weather collection with ``cities_temp``.

        Args:
            cities_temp: List of ``{'city': ..., 'low_temp': ...}`` dicts.
                When it is empty nothing is deleted or inserted, so a failed
                scrape does not wipe previously stored data and
                ``insert_many`` is never handed an empty document list
                (which it rejects).
        """
        if not cities_temp:
            return
        self.weather.delete_many({})
        self.weather.insert_many(cities_temp)

# Run the scraper only when this file is executed as a script, so that
# importing the module does not trigger network requests or database writes.
if __name__ == "__main__":
    w = Weather()
    w.run()
相关标签: 爬虫