欢迎您访问程序员文章站!本站旨在为大家提供、分享程序员计算机编程知识!
您现在的位置是: 首页

爬虫程序------天气

程序员文章站 2022-05-04 11:24:54
...

输入城市,爬取该城市在中国天气网的七天天气

代码如下:

# author:WN
# datetime:2019/10/27 11:03
import requests
import re
from bs4 import BeautifulSoup
from bs4 import UnicodeDammit


def find_city_num():
    """Prompt for a city name, look up its code, and print its forecast.

    The city name is read from standard input and sent to the
    toy1.weather.com.cn search endpoint. The first run of digits found in
    the response body is treated as the city code and passed, together
    with the request headers, to ``city_weather``.

    Prints '没有该城市!' when the input is blank or the response contains
    no digits. Network errors raised by ``requests`` (including the
    timeout) propagate to the caller.
    """
    city = input("请输入要查询的城市:").strip()
    if not city:
        print('没有该城市!')
        return

    url = 'http://toy1.weather.com.cn/search'
    # Browser-like User-Agent sent with both the search and forecast requests.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.120 Safari/537.36',
    }

    # Let requests percent-encode the query value. Building it from
    # str(bytes) left the repr's quote characters in the URL and did not
    # encode anything that repr() shows without a \x escape.
    response = requests.get(
        url,
        params={'cityname': city},
        headers=headers,
        timeout=10,  # avoid hanging forever on an unresponsive server
    )

    # Check the match explicitly instead of catching every Exception
    # around ``.group()``, which would also hide unrelated failures.
    match = re.search(r'\d+', response.text)
    if match is None:
        print('没有该城市!')
        return

    city_weather(match.group(), headers)


def city_weather(city_num, headers):
    """Fetch a city's forecast page from weather.com.cn and print each entry.

    Args:
        city_num: City code interpolated into the forecast page URL.
        headers: HTTP headers sent with the request.

    For every ``<li>`` under ``ul.t.clearfix`` one line is printed with the
    date, weather description, temperature and wind level. Entries that
    lack any of these elements are skipped. Network errors raised by
    ``requests`` (including the timeout) propagate to the caller.
    """
    url = 'http://www.weather.com.cn/weather/%s.shtml' % city_num
    response = requests.get(url, headers=headers, timeout=10)

    # Try UTF-8 first: it is strict, so non-UTF-8 bytes fail and fall
    # through to GBK. GBK accepts many UTF-8 byte sequences and would
    # decode them to garbage if tried first.
    dammit = UnicodeDammit(response.content, ['utf8', 'gbk'])
    soup = BeautifulSoup(dammit.unicode_markup, 'lxml')

    for item in soup.select('ul[class="t clearfix"] li'):
        date_tag = item.select_one('h1')                 # date
        weather_tag = item.select_one('p')               # weather description
        temp_tag = item.select_one('p[class="tem"]')     # temperature
        wind_tag = item.select_one('p[class="win"] i')   # wind level

        # Skip list items that do not have the expected structure rather
        # than crashing with an IndexError halfway through the output.
        tags = (date_tag, weather_tag, temp_tag, wind_tag)
        if any(tag is None for tag in tags):
            continue

        print('{0}  {1}  {2}  {3}'.format(
            date_tag.text,
            weather_tag.text,
            temp_tag.text.strip('\n'),
            wind_tag.text,
        ))


# Run the interactive lookup only when this file is executed as a script.
if "__main__" == __name__:
    find_city_num()