爬虫程序------天气
程序员文章站
2022-05-04 11:24:54
...
输入城市名称,爬取该城市在中国天气网(weather.com.cn)上的七天天气预报。
代码如下:
# author:WN
# datetime:2019/10/27 11:03
import requests
import re
from bs4 import BeautifulSoup
from bs4 import UnicodeDammit
def find_city_num():
    """Prompt for a city, look up its code on weather.com.cn, and print its forecast.

    Reads a city name from standard input and queries the search endpoint.
    The first run of digits in the response body is taken as the city code
    and handed to ``city_weather`` together with the request headers.
    If the input is blank or the response contains no digits, a
    "no such city" message is printed instead.

    Network errors raised by ``requests`` (including timeouts) propagate to
    the caller.
    """
    city = input("请输入要查询的城市:").strip()
    if not city:
        # Nothing to search for; skip the network round-trip.
        print('没有该城市!')
        return

    url = 'http://toy1.weather.com.cn/search'
    # Browser-like User-Agent, reused for the forecast page request as well.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.120 Safari/537.36',
    }
    # Let requests percent-encode the query value. Building it from the
    # repr of the UTF-8 bytes left stray quote characters in the URL and
    # did not escape reserved characters such as '&', '#' or spaces.
    response = requests.get(
        url,
        params={'cityname': city},
        headers=headers,
        timeout=10,
    )

    # NOTE(review): assumes the first digit run in the response is the city
    # code; confirm against the search endpoint's actual response format.
    match = re.search(r'\d+', response.text)
    if match is None:
        print('没有该城市!')
        return
    city_weather(match.group(), headers)
def _first_text(node, selector):
    """Return the text of the first element under ``node`` matching ``selector``."""
    return node.select(selector)[0].text


def city_weather(city_num, headers):
    """Fetch the forecast page for ``city_num`` and print one line per entry.

    Args:
        city_num: City code interpolated into the weather.com.cn page URL.
        headers: HTTP headers sent with the request.

    For every ``li`` under the ``ul`` whose class is ``"t clearfix"``, prints
    the date, weather description, temperature and wind level separated by
    single spaces.
    """
    page_url = 'http://www.weather.com.cn/weather/%s.shtml' % city_num
    page = requests.get(page_url, headers=headers)

    # Decode the raw bytes ourselves, offering gbk and utf8 as candidate
    # encodings, instead of relying on the response's declared charset.
    markup = UnicodeDammit(page.content, ['gbk', 'utf8']).unicode_markup
    soup = BeautifulSoup(markup, 'lxml')

    for entry in soup.select('ul[class="t clearfix"] li'):
        fields = (
            _first_text(entry, 'h1'),                         # date
            _first_text(entry, 'p'),                          # weather
            _first_text(entry, 'p[class="tem"]').strip('\n'),  # temperature
            _first_text(entry, 'p[class="win"] i'),           # wind level
        )
        print('{0} {1} {2} {3}'.format(*fields))
# Run the interactive lookup only when executed as a script, not on import.
if __name__ == "__main__":
    find_city_num()
上一篇: 豆瓣爬虫程序
下一篇: 读Zepto源码之操作DOM