用 Python 爬取中国天气网北京、上海、成都 8-15 天的天气
程序员文章站
2022-07-14 17:56:03
...
2. 爬取北京、上海、成都的天气
from bs4 import BeautifulSoup
import random
import requests
import socket
import time
import http.client
import csv
def get_html(url, data=None, max_retries=None):
    """Fetch a page with browser-like request headers and return its text.

    Transient network failures are printed, followed by a randomised pause,
    and the request is retried.

    Args:
        url: Address of the page to download.
        data: Unused; kept so existing callers that pass it keep working.
        max_retries: Maximum number of retries after the first failed
            attempt. ``None`` (the default) retries forever, which is the
            historical behaviour of this function.

    Returns:
        The response body decoded as UTF-8.

    Raises:
        The last network exception, once ``max_retries`` retries have been
        used up (never when ``max_retries`` is ``None``).
    """
    header = {
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
        'Accept-Encoding': 'gzip, deflate, sdch',
        'Accept-Language': 'zh-CN,zh;q=0.8',
        'Connection': 'keep-alive',
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/43.0.235'
    }
    timeout = random.choice(range(80, 180))
    failures = 0
    while True:
        # requests reports failures through its own exception classes, so
        # those are listed next to the low-level ones. The specific handlers
        # must come before socket.error (an alias of OSError), because every
        # requests exception is an OSError subclass as well.
        try:
            rep = requests.get(url, headers=header, timeout=timeout)
            rep.encoding = "utf-8"
        except (requests.exceptions.Timeout, socket.timeout) as e:
            label, pause, error = "3:", random.choice(range(8, 15)), e
        except (requests.exceptions.ChunkedEncodingError,
                http.client.IncompleteRead) as e:
            label, pause, error = "6:", random.choice(range(5, 15)), e
        except http.client.BadStatusLine as e:
            label, pause, error = "5:", random.choice(range(30, 80)), e
        except socket.error as e:
            label, pause, error = "4:", random.choice(range(20, 60)), e
        else:
            return rep.text

        print(label, error)
        failures += 1
        if max_retries is not None and failures > max_retries:
            raise error
        time.sleep(pause)
def _tag_string(tag):
    """Return ``tag.string``, or ``None`` when the tag itself was not found."""
    return tag.string if tag is not None else None


def get_data(html_txt):
    """Extract the per-day forecast rows from a 15-day forecast page.

    The forecast is read from the ``<li>`` items of the first ``<ul>`` inside
    the ``<div id="15d">`` element.

    Args:
        html_txt: HTML source of the forecast page.

    Returns:
        A list with one entry per ``<li>``. Each entry is a five-item list
        holding the ``.string`` of ``span.time``, ``span.wea``, ``span.wind``,
        ``span.wind1`` and the first ``<em>``, in that order. An item is
        ``None`` when its tag is missing. The result is an empty list when
        the page has no forecast container.
    """
    bs = BeautifulSoup(html_txt, "html.parser")
    body = bs.body
    if body is None:
        return []

    # The original call passed a second dict ({"class": "c15d"}) as the third
    # positional argument, which BeautifulSoup treats as ``recursive``; the
    # class was therefore never used as a filter. Matching on the id alone
    # keeps the effective behaviour and makes it explicit.
    container = body.find("div", attrs={"id": "15d"})
    ul = container.find("ul") if container is not None else None
    if ul is None:
        return []

    final = []
    for day in ul.find_all("li"):
        final.append([
            _tag_string(day.find("span", {"class": "time"})),
            _tag_string(day.find("span", {"class": "wea"})),
            _tag_string(day.find("span", {"class": "wind"})),
            _tag_string(day.find("span", {"class": "wind1"})),
            _tag_string(day.find("em")),
        ])
    return final
def get_url():
    """Ask the user for a city and return its 15-day forecast URL.

    The supported city names are printed first. The prompt is repeated until
    the user enters one of them, so a typo no longer aborts the program with
    a ``KeyError``. Surrounding whitespace in the answer is ignored.

    Returns:
        The forecast page URL for the chosen city.
    """
    city = {
        "北京": "101010100",
        "上海": "101020100",
        "成都": "101190401",
    }
    for k in city:
        print(k)
    while True:
        city_name = input("请输入你要查询的城市名字:").strip()
        city_num = city.get(city_name)
        if city_num is not None:
            break
        print("没有找到城市:%s,请重新输入" % city_name)
    weather_url = "http://www.weather.com.cn/weather15d/%s.shtml" % city_num
    return weather_url
if __name__ == "__main__":
    # Keep answering city queries until the user interrupts the program
    # (Ctrl-C) or closes standard input; both end the loop quietly instead
    # of dumping a traceback.
    try:
        while True:
            url = get_url()
            html = get_html(url)
            result = get_data(html)
            for i in result:
                print(i)
    except (KeyboardInterrupt, EOFError):
        print()
上一篇: 练习题︱基于今日头条开源数据的词共现、新热词发现、短语发现
下一篇: hadoop伪分布式的搭建