python 爬虫---爬取一周天气预报信息
程序员文章站
2022-07-14 15:46:03
...
爬取湛江一周的天气状况(日期、天气状况、当天的最高、最低温度)
注意:以下代码需要先导入 requests、lxml.html 和 json(import requests、import lxml.html、import json)。
1、爬取网站信息
def parse_url(url, headers, timeout=10):
    """Download *url* and return the response body decoded as UTF-8.

    Args:
        url: Address of the page to fetch.
        headers: Mapping of HTTP request headers forwarded to
            ``requests.get`` (for example a ``User-Agent``).
        timeout: Seconds to wait for the server before giving up. Without
            a timeout ``requests.get`` may block indefinitely on a stalled
            connection.

    Returns:
        The raw response bytes decoded with UTF-8, as a ``str``.

    Raises:
        requests.exceptions.HTTPError: If the server answers with a 4xx or
            5xx status code.
        requests.exceptions.Timeout: If the server does not answer within
            *timeout* seconds.
        UnicodeDecodeError: If the body is not valid UTF-8.
    """
    response = requests.get(url, headers=headers, timeout=timeout)
    # Fail loudly on an error status instead of handing an error page to
    # the HTML parser, which would silently yield no forecast entries.
    response.raise_for_status()
    return response.content.decode("utf-8")
2、提取有用数据
def get_weather_data(html_content):
    """Extract the per-day forecast entries from the forecast page HTML.

    Every ``<li>`` found under ``div.c7d > ul.t.clearfix`` becomes one
    record.

    Args:
        html_content: HTML source of the forecast page, as a string.

    Returns:
        A list of dicts, one per ``<li>``, with the keys:

        * ``"name"``: text of the entry's ``<h1>``.
        * ``"weather"``: text of ``<p class="wea">``.
        * ``"tem_low"``: text of the ``<i>`` inside ``<p class="tem">``.
        * ``"tem_up"``: text of the ``<span>`` inside ``<p class="tem">``,
          or ``None`` when the page has no such ``<span>`` for that entry.

    Raises:
        IndexError: If an entry lacks the ``<h1>``, ``<p class="wea">`` or
            ``<p class="tem">/<i>`` text.
    """
    metree = lxml.html.etree
    # Build the parsed document tree.
    parser = metree.HTML(html_content, metree.HTMLParser())
    # Select every <li> of the forecast list with XPath.
    li_list = parser.xpath("//div[@class='c7d']/ul[@class='t clearfix']/li")

    data = []
    for element in li_list:
        # The high temperature can be absent from an entry, so indexing
        # [0] directly would raise IndexError. Store the first text node
        # when present and None otherwise, instead of the raw result list.
        tem_up = element.xpath("./p[@class='tem']/span/text()")
        item = {
            "name": element.xpath("./h1/text()")[0],
            "weather": element.xpath("./p[@class='wea']/text()")[0],
            "tem_low": element.xpath("./p[@class='tem']/i/text()")[0],
            "tem_up": tem_up[0] if tem_up else None,
        }
        data.append(item)
    return data
3、保存提取的数据
def save_weather_file(weather_data, file_path="./weather/weather.json"):
    """Serialize *weather_data* to JSON, print it, and write it to a file.

    Args:
        weather_data: JSON-serializable forecast records (a list of dicts).
        file_path: Destination file. Defaults to ``./weather/weather.json``
            relative to the current working directory.

    Raises:
        TypeError: If *weather_data* contains values ``json.dumps`` cannot
            serialize.
        OSError: If the directory or file cannot be created or written.
    """
    # Local import so the block brings its own dependency into scope.
    import os

    # ensure_ascii=False keeps non-ASCII text readable in the output.
    json_strs = json.dumps(weather_data, ensure_ascii=False, indent=2)
    print(json_strs)

    # open(..., "w") does not create missing parent directories, so create
    # the target directory first; a bare file name has no directory part.
    directory = os.path.dirname(file_path)
    if directory:
        os.makedirs(directory, exist_ok=True)

    with open(file_path, "w", encoding="utf-8") as files:
        files.write(json_strs)
4、主函数 main
def main():
    """Run the scraper end to end: download, extract, then save."""
    # Step 1: download the forecast page, sending a browser User-Agent.
    user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.86 Safari/537.36"
    request_headers = {"User-Agent": user_agent}
    forecast_url = "http://www.weather.com.cn/weather/10128100101A.shtml"
    page_source = parse_url(forecast_url, request_headers)

    # Step 2: pull the forecast records out of the HTML.
    forecast_records = get_weather_data(page_source)

    # Step 3: write the records out as JSON.
    save_weather_file(forecast_records)


if __name__ == "__main__":
    main()
结果
还没解决的问题是:当最高温度为空时,取下标 [0] 会数组越界,
所以目前只能输出整个最高温度的列表(带有“[ ]”)。
上一篇: python爬取地区天气情况
下一篇: 光谱预处理——R语言prospectr包