Python爬取猫眼榜单
程序员文章站
2022-03-02 19:25:19
...
import urllib.request
import urllib.parse
# Interactive crawler for the Maoyan "board/4" ranking pages.
#
# Each page is requested with an ``offset`` query parameter that advances in
# steps of PAGE_SIZE. The raw HTML of every page is saved to "第N页.html" in
# the current working directory, and the user is asked after each page
# whether to continue.

# Base URL; the trailing "?" lets the encoded query string be appended as-is.
url = "http://maoyan.com/board/4?"
# Browser-like User-Agent sent with every request.
headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36"}

PAGE_SIZE = 10   # offset step between consecutive pages
MAX_OFFSET = 90  # crawling stops once the next offset would exceed this


def page_offset(page: int) -> int:
    """Return the ``offset`` query value for a 1-based page number."""
    return (page - 1) * PAGE_SIZE


def build_page_url(page: int) -> str:
    """Return the full request URL for a 1-based page number."""
    query = urllib.parse.urlencode({"offset": page_offset(page)})
    return url + query


def fetch_page(page: int) -> str:
    """Download one page and return its body decoded as UTF-8.

    Errors raised by ``urllib.request.urlopen`` (for example
    ``urllib.error.URLError``) and ``UnicodeDecodeError`` propagate to the
    caller.
    """
    request = urllib.request.Request(build_page_url(page), headers=headers)
    # The context manager closes the HTTP response even if read/decode fails.
    with urllib.request.urlopen(request) as response:
        return response.read().decode("utf-8")


def save_page(page: int, html: str) -> None:
    """Write ``html`` to "第<page>页.html", reporting progress on stdout."""
    # "w" rather than "a": re-running the crawler must replace the page file,
    # not append a second HTML document to it.
    with open("第%d页.html" % page, "w", encoding="utf-8") as f:
        print("正在写入第%d页" % page)
        f.write(html)
    print("第%d页写入完成" % page)


def main() -> None:
    """Fetch and save pages one at a time until the user stops or pages run out."""
    page = 1
    while True:
        # Each page is fetched exactly once, at the top of the loop.
        save_page(page, fetch_page(page))

        answer = input("是否继续爬取(y/n)")
        if answer != "y":
            print("爬取结束谢谢使用")
            break

        page += 1
        if page_offset(page) > MAX_OFFSET:
            print("已全部爬取,爬虫自动关闭")
            break


if __name__ == "__main__":
    main()