python requests+pyquery爬取猫眼top100
程序员文章站
2022-05-02 18:08:48
...
import requests
from pyquery import PyQuery as pq
import time
import json
def get_one_page(url):
    """Download a page and return its body decoded as UTF-8 text.

    Args:
        url: Address of the page to fetch.

    Returns:
        The response body as a ``str`` (decoded with the default UTF-8
        codec).

    Raises:
        requests.RequestException: On connection failure, timeout, or an
            HTTP error status (4xx/5xx).
    """
    # Some sites reject the default python-requests User-Agent, so present
    # a browser-like one instead.
    headers = {
        'User-Agent': (
            'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
            'AppleWebKit/537.36 (KHTML, like Gecko) '
            'Chrome/100.0.0.0 Safari/537.36'
        ),
    }
    # requests has no default timeout; without one a stalled connection
    # would block the crawler forever.
    res = requests.get(url, headers=headers, timeout=10)
    # Fail loudly rather than hand an error page to the parser.
    res.raise_for_status()
    return res.content.decode()
def parse_one_page(html):
    """Yield one dict per ``dd`` entry under ``.board-wrapper`` in *html*.

    Each yielded dict has the keys '排名', '剧名', '主演', '上映时间' and
    '评分', filled from the entry's first ``i`` element, ``.name>a``,
    ``.star``, ``.releasetime`` and the first two ``.score i`` elements
    respectively.

    Args:
        html: Page markup as a string.

    Yields:
        dict: The extracted text fields of one entry.
    """
    document = pq(html)
    entries = document('.board-wrapper dd')
    for entry in entries.items():
        rank = entry.find('i').eq(0).text()
        title = entry.find('.name>a').text()
        cast = entry.find('.star').text()
        release = entry.find('.releasetime').text()
        # The score is spread over two <i> elements; join their texts.
        score_parts = entry.find('.score i')
        score = score_parts.eq(0).text() + score_parts.eq(1).text()

        record = {}
        record['排名'] = rank
        record['剧名'] = title
        # Drop the first 3 characters (presumably the "主演：" label).
        record['主演'] = cast[3:]
        # Drop the first 5 characters (presumably the "上映时间：" label).
        record['上映时间'] = release[5:]
        record['评分'] = score
        yield record
def write_one_page(parse):
    """Append *parse* to ``maoyan.txt`` as one line of JSON.

    Non-ASCII characters are written as-is (not ``\\u`` escaped) and the
    file is encoded as UTF-8.

    Args:
        parse: A JSON-serializable object (the caller passes one record dict).
    """
    with open('maoyan.txt', mode='a', encoding='utf-8') as out:
        serialized = json.dumps(parse, ensure_ascii=False)
        out.write(serialized + '\n')
def main(offset):
    """Fetch the board page at *offset*, then print and save every record.

    Args:
        offset: Value placed in the ``offset`` query parameter of the
            board URL.
    """
    page_url = f'https://maoyan.com/board/4?offset={offset}'
    page_html = get_one_page(page_url)
    records = parse_one_page(page_html)
    for record in records:
        # Echo each record to stdout before appending it to the output file.
        print(record)
        write_one_page(record)
if __name__ == '__main__':
    # Walk the board through offsets 0, 10, ..., 90.
    for i in range(10):
        main(i * 10)
        # Report success only after the page has actually been fetched,
        # parsed and written; printing it first would claim success even
        # when main() goes on to fail.
        print('第%d页解析成功' % (i + 1))
        # Pause between requests.
        time.sleep(2)
上一篇: pyquery抓取学校招生信息
下一篇: python字典根据值去排序