Python爬取猫眼电影排行
程序员文章站
2023-11-30 15:39:34
```Python
import requests
import pyquery def crawl_page(url: str) -> None: headers = { 'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleW... ......
import requests
import pyquery


def crawl_page(url: str) -> None:
    """Download one board page and hand its HTML to ``parse_page``.

    Args:
        url: Absolute URL of the board page to fetch.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
    """
    headers = {
        # Browser-like User-Agent sent with every request. Adjacent string
        # literals are used instead of a backslash continuation so that no
        # stray indentation ends up inside the header value.
        'user-agent': (
            'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
            'AppleWebKit/537.36 (KHTML, like Gecko) '
            'Chrome/72.0.3626.121 Safari/537.36'
        ),
    }
    # Without a timeout, requests may wait indefinitely on a stalled server.
    response = requests.get(url, headers=headers, timeout=10)
    # Fail loudly on 4xx/5xx instead of parsing an error page.
    response.raise_for_status()
    parse_page(response.text)


def _after_colon(text: str) -> str:
    """Return the part of *text* after its first colon, stripped.

    Both the ASCII colon and the full-width colon are treated as the
    separator. If *text* contains neither, the whole stripped text is
    returned instead of raising ``IndexError``.
    """
    _, separator, value = text.replace('：', ':').partition(':')
    return value.strip() if separator else text.strip()


def parse_page(source_code: str) -> None:
    """Extract every movie entry from a board page, print it and save it.

    Args:
        source_code: HTML source of one board page.
    """
    html = pyquery.PyQuery(source_code)
    # Each <dd> under .board-wrapper is treated as one movie entry.
    for dd_element in html('.board-wrapper dd').items():
        data = {
            '排名': dd_element.find('i.board-index').text(),
            '电影名': dd_element.find('a.image-link').attr('title'),
            # These two fields are expected to look like "label:value";
            # only the value part is kept.
            '主演': _after_colon(dd_element.find('p.star').text()),
            '上映时间': _after_colon(dd_element.find('p.releasetime').text()),
            '评分': dd_element.find('p.score').text(),
        }
        print(data)
        save_data(data)


def save_data(data: dict) -> None:
    """Append the string form of *data* as one line to ``maoyan.txt``.

    Args:
        data: Mapping describing a single movie entry.
    """
    with open('maoyan.txt', 'a+', encoding='utf8') as f:
        f.write(f'{data}\n')


def main():
    """Crawl the ten board pages (offsets 0, 10, ..., 90)."""
    for offset in range(0, 100, 10):
        crawl_page(f'https://maoyan.com/board/4?offset={offset}')


if __name__ == '__main__':
    main()
下一篇: 微信小程序云开发如何使用云函数生成二维码