案例 — 使用bs4 爬取猫眼电影热榜
程序员文章站
2022-05-27 08:53:57
使用 BeautifulSoup 库(bs4 模块),主要使用 select、for 循环,并将结果存入本地 txt ......
使用 BeautifulSoup 库(bs4 模块),主要使用 select、for 循环,并将结果存入本地 txt
# Scrape the Maoyan movie board page and save one line per movie
# (title, release time, starring cast, score) to a local text file.
from bs4 import BeautifulSoup  # the class name is case-sensitive
from urllib import request

url = "http://maoyan.com/board"

# Use the response as a context manager so the connection is always closed.
# NOTE(review): the site may reject urllib's default User-Agent; if the
# request fails or returns an unexpected page, send a browser-like header
# via request.Request(url, headers=...) -- confirm against the live site.
with request.urlopen(url) as rsq:
    html = rsq.read().decode()

soup = BeautifulSoup(html, "lxml")
items = soup.select('dd')  # every <dd> element is treated as one movie entry

# Create (or overwrite) the local output file; the `with` block closes it,
# so no explicit close() call is needed.
with open("d://maoyan.txt", "w", encoding="utf-8") as f:
    for item in items:
        # Each select() returns a list of matches; the first match is used.
        # Movie title.
        title = item.select('p a[data-act="boarditem-click" ]')[0].get_text()
        # Starring cast: drop embedded newlines and surrounding spaces.
        star = item.select('p[class = "star"]')[0].get_text().replace("\n", "").strip(" ")
        # Score: trim surrounding newlines, then surrounding spaces.
        score = item.select('p[class = "score"]')[0].get_text().strip('\n').strip(' ')
        # Release time.
        releasetime = item.select('p[class = "releasetime"]')[0].get_text()

        # Assemble one output record per movie.
        datas = title + " " + releasetime + " " + star + " " + score + "\n"
        print(datas)
        f.write(datas)  # append this movie's record to the file

print("sucessful")