python3爬虫 —— 爬取豆瓣电影信息
程序员文章站
2022-05-02 16:49:55
...
爬取豆瓣电影 Top250 页面上的电影信息(排名、电影名称、导演、国家、年份、评分),并保存到 Excel 文件中。
代码:
import re,xlwt,requests
# Scrape one page of Douban's Top 250 movie list and store it in an .xls file.
#
# Only the single URL below is fetched; no paging parameters are added.

TOP250_URL = 'https://movie.douban.com/top250'
OUTPUT_FILE = '电影信息.xls'

# Column order of the sheet. parse_movies() yields its fields in this same
# order, so headings and data cannot drift apart.
HEADINGS = ['排名', '电影名称', '导演', '国家', '年份', '评分']

# The HTTP header name is "User-Agent"; a key spelled "user_agent" is sent as
# an unrelated header and leaves requests' default agent string in place.
REQUEST_HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36'
}

# Every movie entry starts with this marker (the same one the item regex of
# the previous version anchored on).
_ITEM_MARKER = 'div class="pic">'

# Response text is not entity-decoded, so field separators may show up either
# as the literal entity "&nbsp;" or as real whitespace. Accept both.
_GAP = r'(?:&nbsp;|\s)'

_RANK_RE = re.compile(r'<em[^>]*>(\d+)</em>')
# [^<]* instead of \w* so that titles containing punctuation still match.
_NAME_RE = re.compile(r'<span class="title">([^<]*)</span>')
# Skip the whitespace after the colon first; otherwise a lazy group would
# stop at that very first space and capture nothing.
_DIRECTOR_RE = re.compile(r'导演[::]\s*(.*?)(?:&nbsp;|\s{2,}|<|$)', re.MULTILINE)
# "<year> / <country> / ..." ; the look-behind keeps the tail of a longer
# number (e.g. a numeric id inside a URL) from being taken for a year.
_YEAR_COUNTRY_RE = re.compile(
    r'(?<!\d)(\d{4})' + _GAP + r'+/' + _GAP + r'+(.*?)' + _GAP + r'+/'
)
_SCORE_RE = re.compile(r'<span class="rating_num" property="v:average">([\d.]+)')


def _first(pattern, text):
    """Return the stripped first capture of *pattern* in *text*, or ''."""
    match = pattern.search(text)
    return match.group(1).strip() if match else ''


def parse_movies(html):
    """Extract one record per movie entry from the list page HTML.

    Args:
        html: Text of the list page.

    Returns:
        A list of ``(rank, name, director, country, year, score)`` tuples of
        strings, in page order and in the column order of ``HEADINGS``. A
        field that cannot be found is returned as ``''``.
    """
    movies = []
    # Splitting on the start marker (instead of matching up to the quote
    # paragraph) keeps entries apart even when one of them has no quote.
    for chunk in html.split(_ITEM_MARKER)[1:]:
        year_country = _YEAR_COUNTRY_RE.search(chunk)
        if year_country:
            year = year_country.group(1)
            country = year_country.group(2).strip()
        else:
            year = country = ''
        movies.append((
            _first(_RANK_RE, chunk),
            _first(_NAME_RE, chunk),      # first title span = primary title
            _first(_DIRECTOR_RE, chunk),
            country,
            year,
            _first(_SCORE_RE, chunk),
        ))
    return movies


def _fetch_page(url):
    """Download *url* and return its text; raises on HTTP error statuses."""
    response = requests.get(url, timeout=30, headers=REQUEST_HEADERS)
    response.raise_for_status()
    response.encoding = response.apparent_encoding
    return response.text


def _save_movies(movies, path):
    """Write the heading row plus one row per movie to an .xls file at *path*."""
    book = xlwt.Workbook()
    sheet = book.add_sheet('movie')
    for col, heading in enumerate(HEADINGS):
        sheet.write(0, col, heading)
    for row, movie in enumerate(movies, start=1):
        # Cells receive plain strings; the raw list returned by re.findall
        # is not a value type the sheet can store.
        for col, value in enumerate(movie):
            sheet.write(row, col, value)
    book.save(path)


def main():
    """Fetch the list page, parse it and save the result to ``OUTPUT_FILE``."""
    try:
        html = _fetch_page(TOP250_URL)
        _save_movies(parse_movies(html), OUTPUT_FILE)
    except (requests.RequestException, OSError) as exc:
        # Report network and file errors together with their cause instead
        # of hiding every exception (including programming errors).
        print('失败', exc)


if __name__ == '__main__':
    main()
上一篇: python爬虫,爬取豆瓣电影信息