python爬取梨视频中的视频数据
程序员文章站
2022-04-11 17:13:30
...
# 爬取梨视频的视频数据
import os

import requests
from lxml import etree
# Browser-like request headers sent with every request to pearvideo.com.
headers = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.121 Safari/537.36'
}
# Category listing page; each <li> in its video list links to one video page.
url = 'https://www.pearvideo.com/category_5'
# Directory the downloaded .mp4 files are written to.
VIDEO_DIR = './video'
# Seconds to wait for the server before a request is abandoned.
REQUEST_TIMEOUT = 15
# Size of the pieces in which a video body is streamed to disk.
CHUNK_SIZE = 64 * 1024


def build_video_url(src_url, cont_id):
    """Return the downloadable video URL derived from *src_url*.

    The ``srcUrl`` reported by ``videoStatus.jsp`` has a file name of the
    form ``<token>-<rest>``; the downloadable file is the same path with the
    leading token replaced by *cont_id*, e.g.
    ``.../20210712/cont-1734895-15716990-hd.mp4``.

    Only the leading token of the last path segment is replaced, so a hyphen
    in the host name or a repeated token elsewhere in the URL cannot corrupt
    the result.

    Args:
        src_url: The ``srcUrl`` value from the video status response.
        cont_id: Replacement token, e.g. ``'cont-1734895'``.

    Returns:
        The rewritten URL, or *src_url* unchanged when its file name has no
        ``<token>-`` prefix to replace.
    """
    directory, slash, file_name = src_url.rpartition('/')
    token, dash, rest = file_name.partition('-')
    if not token or not dash:
        return src_url
    return directory + slash + cont_id + dash + rest


def download_video(detail_url, save_dir=VIDEO_DIR):
    """Download the video behind one detail page into *save_dir*.

    Args:
        detail_url: Absolute URL of the video page; the text after its last
            ``_`` is used as the content id.
        save_dir: Existing directory to write the ``.mp4`` file into.

    Returns:
        The file name (``cont-<id>.mp4``) that was written.

    Raises:
        requests.RequestException: On network failure or an HTTP error status.
        KeyError: If the status response lacks ``videoInfo/videos/srcUrl``.
        ValueError: If the status response is not valid JSON.
        OSError: If the target file cannot be written.
    """
    cont_id = detail_url.split('_')[-1]
    # The status endpoint is queried with the video page as Referer; build a
    # per-request copy instead of rebinding the shared module-level headers.
    request_headers = dict(headers, Referer=detail_url)

    status_url = 'https://www.pearvideo.com/videoStatus.jsp?contId=' + cont_id
    status = requests.get(url=status_url, headers=request_headers,
                          timeout=REQUEST_TIMEOUT)
    status.raise_for_status()
    src_url = status.json()['videoInfo']['videos']['srcUrl']

    cont = 'cont-' + cont_id
    video_url = build_video_url(src_url, cont)
    filename = cont + '.mp4'

    # Stream the body so a large video is never held in memory as a whole.
    video = requests.get(url=video_url, headers=request_headers, stream=True,
                         timeout=REQUEST_TIMEOUT)
    try:
        # Fail before creating the file so an error page is not saved as .mp4.
        video.raise_for_status()
        with open(os.path.join(save_dir, filename), 'wb') as fp:
            for chunk in video.iter_content(chunk_size=CHUNK_SIZE):
                fp.write(chunk)
    finally:
        video.close()
    return filename


def main():
    """Download every video linked from the category listing page.

    A failure on one video is reported and skipped so the remaining videos
    are still downloaded.
    """
    # open() does not create missing directories.
    os.makedirs(VIDEO_DIR, exist_ok=True)

    page = requests.get(url=url, headers=headers, timeout=REQUEST_TIMEOUT)
    page.raise_for_status()
    tree = etree.HTML(page.text)

    # NOTE: each download is a blocking, slow operation - the kind of work a
    # thread pool is meant for - but this script still runs them one by one.
    for li in tree.xpath('//ul[@id="listvideoListUl"]/li'):
        hrefs = li.xpath('./div/a/@href')
        if not hrefs:
            # List item without a video link; nothing to download.
            continue
        detail_url = 'https://www.pearvideo.com/' + hrefs[0]
        try:
            filename = download_video(detail_url)
        except (requests.RequestException, KeyError, ValueError, OSError) as exc:
            print(detail_url, '下载失败:', exc)
            continue
        print(filename, '下载完成!')


if __name__ == '__main__':
    main()