Python3 爬取梨视频的短视频链接
程序员文章站
2022-09-20 14:59:35
python3爬取梨视频的视频链接运行效果如图#____author:"xie"#date:2020-11-12# -*- coding: utf-8 -*-import requestsimport re,time#需求:爬取梨视频的视频数据headers = { 'User-Agent':'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/7...
python3爬取梨视频的视频链接
运行效果如图
#____author:"xie"
#date:2020-11-12
# -*- coding: utf-8 -*-
import requests
import re,time
# Goal: scrape video data from Pear Video (pearvideo.com).
# Default HTTP headers sent with every request; the User-Agent is a
# desktop Chrome string.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_0) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/71.0.3578.98 Safari/537.36'
    ),
}
def _restore_download_url(down_url, link_id):
    """Return ``down_url`` with its disguised path segment replaced.

    The segment captured after ``short/<dir>/`` (tried first) or
    ``third/<dir>/`` (fallback) is replaced by ``'cont-' + link_id``.
    Returns ``None`` when neither pattern matches, so the caller can skip
    the item instead of crashing.
    """
    for pattern in (r"short/.*?/(.*?)-.*?", r"third/.*?/(.*?)-.*?"):
        found = re.findall(pattern, down_url)
        if found:
            return down_url.replace(found[0], 'cont-' + link_id)
    return None


def video_url():
    """Crawl the category listing pages and print each video's title and URL.

    For list pages 0..10 (12 items per page) this requests the listing
    endpoint, extracts the content id from every thumbnail URL, fetches the
    detail page for the title, then queries ``videoStatus.jsp`` (with the
    detail page as ``Referer``) for the disguised download URL and rewrites
    it with ``_restore_download_url``. One line is printed per video.

    Items whose id, title, or download URL cannot be extracted are skipped.

    Returns:
        None. Results are written to stdout.

    Raises:
        Whatever ``requests.get`` / ``Response.json`` raise on network
        failure, timeout, or a non-JSON status response.
    """
    # Without a timeout requests can block forever on a stalled connection.
    request_timeout = 10
    # Compile once; these patterns are used on every page / item.
    thumb_re = re.compile(r'style="background-image: url(.*?);', re.I)
    title_re = re.compile(r' <h1 class="video-tt">(.*?)</h1>', re.I)
    status_url = 'https://www.pearvideo.com/videoStatus.jsp?'

    last_page = 10
    for page in range(last_page + 1):
        offset = page * 12
        list_page_url = 'https://www.pearvideo.com/category_loading.jsp?reqType=5&categoryId=8&start={}'.format(offset)
        listing = requests.get(url=list_page_url, headers=headers, timeout=request_timeout)

        for thumb in thumb_re.findall(listing.text):
            pieces = str(thumb).rsplit('-')
            if len(pieces) < 2:
                # No '-' in the matched image URL: no id to extract.
                continue
            link_id = pieces[1]
            if len(link_id) == 6:
                # Same filter as the original: 6-character ids are ignored.
                continue

            detail_url = 'https://www.pearvideo.com/video_' + link_id
            time.sleep(0.5)  # throttle between detail-page requests
            detail_page = requests.get(url=detail_url, headers=headers, timeout=request_timeout)
            titles = title_re.findall(detail_page.text)
            if not titles:
                # Detail page has no title heading; nothing to report.
                continue
            data_name = titles[0] + '.mp4'

            # Per-request copy: the Referer must not leak into the shared
            # module-level headers used by every other request.
            status_headers = dict(headers)
            status_headers['Referer'] = detail_url
            param = {'contId': link_id}  # id parameter of the request
            # Fetch the disguised download link.
            res = requests.get(
                url=status_url,
                params=param,
                headers=status_headers,
                timeout=request_timeout,
            ).json()
            try:
                down_url = res['videoInfo']['videos']['srcUrl']  # disguised download address
            except (KeyError, TypeError):
                # Response lacks the expected videoInfo/videos/srcUrl shape.
                continue

            real_url = _restore_download_url(down_url, link_id)
            if real_url is None:
                continue
            print('第{}页的内容有:{} url:{}'.format(page, data_name, real_url))
# Script entry point: run the crawl only when executed directly.
if __name__ == "__main__":
    video_url()
本文地址:https://blog.csdn.net/ranranran52/article/details/109646809