【Python】利用requests库下载B站视频(半成品)
程序员文章站
2022-06-05 18:55:32
...
介绍
写这篇博客的时候，导师突然发来消息，现在没有太多心情解释具体步骤，先把脚本放在这里，回头再更新。
脚本如下
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
'''
Author: Guo Yingwei
Date: 2021-02-09 17:51:46
E-mail: [email protected]
Description: Download bilibili video
LastEditors: gyw
LastEditTime: 2021-02-25 19:08:37
'''
import json
import os
import re
import sys

import requests
# Default HTTP headers attached to every request issued by get_page().
headers = {
    # NOTE(review): presumably bilibili's servers check the referer before
    # serving stream URLs -- confirm.
    'referer': 'https://www.bilibili.com',
    # Desktop Chrome (Windows 10, x64) user-agent string.
    'user-agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/88.0.4324.104 Safari/537.36'
    ),
}
def get_page(url):
    """Fetch *url* with the module-level ``headers`` and return the response.

    Args:
        url: Address of the page or media stream to download.

    Returns:
        The ``requests.Response`` of a successful request. Its ``encoding``
        is set to the encoding guessed from the body, so ``.text`` decodes
        with that guess; ``.content`` is unaffected.

    Raises:
        RuntimeError: If the request fails (connection problem, timeout) or
            the server answers with an HTTP error status. The underlying
            ``requests`` exception is chained as ``__cause__``. Callers read
            ``.text`` / ``.content`` from the result right away, so a failure
            is raised here instead of being returned as a message string.
    """
    try:
        # The session is only needed for this single request; the context
        # manager releases its connections afterwards. The body is read
        # eagerly (no streaming), so the response stays usable.
        with requests.Session() as session:
            response = session.get(url, headers=headers, timeout=30)
        response.raise_for_status()
    except requests.RequestException as err:
        raise RuntimeError('解析网页失败,请检查!') from err
    # NOTE: apparent_encoding inspects the whole body to guess a charset and
    # is applied to every response, including binary media downloads.
    response.encoding = response.apparent_encoding
    return response
def parse_page(data):
    """Extract the stream URLs and the title from a bilibili video page.

    Args:
        data: HTML source of the video page, as text.

    Returns:
        A three-item list ``[audio_url, video_url, title]``. The URLs come
        from the first audio and first video entry of the ``dash`` section
        of the embedded ``window.__playinfo__`` JSON; the title is the
        content of the ``name="title"`` meta tag.

    Raises:
        ValueError: If the page has no ``window.__playinfo__`` script or no
            title meta tag, or if the embedded JSON cannot be decoded.
        KeyError, IndexError: If the decoded JSON lacks the expected
            ``data -> dash -> audio/video`` entries.
    """
    playinfo_match = re.search(
        r'<script>window\.__playinfo__=(.*?)</script>', data, re.S)
    if playinfo_match is None:
        raise ValueError('window.__playinfo__ script not found in page')
    dash = json.loads(playinfo_match.group(1))['data']['dash']

    title_match = re.search(r'name="title" content="(.*?)">', data, re.S)
    if title_match is None:
        raise ValueError('title meta tag not found in page')

    audio_url = _pick_stream_url(dash['audio'][0])
    video_url = _pick_stream_url(dash['video'][0])
    return [audio_url, video_url, title_match.group(1)]


def _pick_stream_url(stream):
    """Return the last backup URL of *stream*, or its base URL if it has none."""
    backups = stream.get('backup_url')
    if backups:
        return backups[-1]
    # NOTE(review): 'base_url' is assumed to be the primary-URL key of a DASH
    # entry when 'backup_url' is null or absent -- confirm against a live
    # response.
    return stream['base_url']
def write_res(filename, data, save_dir=r'D:\test'):
    """Write *data* to the file *filename* inside *save_dir*.

    Args:
        filename: Name of the file to create.
        data: Bytes to write; the file is opened in binary mode and any
            existing file of the same name is overwritten.
        save_dir: Directory that receives the file. It is created when it
            does not exist yet. Defaults to the ``test`` folder on drive
            ``D:``.

    Raises:
        OSError: If the directory cannot be created or the file cannot be
            written.
    """
    os.makedirs(save_dir, exist_ok=True)
    with open(os.path.join(save_dir, filename), 'wb') as out_file:
        out_file.write(data)
def video_audio_merge(video_name):
    """Placeholder for merging the downloaded audio and video tracks.

    Not implemented yet: the call is accepted and nothing happens.

    Args:
        video_name: Name passed by ``main()``; there it is the cleaned title
            used as the base name of the saved ``.mp3`` and ``.mp4`` files.

    Returns:
        None.
    """
    # TODO: combine the separately saved audio and video files into one.
    return None
def main(bv):
    """Download the audio and video streams of one bilibili video.

    The video page is fetched and dumped to ``hhh.html`` in the working
    directory, the stream URLs and title are extracted with ``parse_page``,
    and the audio and video streams are downloaded and saved through
    ``write_res`` as ``<title>.mp3`` and ``<title>.mp4``. Finally
    ``video_audio_merge`` is called with the same title.

    Args:
        bv: The video id that forms the last segment of the page URL,
            e.g. ``'BV1VJ411v7Ss'``.
    """
    url = f'https://www.bilibili.com/video/{bv}'
    html_data = get_page(url).text

    # Keep a copy of the fetched page on disk; the context manager makes
    # sure the file is flushed and closed.
    with open('hhh.html', 'w', encoding='utf-8') as hfile:
        hfile.write(html_data)

    audio_url, video_url, raw_title = parse_page(html_data)
    title = _clean_title(raw_title)

    # save audio
    audio_content = get_page(url=audio_url).content
    print('saving audio...')
    write_res(filename=title + '.mp3', data=audio_content)

    # save video
    video_content = get_page(url=video_url).content
    print('saving video...')
    write_res(filename=title + '.mp4', data=video_content)

    video_audio_merge(video_name=title)


def _clean_title(raw_title):
    """Turn the page title into a string that is safe to use as a file name."""
    # The site suffix contains spaces, so it has to be removed BEFORE the
    # spaces are stripped; in the opposite order it can never match.
    title = raw_title.replace('_哔哩哔哩 (゜-゜)つロ 干杯~-bilibili', '')
    title = title.replace(' ', '')
    # Characters that are not allowed in Windows file names.
    return re.sub(r'[\\/:*?"<>|]', '_', title)
if __name__ == '__main__':
    # An optional first command-line argument selects the video; without it
    # the sample id below is downloaded.
    main(sys.argv[1] if len(sys.argv) > 1 else 'BV1VJ411v7Ss')
上一篇: 整数分划Java
下一篇: 爬虫基础(案例:北京新发地信息爬取)