爬取b站音视频以及合并弹幕
程序员文章站
2022-03-02 13:25:54
...
废话不多说,直接上源码
需要用到ffmpeg合并音视频,以及合并弹幕
还需要下载danmaku2ass
链接: link.
提取码:xx2t
关于danmaku2ass的使用:直接把下载好的xml格式弹幕文件拖入danmaku2ass中,等待几秒即可生成ass格式的字幕文件。
需要注意的是,使用ffmpeg时很容易出现路径错误、找不到文件之类的问题,这是因为音视频以及弹幕文件的文件名和所在路径中都不能出现空格。
import requests
import re
import json
import os
class BilibiliPro():
    """Fetch a Bilibili video's media streams and danmaku (bullet comments).

    The instance holds the HTTP headers shared by every request and the video
    id typed in by the user.  Media files, the danmaku XML and the danmaku CSV
    are all written to the current working directory.
    """

    # Seconds to wait on the server before a request is abandoned.
    REQUEST_TIMEOUT = 30
    # Number of bytes written to disk per chunk while streaming media.
    CHUNK_SIZE = 1024 * 1024

    def __init__(self):
        """Set up the default request headers and prompt for the video id."""
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36 Edg/87.0.664.66',
        }
        self.BV = input('输入*:')

    def my_match(self, text, pattern):
        """Return the JSON object captured by group 1 of *pattern* in *text*.

        Raises:
            ValueError: if *pattern* does not occur in *text*, or if the
                captured text is not valid JSON (``json.JSONDecodeError`` is
                a ``ValueError`` subclass).
        """
        match = re.search(pattern, text)
        if match is None:
            # Fail with a readable message instead of "'NoneType' has no group".
            raise ValueError('pattern %r not found in text' % (pattern,))
        return json.loads(match.group(1))

    def download_video(self, old_video_url, video_url, audio_url, video_name):
        """Download the video and audio streams next to each other.

        The files are written as ``<video_name>_video.mp4`` and
        ``<video_name>_audio.mp4`` and overwrite any previous download.

        Args:
            old_video_url: URL of the video page, sent as the Referer header.
            video_url: URL of the video-only stream.
            audio_url: URL of the audio-only stream.
            video_name: base name used for the output files.

        Returns:
            *video_name*, unchanged.
        """
        # The Referer stays on the shared headers, as it always did.
        # NOTE(review): presumably the media server requires it -- confirm.
        self.headers.update({"Referer": old_video_url})
        print("开始下载视频:%s" % video_name)
        self._download_stream(video_url, '%s_video.mp4' % video_name,
                              '%s视频大小:' % video_name)
        self._download_stream(audio_url, '%s_audio.mp4' % video_name,
                              '%s音频大小:' % video_name)
        return video_name

    def _download_stream(self, url, path, size_label):
        """Stream *url* into *path*, issuing Range requests until complete.

        The total size is taken from the Content-Length header of the first
        response and printed after *size_label*.

        Raises:
            IOError: if the server ignores a resume request or stops sending
                data before the announced size is reached.
        """
        received = 0
        total = None
        # 'wb' rather than 'ab': a re-run must not append to an old download.
        with open(path, 'wb') as output:
            while total is None or received < total:
                # Work on a copy so the Range header never leaks into the
                # shared headers used by later requests.
                request_headers = dict(self.headers)
                request_headers['Range'] = 'bytes=%d-' % received
                progress = 0
                # stream=True defers the body, so the size is known before
                # any payload is transferred and memory use stays bounded.
                with requests.get(url, headers=request_headers, stream=True,
                                  timeout=self.REQUEST_TIMEOUT) as response:
                    response.raise_for_status()
                    if total is None:
                        size = response.headers['content-length']
                        print(size_label, size)
                        total = int(size)
                    elif response.status_code != 206:
                        # A full (200) reply here would duplicate data
                        # that is already on disk.
                        raise IOError('server ignored Range request for %s' % url)
                    for chunk in response.iter_content(chunk_size=self.CHUNK_SIZE):
                        output.write(chunk)
                        progress += len(chunk)
                received += progress
                if progress == 0 and received < total:
                    # Without this guard an empty reply would loop forever.
                    raise IOError('download of %s stalled at %d of %d bytes'
                                  % (url, received, total))

    def get_oid(self, url):
        """Fetch the page at *url* and return the cid found in its text.

        The cid is whatever sits between ``cid=`` and ``&aid`` in the page.

        Raises:
            ValueError: if no cid is present in the page.
        """
        response = requests.get(url=url, headers=self.headers,
                                timeout=self.REQUEST_TIMEOUT)
        match = re.search(r'cid=(.*?)&aid', response.text)
        if match is None:
            raise ValueError('cid not found in page %s' % url)
        return match.group(1)

    def get_danmu(self, cid, video_name):
        """Download the danmaku XML for *cid* and return the comment texts.

        The raw XML is also saved as ``./<video_name>.xml`` so it can later be
        converted to subtitles and merged into the video.

        Returns:
            A list with the text content of every ``<d>`` element.
        """
        url = 'https://api.bilibili.com/x/v1/dm/list.so?oid={}'.format(cid)
        response = requests.get(url=url, headers=self.headers,
                                timeout=self.REQUEST_TIMEOUT)
        response.encoding = 'utf-8'
        xml_text = response.text
        with open('./%s.xml' % video_name, 'w', encoding='utf-8') as f:
            f.write(xml_text)
        return re.findall(r"<d.*?>(.*?)</d>", xml_text)

    def save(self, video_name, cid):
        """Write every danmaku of *cid* to ``./<video_name>.csv``, one per line."""
        # Fetch before opening the file so a failed request cannot leave a
        # truncated CSV behind.
        danmus = self.get_danmu(cid, video_name)
        with open('./%s.csv' % video_name, 'w', encoding='utf-8') as fp:
            fp.writelines(danmu + '\n' for danmu in danmus)
if __name__ == '__main__':
    # Script entry point: download the danmaku and both media streams of one
    # video, then mux the streams into <title>.mp4 with ffmpeg.
    import shutil
    import subprocess

    bilibili = BilibiliPro()
    url = 'https://www.bilibili.com/video/{}'.format(bilibili.BV)
    cid = bilibili.get_oid(url)  # cid identifies the danmaku list of the video
    response = requests.get(url=url, headers=bilibili.headers)
    # Both JSON blobs are embedded in <script> tags of the video page.
    playinfo = bilibili.my_match(response.text, '__playinfo__=(.*?)</script><script>')
    initial_state = bilibili.my_match(response.text, r'__INITIAL_STATE__=(.*?);\(function\(\)')
    # Take the first listed video stream and the first listed audio stream.
    video_url = playinfo['data']['dash']['video'][0]['baseUrl']
    audio_url = playinfo['data']['dash']['audio'][0]['baseUrl']
    video_name = initial_state['videoData']['title']
    # The title becomes a file name: drop whitespace (ffmpeg paths must not
    # contain spaces) and the characters Windows forbids in file names, which
    # would otherwise make open() fail.
    video_name = re.sub(r'[\s\\/:*?"<>|]', '', video_name)
    bilibili.save(video_name, cid)  # store the danmaku (XML and CSV)
    print('视频名字为:', video_name)
    print('视频地址为:', video_url)
    print('音频地址为:', audio_url)
    bilibili.download_video(url, video_url, audio_url, video_name)
    # Read the streams from where download_video wrote them (the current
    # working directory) rather than from a machine-specific absolute path.
    video = '%s_video.mp4' % video_name
    audio = '%s_audio.mp4' % video_name
    # Prefer the original install location, then fall back to PATH.
    ffmpeg = shutil.which('E:\\FFmpeg\\ffmpeg-N\\bin\\ffmpeg') or 'ffmpeg'
    # An argument list (no shell) keeps quotes, '&' and similar characters in
    # the title from breaking or hijacking the command.
    result = subprocess.run([
        ffmpeg, '-i', video, '-i', audio,
        '-c:v', 'copy', '-c:a', 'aac', '-strict', 'experimental',
        '%s.mp4' % video_name,
    ])
    if result.returncode != 0:
        print('ffmpeg exited with status %d' % result.returncode)
    # Burning the danmaku into the merged video is a separate manual step:
    # convert the saved XML to .ass with danmaku2ass, then run e.g.
    #   ffmpeg -i <merged>.mp4 -vf subtitles=<file>.ass -vcodec libx264 <out>.mp4