欢迎您访问程序员文章站!本站旨在为大家提供、分享程序员计算机编程知识。
您现在的位置是: 首页

爬取b站音视频以及合并弹幕

程序员文章站 2022-03-02 13:25:54
...

废话不多说,直接上源码

需要用到ffmpeg合并音视频,以及合并弹幕
还需要下载danmaku2ass
链接: link.
提取码:xx2t
关于danmaku2ass的使用:直接把下载好的xml格式弹幕文件拖入danmaku2ass中,等几秒就可以生成ass格式的文件了。
需要注意的是,使用ffmpeg时很容易出现路径错误、找不到文件之类的问题。这是因为音视频文件和弹幕文件的文件名以及它们所在的路径中都不能出现空格。

import requests
import re
import json
import os



class BilibiliPro:
    """Download the video stream, audio stream and danmaku of one Bilibili video.

    The constructor prompts on stdin for the video id; the id is stored in
    ``self.BV`` and the request headers shared by all methods in
    ``self.headers``.
    """

    # Number of bytes written to disk per iteration while streaming media.
    CHUNK_SIZE = 1024 * 1024

    def __init__(self):
        # A desktop browser User-Agent is sent with every request.
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36 Edg/87.0.664.66',
        }
        # Strip stray whitespace so a pasted id does not break the page URL.
        self.BV = input('输入*:').strip()

    def my_match(self, text, pattern):
        """Return the JSON value captured by group 1 of *pattern* in *text*.

        Args:
            text: Text to search (the script passes the video page HTML).
            pattern: Regular expression whose first group captures JSON.

        Returns:
            The decoded JSON value.

        Raises:
            ValueError: If *pattern* does not occur in *text*, or the captured
                text is not valid JSON (``json.JSONDecodeError`` is a
                ``ValueError`` subclass).
        """
        match = re.search(pattern, text)
        if match is None:
            raise ValueError('pattern %r not found in text' % (pattern,))
        return json.loads(match.group(1))

    def _download_stream(self, url, path, headers, size_label):
        """Stream *url* into the file *path*, overwriting any existing file.

        The body is written chunk by chunk instead of being held in memory.
        If a response ends before ``content-length`` bytes have arrived, the
        remainder is requested again with a ``Range`` header.

        Args:
            url: Media URL to fetch.
            path: Destination file path.
            headers: Request headers; copied, never modified.
            size_label: Text printed in front of the reported content length.

        Raises:
            IOError: If the server ignores a resume request or stops sending
                data before the announced length is reached.
        """
        received = 0
        total = None  # Unknown until the first response headers arrive.
        # 'wb' (not 'ab'): a rerun must not append to an earlier download.
        with open(path, 'wb') as output:
            while total is None or received < total:
                request_headers = dict(headers)
                if received:
                    request_headers['Range'] = 'bytes=%d-' % received
                with requests.get(url, headers=request_headers, stream=True) as response:
                    response.raise_for_status()
                    if received and response.status_code != 206:
                        # A full body here would be appended after the part
                        # already written and corrupt the file.
                        raise IOError('server ignored the Range request for %s' % url)
                    if total is None:
                        length = response.headers.get('content-length')
                        print(size_label, length)
                        # Without a length, a single pass is all we can do.
                        total = int(length) if length else 0
                    written = 0
                    for chunk in response.iter_content(chunk_size=self.CHUNK_SIZE):
                        output.write(chunk)
                        written += len(chunk)
                received += written
                if not written and received < total:
                    # No progress: fail instead of looping forever.
                    raise IOError('download of %s stalled at %d of %d bytes'
                                  % (url, received, total))

    def download_video(self, old_video_url, video_url, audio_url, video_name):
        """Download the video and audio streams into the current directory.

        The files are named ``<video_name>_video.mp4`` and
        ``<video_name>_audio.mp4``.

        Args:
            old_video_url: Video page URL, sent as the ``Referer`` header.
            video_url: URL of the video stream.
            audio_url: URL of the audio stream.
            video_name: Base name used for the output files.

        Returns:
            *video_name*, unchanged.
        """
        # Work on a copy so Referer/Range never leak into later requests that
        # reuse self.headers.
        headers = dict(self.headers, Referer=old_video_url)
        print("开始下载视频:%s" % video_name)
        self._download_stream(video_url, '%s_video.mp4' % video_name,
                              headers, '%s视频大小:' % video_name)
        self._download_stream(audio_url, '%s_audio.mp4' % video_name,
                              headers, '%s音频大小:' % video_name)
        return video_name

    def get_oid(self, url):
        """Fetch *url* and return the text between ``cid=`` and ``&aid``.

        Raises:
            ValueError: If the page contains no ``cid=...&aid`` fragment.
        """
        response = requests.get(url=url, headers=self.headers)
        match = re.search('cid=(.*?)&aid', response.text)
        if match is None:
            raise ValueError('no cid found in page %s' % url)
        return match.group(1)

    def get_danmu(self, cid, video_name):
        """Fetch the danmaku XML for *cid* and save it as ``./<video_name>.xml``.

        The raw XML is kept on disk so it can later be converted and merged
        into the video.

        Returns:
            A list with the text content of every ``<d>`` element.
        """
        url = 'https://api.bilibili.com/x/v1/dm/list.so?oid={}'.format(cid)
        response = requests.get(url=url, headers=self.headers)
        response.encoding = 'utf-8'
        with open('./%s.xml' % video_name, 'w', encoding='utf-8') as f:
            f.write(response.text)
        return re.findall("<d.*?>(.*?)</d>", response.text)

    def save(self, video_name, cid):
        """Write every danmaku of *cid* to ``./<video_name>.csv``, one per line."""
        danmus = self.get_danmu(cid, video_name)
        with open('./%s.csv' % video_name, 'w', encoding='utf-8') as fp:
            fp.writelines(danmu + '\n' for danmu in danmus)

def main():
    """Download one Bilibili video with its danmaku and merge video and audio.

    Steps: prompt for the video id, read the stream URLs and the title from
    the video page, save the danmaku as XML and CSV, download both streams
    into the current directory, then call ffmpeg to mux them into
    ``<title>.mp4``.

    Raises:
        SystemExit: If ffmpeg exits with a non-zero status.
    """
    # Local imports: only this entry point needs them.
    import shutil
    import subprocess

    bilibili = BilibiliPro()
    url = 'https://www.bilibili.com/video/{}'.format(bilibili.BV)
    cid = bilibili.get_oid(url)  # cid identifies the danmaku list

    response = requests.get(url=url, headers=bilibili.headers)
    playinfo = bilibili.my_match(response.text, '__playinfo__=(.*?)</script><script>')
    initial_state = bilibili.my_match(response.text, r'__INITIAL_STATE__=(.*?);\(function\(\)')
    video_url = playinfo['data']['dash']['video'][0]['baseUrl']
    audio_url = playinfo['data']['dash']['audio'][0]['baseUrl']

    # The title becomes a file name: drop whitespace and the characters that
    # are not allowed in file names, and fall back to the id if nothing is
    # left.
    title = initial_state['videoData']['title']
    video_name = re.sub(r'[\s\\/:*?"<>|]', '', title) or bilibili.BV

    bilibili.save(video_name, cid)  # store the danmaku (XML + CSV)
    print('视频名字为:', video_name)
    print('视频地址为:', video_url)
    print('音频地址为:', audio_url)
    bilibili.download_video(url, video_url, audio_url, video_name)

    # download_video writes into the current directory, so resolve the inputs
    # from there instead of assuming one machine's project folder.
    video = os.path.abspath('%s_video.mp4' % video_name)
    audio = os.path.abspath('%s_audio.mp4' % video_name)
    output = os.path.abspath('%s.mp4' % video_name)

    # Prefer an ffmpeg found on PATH; otherwise use the original install path.
    ffmpeg = shutil.which('ffmpeg') or 'E:\\FFmpeg\\ffmpeg-N\\bin\\ffmpeg'

    # An argument list (no shell) keeps a title with special characters from
    # breaking or injecting into the command.
    command = [
        ffmpeg,
        '-i', video,
        '-i', audio,
        '-c:v', 'copy',
        '-c:a', 'aac',
        '-strict', 'experimental',
        output,
    ]
    result = subprocess.run(command)
    if result.returncode != 0:
        raise SystemExit('ffmpeg failed with exit code %d' % result.returncode)

    # Optional follow-up (not automated here): after converting the saved XML
    # to .ass with danmaku2ass, the danmaku can be burned into the video with
    #   ffmpeg -i <merged>.mp4 -vf subtitles=<danmaku>.ass -vcodec libx264 <output>.mp4


if __name__ == '__main__':
    main()
相关标签: python ffmpeg