欢迎您访问程序员文章站本站旨在为大家提供分享程序员计算机编程知识!
您现在的位置是: 首页

爬虫-抖音无水印视频下载

程序员文章站 2022-04-11 16:52:10
...

直接撸代码,复制即用

import re
import requests

class Douyin:
    def __init__(self,url):
        """
        初始化
        :param url: 用手机分享的视频链接
        """
        self.headers = {"User-Agent": "Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/604.1.38 (KHTML, like Gecko) Version/11.0 Mobile/15A372 Safari/604.1"}
        self.url = url

    # 获取拼接视频播放地址的参数,获取网页源代码中的参数
    def get_url_data(self):
        res = requests.get(url=self.url, headers=self.headers).text
        try:
            parent_rid = re.findall('parent_rid: "(.*?)"', res)[0]
            itemId = re.findall('itemId: "(.*?)"', res)[0]
            uid = re.findall('uid: "(.*?)"', res)[0]
            dytk = re.findall('dytk: "(.*?)"', res)[0]
            authorName = re.findall('authorName: "(.*?)"', res)[0].encode('utf-8').decode(
                'unicode_escape')  # 原为unicode编码 需要解码
            data = {
                "parent_rid": parent_rid,
                "itemId": itemId,
                "uid": uid,
                "dytk": dytk,
                "code": 0,         # 这里定义一个状态码 0:成功   1:失败
                "authorName": authorName
            }
            return data
        except Exception as e:
            data = {"code": 1}
            return data

    # 获取手机播放地址(只能把浏览器切换成手机模式播放视频)
    def get_phone_video_url(self,data):
        video_url = "https://www.iesdouyin.com/web/api/v2/aweme/iteminfo/?item_ids={}&dytk={}".format(data["itemId"],data["dytk"])
        rep = requests.get(url=video_url, headers=self.headers).json()
        try:
            for item in rep["item_list"]:
                desc = item["desc"]  # 标题
                duration = item["duration"]  # 时长
                url = item["video"]["play_addr"]["url_list"][0]  # 一共两个视频链接 都可以播放  选择一个就行
                comment_count = item["statistics"]["comment_count"]  # 评论数
                digg_count = item["statistics"]["digg_count"]    # 点赞数
                data["desc"] = desc
                data["duration/(毫秒)"] = duration
                data["duration/(分钟)"] = round(duration / 60000, 3)  # 保留小数点后两位
                data["appurl"] = url
                data["code"] = 0
                data["comment_count"] = comment_count
                data["digg_count"] = digg_count
            return data
        except Exception as e:
            data["code"] = 1
            return data

    # 获取web播放地址
    def get_video_web_url(self,data):
        if data["code"] == 0:
            res = requests.get(data["appurl"], headers=self.headers, allow_redirects=False).headers
            data["url"] = res["location"]
            data.pop("appurl")
            return data
        else:
            return {}

    # 程序入口
    def main(self):
        url_data = self.get_url_data()
        phone_video_url = self.get_phone_video_url(url_data)
        data = self.get_video_web_url(phone_video_url)
        return data

if __name__ == '__main__':
    d = Douyin("https://v.douyin.com/cLf9Kn/")  # 复制分享的视频链接
    w = d.main()
    print(w)
{'parent_rid': '20200406141604010194044226966C77', 'itemId': '6777940418903543055', 'uid': '60965882796', 'dytk': 'a81b990970be47f46d3e5037e275c7f6ce5a28e349ab64589492411443b90567', 'code': 0, 'authorName': '房岩小哥', 'desc': '千万不要熬夜了。这种结果你怕不怕', 'duration/(毫秒)': 50133, 'duration/(分钟)': 0.836, 'comment_count': 1709, 'digg_count': 47416, 'url': 'http://v6-dy-z.ixigua.com/c1f5f994a5f3f78a5e17ab39f06ee7a7/5e8ad767/video/tos/cn/tos-cn-ve-15/9a7f50ebae5c4712a536a0fdaed84e7a/?a=1128&br=0&bt=863&cr=0&cs=0&dr=0&ds=6&er=&l=2020040614160501001203309001599158&lr=&qs=0&rc=anQ3N3dmcHE0cjMzPGkzM0ApOmhoODU4PDxnNzpkZDw0OGdmaGJeL15mNjVfLS00LS9zczY0XjReMC1eM19iNF4wLjA6Yw%3D%3D&vl=&vr='}