视频爬虫
程序员文章站
2022-03-10 19:49:08
import os
# 音视频处理
import ffmpy3
import requests
from bs4 import BeautifulSoup
# 多线程包
from multiprocessing.dummy import Pool as ThreadPool
search_keyword = '越狱第一季'
search_url = 'http://www.jisudhw.com/index.php'
serach_params = {'m': 'vod-search'}
se...
import os
import ffmpy3
import requests
from bs4 import BeautifulSoup
from multiprocessing.dummy import Pool as ThreadPool
# Title to search for; submitted as the `wd` form field of the search POST.
search_keyword = '越狱第一季'

# Search endpoint and the query-string parameters appended to it.
search_url = 'http://www.jisudhw.com/index.php'
serach_params = dict(m='vod-search')

# Browser-like headers sent with the search request.
serach_headers = dict([
    ('User-Agent', 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.122 Safari/537.36'),
    ('Referer', 'http://www.jisudhw.com/'),
    ('Origin', 'http://www.jisudhw.com'),
    ('Host', 'www.jisudhw.com'),
])

# Form body of the search request.
serach_datas = dict(wd=search_keyword, submit='search')

# Output directory for downloaded episodes; overwritten by the search loop
# with the matched title and read by downVideo when building file paths.
video_dir = ''
# Submit the search form and parse the HTML result list.
# A timeout keeps an unresponsive server from hanging the script forever.
r = requests.post(url=search_url,
                  params=serach_params,
                  headers=serach_headers,
                  data=serach_datas,
                  timeout=30)
r.encoding = 'utf-8'
server = 'http://www.jisudhw.com'
search_html = BeautifulSoup(r.text, 'lxml')
search_spans = search_html.find_all('span', class_='xing_vb4')

# Maps each m3u8 playlist URL to its 1-based episode number.  Bound before
# the loop so an empty result list leaves an empty mapping rather than an
# undefined name for the download step.
serach_res = {}
for span in search_spans:
    link = span.a
    if link is None or not link.get('href'):
        # Result entry without a usable link: nothing to fetch.
        continue
    url = server + link.get('href')
    # `.string` is None when the anchor wraps nested markup; fall back to the
    # concatenated text so the directory name is always a string.
    name = link.string or link.get_text()
    print(name)
    print(url)
    video_dir = name
    # exist_ok replaces the list-then-create check, which was racy and
    # scanned the whole working directory.
    os.makedirs(name, exist_ok=True)

    # Fetch the detail page of this result and collect its playlist URLs.
    detail_url = url
    r = requests.get(url=detail_url, timeout=30)
    r.encoding = 'utf-8'
    detail_bf = BeautifulSoup(r.text, 'lxml')
    num = 1
    # NOTE(review): the mapping (like video_dir) is reset for every search
    # hit, so only the last hit's episodes survive the loop -- confirm that
    # this is intended.
    serach_res = {}
    for each_url in detail_bf.find_all('input'):
        # <input> tags without a `value` attribute yield None; treat them as
        # empty instead of raising TypeError on the substring test.
        value = each_url.get('value') or ''
        if 'm3u8' not in value:
            continue
        if value not in serach_res:
            serach_res[value] = num
        print('第%03d集:' % num)
        print(value)
        num += 1
def downVideo(url, executable='D:\\program files\\ffmpeg\\bin\\ffmpeg.exe'):
    """Download one episode's m3u8 stream into an .mp4 file via ffmpeg.

    Args:
        url: Playlist URL.  Must be a key of the module-level ``serach_res``
            mapping, whose value is the episode number used in the file name.
        executable: Path of the ffmpeg binary to launch.  Defaults to the
            Windows install location this script was written against; pass a
            different path on other machines.

    Raises:
        KeyError: If ``url`` is not present in ``serach_res``.
    """
    num = serach_res[url]
    # Episodes are written into the directory named after the matched title.
    name = os.path.join(video_dir, '第%03d集.mp4' % num)
    # The None values pass no extra ffmpeg options for the input or output.
    ffmpy3.FFmpeg(executable=executable,
                  inputs={url: None},
                  outputs={name: None}).run()
# Download the collected episodes concurrently on a pool of 8 worker threads.
pool = ThreadPool(processes=8)
# Iterating the dict yields its keys, i.e. the playlist URLs.
results = pool.map(downVideo, serach_res)
# No further tasks will be submitted; wait for the workers to finish.
pool.close()
pool.join()
运行时效果
视频列表
本文地址:https://blog.csdn.net/m0_37712876/article/details/107655530
下一篇: 在Python 2.x中如何使用中文
推荐阅读
- 神箭手云爬虫-爬取携程【国际】航班/机票信息-利用python解析返回的json文件将信息存储进Mysql数据库
- 爬虫爬取千千音乐榜单音乐
- 找出bilibili隐藏在后方的视频并突破访问限制实现爬取
- Python高级爬虫开发,高难度JS解密教程,绝地求生模拟登陆!
- ipad如何观看本地视频 怎么用iPad观看电脑上的影片
- 零基础写Java知乎爬虫之获取知乎编辑推荐内容(3)
- 看起来很好吃的面包视频 其实是个加字幕神器
- YouTube等视频网站成功入侵电视:有线电视被冷落
- 爬虫出现TypeError: cannot use a string pattern on a bytes-like object报错
- 七牛云客户端上传视频