
Learning web scraping, day 01: scrape site information and download videos

程序员文章站 2022-03-25 20:17:59

import requests
import re
import time  # imported in the original; not used in this first version
import uuid


def save_video(content):
    # Write the raw video bytes to a file with a random (UUID) name.
    with open(f'{uuid.uuid4()}.mp4', 'wb') as f:
        f.write(content)
    print('ok!')


def get_page(url):
    # Send a GET request and return the response object.
    response = requests.get(url)
    return response


def parse_index(html):
    # Extract the detail-page links from the index page.
    detail_urls = re.findall(
        '<div class="items"><a class="imglink" href="(.*?)"', html, re.S
    )
    print(detail_urls)
    return detail_urls


def parse_detail(html):
    # Extract the direct video URL from a detail page, if present.
    movie_url = re.findall('<source src="(.*?)">', html, re.S)
    if movie_url:
        return movie_url[0]


if __name__ == '__main__':
    url = 'http://www.xiaohuar.com/v/'
    response = get_page(url)
    detail_urls = parse_index(response.text)

    for detail_url in detail_urls:
        detail_res = get_page(detail_url)
        movie_url = parse_detail(detail_res.text)
        if movie_url:
            print(movie_url)
            movie_res = get_page(movie_url)
            save_video(movie_res.content)
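
The script above sends bare, back-to-back requests and assumes every extracted link is an absolute URL. A minimal, more defensive variant of get_page is sketched below, assuming the site accepts an ordinary browser User-Agent and that detail links might come back relative to the index URL; the header value, timeout, and delay are illustrative assumptions, not part of the original post.

import time
from urllib.parse import urljoin

import requests

# Assumed header; swap in any real browser User-Agent string you prefer.
HEADERS = {'User-Agent': 'Mozilla/5.0'}


def get_page(url, delay=1.0):
    # Throttle requests a little so the crawl stays polite.
    time.sleep(delay)
    response = requests.get(url, headers=HEADERS, timeout=10)
    # Fail loudly on HTTP errors instead of parsing an error page.
    response.raise_for_status()
    return response


# If parse_index returns relative links, join them with the base URL first:
# detail_url = urljoin('http://www.xiaohuar.com/v/', detail_url)

This version is a drop-in replacement for the original get_page; the rest of the script can stay as written.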
