学习爬虫day01:抓取网站信息并且下载视频
程序员文章站
2022-03-25 20:17:59
import requests import re import time import uuid def save_video(content): with open(f'{uuid.uuid4()}.mp4','wb') as f: f.write(content) print('OK!') def ge... ......
import requests
import re
import time
import uuid
def save_video(content):
    """Write *content* to a new ``<uuid4>.mp4`` file in the current directory.

    Args:
        content: Raw bytes to store; the file is opened in binary mode.

    A fresh random UUID is used for the file name on every call, so
    repeated calls do not overwrite earlier downloads.
    """
    with open(f'{uuid.uuid4()}.mp4', 'wb') as f:
        f.write(content)
    # Report success only once the file has been written and closed.
    print('ok!')
def get_page(url, timeout=10):
    """Send a GET request to *url* and return the response object.

    Args:
        url: Address to fetch.
        timeout: Seconds passed to ``requests.get`` as its ``timeout``
            argument. Without one, a stalled server would block the
            crawler indefinitely.

    Returns:
        The object returned by ``requests.get``; callers read ``.text``
        or ``.content`` from it.
    """
    return requests.get(url, timeout=timeout)
def parse_index(html):
    """Extract the detail-page links from the index page markup.

    Args:
        html: Text of the index page.

    Returns:
        A list with the ``href`` value of every
        ``<div class="items"><a class="imglink" href="...">`` occurrence,
        in document order (empty when nothing matches).
    """
    # re.S (DOTALL) lets '.' cross newlines; the lowercase ``re.s`` does
    # not exist and raised AttributeError.
    detail_urls = re.findall(
        '<div class="items"><a class="imglink" href="(.*?)"', html, re.S
    )
    print(detail_urls)
    return detail_urls
def parse_detail(html):
    """Return the first ``<source src="...">`` URL found in *html*.

    Args:
        html: Text of a detail page.

    Returns:
        The ``src`` value of the first matching ``<source>`` tag, or
        ``None`` when the page contains no such tag.
    """
    # re.S (DOTALL) lets '.' cross newlines; the lowercase ``re.s`` does
    # not exist and raised AttributeError. Only the first match is used,
    # so re.search is enough.
    match = re.search('<source src="(.*?)">', html, re.S)
    if match is None:
        return None
    return match.group(1)
def main():
    """Crawl the video index page and save every video it links to.

    Steps: fetch the index page, extract the detail-page links with
    ``parse_index``, fetch each detail page, extract its video URL with
    ``parse_detail`` and, when one is found, download it and hand the
    bytes to ``save_video``.
    """
    index_url = 'http://www.xiaohuar.com/v/'
    index_res = get_page(index_url)
    for detail_url in parse_index(index_res.text):
        detail_res = get_page(detail_url)
        movie_url = parse_detail(detail_res.text)
        if not movie_url:
            # Detail page without a <source> tag: nothing to download.
            continue
        print(movie_url)
        movie_res = get_page(movie_url)
        save_video(movie_res.content)


if __name__ == '__main__':
    main()
下一篇: Requests库